drivers/net/enic/base/vnic_wq.h

   1 /*
   2  * Copyright 2008-2010 Cisco Systems, Inc.  All rights reserved.
   3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
   4  *
   5  * Copyright (c) 2014, Cisco Systems, Inc.
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  * notice, this list of conditions and the following disclaimer.
  14  *
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  * notice, this list of conditions and the following disclaimer in
  17  * the documentation and/or other materials provided with the
  18  * distribution.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  24  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  */
  34
  35 #ifndef _VNIC_WQ_H_
  36 #define _VNIC_WQ_H_
  37
  38
  39 #include "vnic_dev.h"
  40 #include "vnic_cq.h"
  41
/*
 * Work queue control registers.
 *
 * This layout is accessed through an __iomem pointer (see struct vnic_wq),
 * so field order and padding must not change.  The trailing comments give
 * each register's byte offset; every 32-bit register is followed by a
 * 32-bit pad so that consecutive registers sit 8 bytes apart.
 *
 * Within this header only posted_index is touched: vnic_wq_post() writes
 * it when the last (eop) fragment of a send has been queued.  When
 * DO_PREFETCH is defined that write is a single 64-bit store starting at
 * posted_index.
 * NOTE(review): the semantics of the remaining registers are defined by
 * the device and are not visible here -- confirm against the hardware
 * documentation before relying on the field names alone.
 */
struct vnic_wq_ctrl {
        u64 ring_base;                  /* 0x00 */
        u32 ring_size;                  /* 0x08 */
        u32 pad0;
        u32 posted_index;               /* 0x10 */
        u32 pad1;
        u32 cq_index;                   /* 0x18 */
        u32 pad2;
        u32 enable;                     /* 0x20 */
        u32 pad3;
        u32 running;                    /* 0x28 */
        u32 pad4;
        u32 fetch_index;                /* 0x30 */
        u32 pad5;
        u32 dca_value;                  /* 0x38 */
        u32 pad6;
        u32 error_interrupt_enable;     /* 0x40 */
        u32 pad7;
        u32 error_interrupt_offset;     /* 0x48 */
        u32 pad8;
        u32 error_status;               /* 0x50 */
        u32 pad9;
};
  66
/*
 * Software bookkeeping for one work queue slot.
 *
 * Entries are chained through ->next; vnic_wq_post() fills in the per-send
 * fields of the entry at wq->to_use and then advances to ->next, while
 * vnic_wq_service() walks the chain from wq->to_clean.
 */
struct vnic_wq_buf {
        struct vnic_wq_buf *next;       /* following entry in the chain */
        dma_addr_t dma_addr;            /* DMA address given to vnic_wq_post() */
        void *os_buf;                   /* caller's buffer handle, stored as-is */
        unsigned int len;               /* length given to vnic_wq_post() */
        unsigned int index;             /* slot index; written to posted_index and
                                         * compared against the completed index */
        int sop;                        /* sop flag given to vnic_wq_post() */
        void *desc;                     /* descriptor returned by vnic_wq_next_desc() */
        uint64_t wr_id; /* Cookie */
        uint8_t cq_entry; /* Gets completion event from hw */
        uint8_t desc_skip_cnt; /* Num descs to occupy */
        uint8_t compressed_send; /* Both hdr and payload in one desc */
};
  80
  81 /* Break the vnic_wq_buf allocations into blocks of 32/64 entries */
  82 #define VNIC_WQ_BUF_MIN_BLK_ENTRIES 32
  83 #define VNIC_WQ_BUF_DFLT_BLK_ENTRIES 64
  84 #define VNIC_WQ_BUF_BLK_ENTRIES(entries) \
  85         ((unsigned int)((entries < VNIC_WQ_BUF_DFLT_BLK_ENTRIES) ? \
  86         VNIC_WQ_BUF_MIN_BLK_ENTRIES : VNIC_WQ_BUF_DFLT_BLK_ENTRIES))
  87 #define VNIC_WQ_BUF_BLK_SZ(entries) \
  88         (VNIC_WQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_wq_buf))
  89 #define VNIC_WQ_BUF_BLKS_NEEDED(entries) \
  90         DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES(entries))
  91 #define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096)
  92
/*
 * Per-work-queue software state.
 *
 * to_use and to_clean are cursors into the chain of vnic_wq_buf entries:
 * vnic_wq_post() consumes the entry at to_use, and vnic_wq_service()
 * releases entries starting at to_clean.
 */
struct vnic_wq {
        unsigned int index;
        struct vnic_dev *vdev;
        struct vnic_wq_ctrl __iomem *ctrl;              /* memory-mapped */
        struct vnic_dev_ring ring;      /* desc_count / desc_avail accounting */
        struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX]; /* one pointer per
                                                         * allocation block */
        struct vnic_wq_buf *to_use;     /* next entry vnic_wq_post() fills */
        struct vnic_wq_buf *to_clean;   /* next entry vnic_wq_service() visits */
        /* NOTE(review): the two fields below are not referenced in this
         * header; their meaning is set by code elsewhere -- confirm there. */
        unsigned int pkts_outstanding;
        unsigned int socket_id;
};
 104
 105 static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq)
 106 {
 107         /* how many does SW own? */
 108         return wq->ring.desc_avail;
 109 }
 110
 111 static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq)
 112 {
 113         /* how many does HW own? */
 114         return wq->ring.desc_count - wq->ring.desc_avail - 1;
 115 }
 116
 117 static inline void *vnic_wq_next_desc(struct vnic_wq *wq)
 118 {
 119         return wq->to_use->desc;
 120 }
 121
/*
 * Bit layout of the 64-bit posted-index word built by
 * vnic_cached_posted_index():
 *
 *   bits  0..11  posted index              (PI_INDEX_BITS wide)
 *   bits 16..20  cache lines to prefetch   (PI_PREFETCH_LEN_MASK at
 *                                           PI_PREFETCH_LEN_OFF)
 *   bits 21..63  buffer address >> 5       (PI_PREFETCH_ADDR_BITS wide at
 *                                           PI_PREFETCH_ADDR_OFF)
 *
 * Cache lines are 2^PI_LOG2_CACHE_LINE_SIZE = 32 bytes.  Because the length
 * field is also 5 bits wide, PI_PREFETCH_LEN_MASK doubles as the
 * "offset within a cache line" mask in num_cache_lines_touched().
 */
#define PI_LOG2_CACHE_LINE_SIZE        5
#define PI_INDEX_BITS            12
#define PI_INDEX_MASK ((1U << PI_INDEX_BITS) - 1)
#define PI_PREFETCH_LEN_MASK ((1U << PI_LOG2_CACHE_LINE_SIZE) - 1)
#define PI_PREFETCH_LEN_OFF 16
#define PI_PREFETCH_ADDR_BITS 43
#define PI_PREFETCH_ADDR_MASK ((1ULL << PI_PREFETCH_ADDR_BITS) - 1)
#define PI_PREFETCH_ADDR_OFF 21
 130
 131 /** How many cache lines are touched by buffer (addr, len). */
 132 static inline unsigned int num_cache_lines_touched(dma_addr_t addr,
 133                                                         unsigned int len)
 134 {
 135         const unsigned long mask = PI_PREFETCH_LEN_MASK;
 136         const unsigned long laddr = (unsigned long)addr;
 137         unsigned long lines, equiv_len;
 138         /* A. If addr is aligned, our solution is just to round up len to the
 139         next boundary.
 140
 141         e.g. addr = 0, len = 48
 142         +--------------------+
 143         |XXXXXXXXXXXXXXXXXXXX|    32-byte cacheline a
 144         +--------------------+
 145         |XXXXXXXXXX          |    cacheline b
 146         +--------------------+
 147
 148         B. If addr is not aligned, however, we may use an extra
 149         cacheline.  e.g. addr = 12, len = 22
 150
 151         +--------------------+
 152         |       XXXXXXXXXXXXX|
 153         +--------------------+
 154         |XX                  |
 155         +--------------------+
 156
 157         Our solution is to make the problem equivalent to case A
 158         above by adding the empty space in the first cacheline to the length:
 159         unsigned long len;
 160
 161         +--------------------+
 162         |eeeeeeeXXXXXXXXXXXXX|    "e" is empty space, which we add to len
 163         +--------------------+
 164         |XX                  |
 165         +--------------------+
 166
 167         */
 168         equiv_len = len + (laddr & mask);
 169
 170         /* Now we can just round up this len to the next 32-byte boundary. */
 171         lines = (equiv_len + mask) & (~mask);
 172
 173         /* Scale bytes -> cachelines. */
 174         return lines >> PI_LOG2_CACHE_LINE_SIZE;
 175 }
 176
 177 static inline u64 vnic_cached_posted_index(dma_addr_t addr, unsigned int len,
 178                                                 unsigned int index)
 179 {
 180         unsigned int num_cache_lines = num_cache_lines_touched(addr, len);
 181         /* Wish we could avoid a branch here.  We could have separate
 182          * vnic_wq_post() and vinc_wq_post_inline(), the latter
 183          * only supporting < 1k (2^5 * 2^5) sends, I suppose.  This would
 184          * eliminate the if (eop) branch as well.
 185          */
 186         if (num_cache_lines > PI_PREFETCH_LEN_MASK)
 187                 num_cache_lines = 0;
 188         return (index & PI_INDEX_MASK) |
 189         ((num_cache_lines & PI_PREFETCH_LEN_MASK) << PI_PREFETCH_LEN_OFF) |
 190                 (((addr >> PI_LOG2_CACHE_LINE_SIZE) &
 191         PI_PREFETCH_ADDR_MASK) << PI_PREFETCH_ADDR_OFF);
 192 }
 193
 194 static inline void vnic_wq_post(struct vnic_wq *wq,
 195         void *os_buf, dma_addr_t dma_addr,
 196         unsigned int len, int sop, int eop,
 197         uint8_t desc_skip_cnt, uint8_t cq_entry,
 198         uint8_t compressed_send, uint64_t wrid)
 199 {
 200         struct vnic_wq_buf *buf = wq->to_use;
 201
 202         buf->sop = sop;
 203         buf->cq_entry = cq_entry;
 204         buf->compressed_send = compressed_send;
 205         buf->desc_skip_cnt = desc_skip_cnt;
 206         buf->os_buf = os_buf;
 207         buf->dma_addr = dma_addr;
 208         buf->len = len;
 209         buf->wr_id = wrid;
 210
 211         buf = buf->next;
 212         if (eop) {
 213 #ifdef DO_PREFETCH
 214                 uint64_t wr = vnic_cached_posted_index(dma_addr, len,
 215                                                         buf->index);
 216 #endif
 217                 /* Adding write memory barrier prevents compiler and/or CPU
 218                  * reordering, thus avoiding descriptor posting before
 219                  * descriptor is initialized. Otherwise, hardware can read
 220                  * stale descriptor fields.
 221                  */
 222                 wmb();
 223 #ifdef DO_PREFETCH
 224                 /* Intel chipsets seem to limit the rate of PIOs that we can
 225                  * push on the bus.  Thus, it is very important to do a single
 226                  * 64 bit write here.  With two 32-bit writes, my maximum
 227                  * pkt/sec rate was cut almost in half. -AJF
 228                  */
 229                 iowrite64((uint64_t)wr, &wq->ctrl->posted_index);
 230 #else
 231                 iowrite32(buf->index, &wq->ctrl->posted_index);
 232 #endif
 233         }
 234         wq->to_use = buf;
 235
 236         wq->ring.desc_avail -= desc_skip_cnt;
 237 }
 238
 239 static inline void vnic_wq_service(struct vnic_wq *wq,
 240         struct cq_desc *cq_desc, u16 completed_index,
 241         void (*buf_service)(struct vnic_wq *wq,
 242         struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque),
 243         void *opaque)
 244 {
 245         struct vnic_wq_buf *buf;
 246
 247         buf = wq->to_clean;
 248         while (1) {
 249
 250                 (*buf_service)(wq, cq_desc, buf, opaque);
 251
 252                 wq->ring.desc_avail++;
 253
 254                 wq->to_clean = buf->next;
 255
 256                 if (buf->index == completed_index)
 257                         break;
 258
 259                 buf = wq->to_clean;
 260         }
 261 }
 262
/*
 * Work queue management routines.  Only the declarations are visible in
 * this header; the definitions live elsewhere in the driver.
 * NOTE(review): behaviour, return-value conventions and required call
 * order are not visible from here -- confirm against the implementation.
 */
void vnic_wq_free(struct vnic_wq *wq);
int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
        unsigned int desc_count, unsigned int desc_size);
void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index,
        unsigned int fetch_index, unsigned int posted_index,
        unsigned int error_interrupt_enable,
        unsigned int error_interrupt_offset);
void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
        unsigned int error_interrupt_enable,
        unsigned int error_interrupt_offset);
void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error);
unsigned int vnic_wq_error_status(struct vnic_wq *wq);
void vnic_wq_enable(struct vnic_wq *wq);
int vnic_wq_disable(struct vnic_wq *wq);
/* buf_clean is a caller-supplied callback taking the queue and one buffer. */
void vnic_wq_clean(struct vnic_wq *wq,
        void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf));
int vnic_wq_mem_size(struct vnic_wq *wq, unsigned int desc_count,
        unsigned int desc_size);
 281
 282 #endif /* _VNIC_WQ_H_ */