/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2014-2015 Chelsio Communications.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Chelsio Communications nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/if_ether.h>
#include <sys/queue.h>

#include <netinet/in.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_alarm.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_random.h>

/*
 * Rx buffer sizes for "usembufs" Free List buffers (one ingress packet
 * per mbuf buffer).  We currently only support two sizes for 1500- and
 * 9000-byte MTUs.  We could easily support more but there doesn't seem to be
 * much need for that ...
 */
#define FL_MTU_SMALL 1500
#define FL_MTU_LARGE 9000

static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
					  unsigned int mtu)
{
	struct sge *s = &adapter->sge;

	return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align);
}

#define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL)
#define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE)

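/*
 * Worked example (illustrative, assuming a 2-byte pktshift and a 64-byte
 * fl_align; both values are actually read from hardware in t4_sge_init()
 * below):
 *
 *	FL_MTU_SMALL_BUFSIZE = ALIGN(2 + 14 + 4 + 1500, 64)
 *	                     = ALIGN(1520, 64) = 1536 bytes per mbuf.
 */
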
/*
 * Bits 0..3 of rx_sw_desc.dma_addr have special meaning.  The hardware uses
 * these to specify the buffer size as an index into the SGE Free List Buffer
 * Size register array.  We also use bit 4, when the buffer has been unmapped
 * for DMA, but this is of course never sent to the hardware and is only used
 * to prevent double unmappings.  All of the above requires that the Free List
 * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
 * aligned to 32 bytes or a greater power of 2.  Since the SGE's minimal
 * Free List Buffer alignment is 32 bytes, this works out for us ...
 */
enum {
	RX_BUF_FLAGS     = 0x1f,   /* bottom five bits are special */
	RX_BUF_SIZE      = 0x0f,   /* bottom four bits are for buf sizes */
	RX_UNMAPPED_BUF  = 0x10,   /* buffer is not mapped */

	/*
	 * XXX We shouldn't depend on being able to use these indices.
	 * XXX Especially when some other Master PF has initialized the
	 * XXX adapter or we use the Firmware Configuration File.  We
	 * XXX should really search through the Host Buffer Size register
	 * XXX array for the appropriately sized buffer indices.
	 */
	RX_SMALL_PG_BUF  = 0x0,   /* small (PAGE_SIZE) page buffer */
	RX_LARGE_PG_BUF  = 0x1,   /* large (fl_pg_order) page buffer */

	RX_SMALL_MTU_BUF = 0x2,   /* small MTU buffer */
	RX_LARGE_MTU_BUF = 0x3,   /* large MTU buffer */
};

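/*
 * Illustrative sketch (hypothetical, not part of the driver): given the
 * encoding above, the pieces of a software descriptor's dma_addr can be
 * recovered with simple masking, e.g. for some rx_sw_desc *d:
 *
 *	bufsz_idx = d->dma_addr & RX_BUF_SIZE;      // FL Buffer Size index
 *	unmapped  = d->dma_addr & RX_UNMAPPED_BUF;  // software-only flag
 *	hw_addr   = d->dma_addr & ~RX_BUF_FLAGS;    // 32-byte-aligned address
 */
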
/**
 * t4_sge_init_soft - initialize SGE
 * @adap: the adapter
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queues here, instead the driver
 * top-level must request those individually.
 *
 * Called in two different modes:
 *
 *  1. Perform actual hardware initialization and record hard-coded
 *     parameters which were used.  This gets used when we're the
 *     Master PF and the Firmware Configuration File support didn't
 *     work for some reason.
 *
 *  2. We're not the Master PF or initialization was performed with
 *     a Firmware Configuration File.  In this case we need to grab
 *     any of the SGE operating parameters that we need to have in
 *     order to do our job and make sure we can live with them ...
 */
static int t4_sge_init_soft(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu;
	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
	u32 ingress_rx_threshold;

	/*
	 * Verify that CPL messages are going to the Ingress Queue for
	 * process_responses() and that only packet data is going to the
	 * Free Lists.
	 */
	if ((t4_read_reg(adap, A_SGE_CONTROL) & F_RXPKTCPLMODE) !=
	    V_RXPKTCPLMODE(X_RXPKTCPLMODE_SPLIT)) {
		dev_err(adap, "bad SGE CPL MODE\n");
		return -EINVAL;
	}

	/*
	 * Validate the Host Buffer Register Array indices that we want to
	 * use ...
	 *
	 * XXX Note that we should really read through the Host Buffer Size
	 * XXX register array and find the indices of the Buffer Sizes which
	 * XXX meet our needs!
	 */
#define READ_FL_BUF(x) \
	t4_read_reg(adap, A_SGE_FL_BUFFER_SIZE0 + (x) * sizeof(u32))

	fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF);
	fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF);
	fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
	fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);

#undef READ_FL_BUF

	/*
	 * We only bother using the Large Page logic if the Large Page Buffer
	 * is larger than our Page Size Buffer.
	 */
	if (fl_large_pg <= fl_small_pg)
		fl_large_pg = 0;

	/*
	 * The Page Size Buffer must be exactly equal to our Page Size and the
	 * Large Page Size Buffer should be 0 (per above) or a power of 2.
	 */
	if (fl_small_pg != PAGE_SIZE ||
	    (fl_large_pg & (fl_large_pg - 1)) != 0) {
		dev_err(adap, "bad SGE FL page buffer sizes [%d, %d]\n",
			fl_small_pg, fl_large_pg);
		return -EINVAL;
	}

	if (fl_large_pg)
		s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT;

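	/*
	 * Example (illustrative): with 4KB host pages (PAGE_SHIFT = 12)
	 * and a 64KB Large Page Buffer, fl_pg_order = ilog2(65536) - 12 =
	 * 16 - 12 = 4, i.e. each large-page Free List buffer spans
	 * 2^4 = 16 host pages.  The (x & (x - 1)) expression above is the
	 * usual test that x is a power of 2.
	 */
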
	if (adap->use_unpacked_mode) {
		int err = 0;

		if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap)) {
			dev_err(adap, "bad SGE FL small MTU %d\n",
				fl_small_mtu);
			err = -EINVAL;
		}
		if (fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) {
			dev_err(adap, "bad SGE FL large MTU %d\n",
				fl_large_mtu);
			err = -EINVAL;
		}
		if (err)
			return err;
	}

	/*
	 * Retrieve our RX interrupt holdoff timer values and counter
	 * threshold values from the SGE parameters.
	 */
	timer_value_0_and_1 = t4_read_reg(adap, A_SGE_TIMER_VALUE_0_AND_1);
	timer_value_2_and_3 = t4_read_reg(adap, A_SGE_TIMER_VALUE_2_AND_3);
	timer_value_4_and_5 = t4_read_reg(adap, A_SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[0] = core_ticks_to_us(adap,
					   G_TIMERVALUE0(timer_value_0_and_1));
	s->timer_val[1] = core_ticks_to_us(adap,
					   G_TIMERVALUE1(timer_value_0_and_1));
	s->timer_val[2] = core_ticks_to_us(adap,
					   G_TIMERVALUE2(timer_value_2_and_3));
	s->timer_val[3] = core_ticks_to_us(adap,
					   G_TIMERVALUE3(timer_value_2_and_3));
	s->timer_val[4] = core_ticks_to_us(adap,
					   G_TIMERVALUE4(timer_value_4_and_5));
	s->timer_val[5] = core_ticks_to_us(adap,
					   G_TIMERVALUE5(timer_value_4_and_5));

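	/*
	 * Note (illustrative): core_ticks_to_us() converts the raw
	 * TIMERVALUE fields from core-clock ticks to microseconds.
	 * Assuming, say, a 200 MHz core clock, a TIMERVALUE of 1000
	 * ticks corresponds to a 5 us interrupt holdoff.
	 */
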
	ingress_rx_threshold = t4_read_reg(adap, A_SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = G_THRESHOLD_0(ingress_rx_threshold);
	s->counter_val[1] = G_THRESHOLD_1(ingress_rx_threshold);
	s->counter_val[2] = G_THRESHOLD_2(ingress_rx_threshold);
	s->counter_val[3] = G_THRESHOLD_3(ingress_rx_threshold);

	return 0;
}

int t4_sge_init(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 sge_control, sge_control2, sge_conm_ctrl;
	unsigned int ingpadboundary, ingpackboundary;
	int ret, egress_threshold;

	/*
	 * Ingress Padding Boundary and Egress Status Page Size are set up by
	 * t4_fixup_host_params().
	 */
	sge_control = t4_read_reg(adap, A_SGE_CONTROL);
	s->pktshift = G_PKTSHIFT(sge_control);
	s->stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;

	/*
	 * T4 uses a single control field to specify both the PCIe Padding and
	 * Packing Boundary.  T5 introduced the ability to specify these
	 * separately.  The actual Ingress Packet Data alignment boundary
	 * within Packed Buffer Mode is the maximum of these two
	 * specifications.
	 */
	ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) +
			       X_INGPADBOUNDARY_SHIFT);
	s->fl_align = ingpadboundary;

	if (!is_t4(adap->params.chip) && !adap->use_unpacked_mode) {
		/*
		 * T5 has a weird interpretation of one of the PCIe Packing
		 * Boundary values.  No idea why ...
		 */
		sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2);
		ingpackboundary = G_INGPACKBOUNDARY(sge_control2);
		if (ingpackboundary == X_INGPACKBOUNDARY_16B)
			ingpackboundary = 16;
		else
			ingpackboundary = 1 << (ingpackboundary +
						X_INGPACKBOUNDARY_SHIFT);

		s->fl_align = max(ingpadboundary, ingpackboundary);
	}

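	/*
	 * Example (illustrative, assuming X_INGPACKBOUNDARY_SHIFT is 5):
	 * a Packing Boundary field value of 1 decodes to 1 << (1 + 5) =
	 * 64 bytes, while the special X_INGPACKBOUNDARY_16B encoding
	 * decodes to 16 bytes; fl_align then becomes whichever of the
	 * Padding and Packing Boundaries is larger.
	 */
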
	ret = t4_sge_init_soft(adap);
	if (ret < 0) {
		dev_err(adap, "%s: t4_sge_init_soft failed, error %d\n",
			__func__, -ret);
		return ret;
	}

	/*
	 * A FL with <= fl_starve_thres buffers is starving and a periodic
	 * timer will attempt to refill it.  This needs to be larger than the
	 * SGE's Egress Congestion Threshold.  If it isn't, then we can get
	 * stuck waiting for new packets while the SGE is waiting for us to
	 * give it more Free List entries.  (Note that the SGE's Egress
	 * Congestion Threshold is in units of 2 Free List pointers.)  For T4,
	 * there was only a single field to control this.  For T5 there's the
	 * original field which now only applies to Unpacked Mode Free List
	 * buffers and a new field which only applies to Packed Mode Free List
	 * buffers.
	 */
	sge_conm_ctrl = t4_read_reg(adap, A_SGE_CONM_CTRL);
	if (is_t4(adap->params.chip) || adap->use_unpacked_mode)
		egress_threshold = G_EGRTHRESHOLD(sge_conm_ctrl);
	else
		egress_threshold = G_EGRTHRESHOLDPACKING(sge_conm_ctrl);
	s->fl_starve_thres = 2 * egress_threshold + 1;
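
	/*
	 * Worked example (illustrative): an Egress Congestion Threshold
	 * field of 16 means 32 Free List pointers (the field is in units
	 * of 2), so fl_starve_thres = 2 * 16 + 1 = 33 and the refill
	 * timer fires while the FL still holds more entries than the
	 * congestion threshold demands.
	 */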