1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2018, Microsoft Corporation.
12 #include <rte_tailq.h>
14 #include <rte_malloc.h>
16 #include <rte_atomic.h>
17 #include <rte_memory.h>
18 #include <rte_bus_vmbus.h>
/*
 * Atomically OR the bits of mask into the 32-bit word at addr.
 *
 * addr: word to modify
 * mask: bits to set in that word
 *
 * The value returned by the builtin is discarded.
 */
23 vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
25 /* Use the GCC builtin, which performs an atomic OR operation */
26 __sync_or_and_fetch(addr, mask);
/*
 * Set the pending bit that corresponds to monitor_id in the monitor
 * page of dev.
 *
 * dev:        device whose monitor page is updated
 * monitor_id: id that selects a trigger group (monitor_id divided by
 *             HV_MON_TRIG_LEN) and a bit inside that group (the remainder)
 *
 * NOTE(review): no range check on trigger_index is made in this function;
 * presumably callers always pass a valid monitor_id - confirm.
 */
30 vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id)
32 uint32_t *monitor_addr, monitor_mask;
33 unsigned int trigger_index;
/* Trigger group that holds the id, and the bit of the id in that group */
35 trigger_index = monitor_id / HV_MON_TRIG_LEN;
36 monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
/* The pending word of the group is updated with an atomic OR */
38 monitor_addr = &dev->monitor_page->trigs[trigger_index].pending;
39 vmbus_sync_set_bit(monitor_addr, monitor_mask);
/*
 * Raise the event for chan by setting the monitor bit selected by
 * chan->monitor_id in the monitor page of dev.
 */
43 vmbus_set_event(const struct rte_vmbus_device *dev,
44 const struct vmbus_channel *chan)
46 vmbus_set_monitor(dev, chan->monitor_id);
50 * Set how long the hypervisor waits between examinations of the trigger.
/*
 * Store a latency value for the monitor slot of chan in the monitor page
 * of dev.
 *
 * dev:     device whose monitor page is written
 * chan:    channel whose monitor_id selects the slot (group and offset)
 * latency: requested value; it is divided by 100 before being stored
 *          (presumably given in nanoseconds, since the host value is in
 *          100 nanosecond units - confirm against the public header)
 *
 * An error is logged when latency is UINT16_MAX * 100 or more, or when
 * the trigger index derived from monitor_id is VMBUS_MONTRIGS_MAX or more.
 */
53 rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
54 const struct vmbus_channel *chan,
/* Group index and offset within the group for the monitor id of chan */
57 uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
58 uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
/* This bound keeps latency / 100 below UINT16_MAX */
60 if (latency >= UINT16_MAX * 100) {
61 VMBUS_LOG(ERR, "invalid latency value %u", latency);
65 if (trig_idx >= VMBUS_MONTRIGS_MAX) {
66 VMBUS_LOG(ERR, "invalid monitor trigger %u",
71 /* Host value is expressed in 100 nanosecond units */
72 dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
76 * Notify host that there are data pending on our TX bufring.
78 * Since this is in userspace, rely on the monitor page.
79 * Can't do a hypercall from userspace.
/*
 * Signal the host for chan by raising the channel event through
 * vmbus_set_event(), using the device recorded in chan->device.
 *
 * chan: channel whose transmit ring has new data
 */
82 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
84 const struct rte_vmbus_device *dev = chan->device;
85 const struct vmbus_br *tbr = &chan->txbr;
87 /* Make sure all updates are done before signaling host */
90 /* If host is ignoring interrupts? */
94 vmbus_set_event(dev, chan);
98 /* Do a simple send directly using transmit ring. */
/*
 * Send one packet on the transmit ring of chan.
 *
 * The packet is written as three segments: the vmbus_chanpkt header,
 * dlen bytes from data, and padding that brings the total length to a
 * multiple of sizeof(uint64_t).
 *
 * chan:     channel whose txbr is written
 * type:     packet type value supplied by the caller
 * data:     payload
 * dlen:     payload length in bytes
 * xactid:   transaction id stored in the header
 * flags:    value stored in the header flags field
 * need_sig: when non-NULL, the signal-needed status is OR-ed into it and
 *           the caller signals later; when NULL, the host is signaled
 *           here if the write returned 0 and a signal is needed
 */
99 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
100 void *data, uint32_t dlen,
101 uint64_t xactid, uint32_t flags, bool *need_sig)
103 struct vmbus_chanpkt pkt;
104 unsigned int pktlen, pad_pktlen;
105 const uint32_t hlen = sizeof(pkt);
106 bool send_evt = false;
/* Header plus payload, then aligned to a multiple of sizeof(uint64_t) */
111 pktlen = hlen + dlen;
112 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
/* Header lengths are stored shifted right by VMBUS_CHANPKT_SIZE_SHIFT */
115 pkt.hdr.flags = flags;
116 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
117 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
118 pkt.hdr.xactid = xactid;
/* Gather list: header, payload, then padding */
120 iov[0].iov_base = &pkt;
121 iov[0].iov_len = hlen;
122 iov[1].iov_base = data;
123 iov[1].iov_len = dlen;
124 iov[2].iov_base = &pad;
125 iov[2].iov_len = pad_pktlen - pktlen;
/* Write all three segments; send_evt is filled in by the ring writer */
127 error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
130 * caller sets need_sig to non-NULL if it will handle
131 * signaling if required later.
132 * if need_sig is NULL, signal now if needed.
135 *need_sig |= send_evt;
136 else if (error == 0 && send_evt)
137 rte_vmbus_chan_signal_tx(chan);
141 /* Do a scatter/gather send where the descriptor points to data. */
/*
 * Send a GPA (scatter/gather) packet on the transmit ring of chan.
 *
 * Four segments are written: the fixed vmbus_chanpkt_sglist header, the
 * sglen entries of sg, dlen bytes from data, and padding that brings the
 * total length to a multiple of sizeof(uint64_t).
 *
 * chan:     channel whose txbr is written
 * sg:       array of sglen vmbus_gpa descriptors
 * sglen:    number of entries in sg
 * data:     payload that follows the descriptors
 * dlen:     payload length in bytes
 * xactid:   transaction id stored in the header
 * need_sig: when the caller is batching, the signal-needed status is
 *           OR-ed into it; otherwise the host is signaled here if the
 *           write returned 0 and a signal is needed
 */
142 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
143 struct vmbus_gpa sg[], uint32_t sglen,
144 void *data, uint32_t dlen,
145 uint64_t xactid, bool *need_sig)
147 struct vmbus_chanpkt_sglist pkt;
148 unsigned int pktlen, pad_pktlen, hlen;
149 bool send_evt = false;
/* Header length covers the fixed fields plus sglen gpa entries */
154 hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
155 pktlen = hlen + dlen;
156 pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
/* Header lengths are stored shifted right by VMBUS_CHANPKT_SIZE_SHIFT */
158 pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
159 pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
160 pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
161 pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
162 pkt.hdr.xactid = xactid;
/*
 * Gather list: fixed header, descriptors, payload, then padding.
 * NOTE(review): iov[0] uses sizeof(pkt) while hlen is the offset of
 * gpa[sglen]; this assumes sizeof(pkt) equals the offset of gpa[0] -
 * confirm against the struct definition.
 */
166 iov[0].iov_base = &pkt;
167 iov[0].iov_len = sizeof(pkt);
168 iov[1].iov_base = sg;
169 iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
170 iov[2].iov_base = data;
171 iov[2].iov_len = dlen;
172 iov[3].iov_base = &pad;
173 iov[3].iov_len = pad_pktlen - pktlen;
/* Write all four segments; send_evt is filled in by the ring writer */
175 error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
177 /* if caller is batching, just propagate the status */
179 *need_sig |= send_evt;
180 else if (error == 0 && send_evt)
181 rte_vmbus_chan_signal_tx(chan);
/*
 * Report whether the receive ring of channel is empty, that is, whether
 * its read index equals its write index.
 */
185 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
187 const struct vmbus_br *br = &channel->rxbr;
190 return br->vbr->rindex == br->vbr->windex;
193 /* Signal host after reading N bytes */
/*
 * After bytes_read bytes have been consumed from the receive ring of
 * chan, decide whether the host needs to be told and, if so, raise the
 * channel event.
 *
 * chan:       channel whose rxbr was read
 * bytes_read: number of bytes just consumed from the ring
 *
 * The decision compares the pending_send size published in the ring with
 * the space now available for writing.
 */
194 void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
196 struct vmbus_br *rbr = &chan->rxbr;
197 uint32_t write_sz, pending_sz;
199 /* No need for signaling on older versions */
200 if (!rbr->vbr->feature_bits.feat_pending_send_sz)
203 /* Make sure reading of pending happens after new read index */
206 pending_sz = rbr->vbr->pending_send;
/* Space available for writing, evaluated at the current write index */
211 write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);
/*
 * NOTE(review): write_sz and bytes_read are both uint32_t, so the
 * subtraction below wraps around if bytes_read exceeds write_sz.
 */
213 /* If there was space before then host was not blocked */
214 if (write_sz - bytes_read > pending_sz)
217 /* If pending write will not fit */
218 if (write_sz <= pending_sz)
221 vmbus_set_event(chan->device, chan);
/*
 * Receive one packet from the receive ring of chan, copying only the
 * payload that follows the packet header into data.
 *
 * chan:       channel whose rxbr is read
 * data:       destination buffer
 * len:        on entry, capacity of data in bytes
 * request_id: receives the xactid of the packet
 *
 * The header is first fetched with vmbus_rxbr_peek() and its hlen and
 * tlen fields are validated before vmbus_rxbr_read() is called.
 */
224 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
225 uint64_t *request_id)
227 struct vmbus_chanpkt_hdr pkt;
228 uint32_t dlen, hlen, bufferlen = *len;
233 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
237 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
238 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
239 /* XXX this channel is dead actually. */
243 if (unlikely(pkt.hlen > pkt.tlen)) {
244 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
249 /* Lengths are in quad words */
250 hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
251 dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
254 /* If caller buffer is not large enough */
255 if (unlikely(dlen > bufferlen))
259 *request_id = pkt.xactid;
261 /* Read data and skip packet header */
262 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
/*
 * Bytes consumed: payload, header and one extra uint64_t (presumably a
 * per-packet trailer kept in the ring - confirm against the ring code).
 */
266 rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
270 /* TODO: replace this with inplace ring buffer (no copy) */
/*
 * Receive one packet from the receive ring of chan without stripping the
 * packet header: tlen (converted to bytes) is read starting at offset 0.
 *
 * chan: channel whose rxbr is read
 * data: destination buffer
 * len:  on entry, capacity of data in bytes
 *
 * The final return value is the packet length plus sizeof(uint64_t).
 * NOTE(review): unlike rte_vmbus_chan_recv(), this function does not call
 * rte_vmbus_chan_signal_read(); presumably the caller uses the returned
 * byte count to signal the host - confirm.
 */
271 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
272 void *data, uint32_t *len)
274 struct vmbus_chanpkt_hdr pkt;
275 uint32_t dlen, bufferlen = *len;
278 error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
282 if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
283 VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
284 /* XXX this channel is dead actually. */
288 if (unlikely(pkt.hlen > pkt.tlen)) {
289 VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
294 /* Lengths are in quad words */
295 dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
298 /* If caller buffer is not large enough */
299 if (unlikely(dlen > bufferlen))
302 /* Read the whole packet; the skip offset is 0, so the header is included */
303 error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
307 /* Return the number of bytes read */
308 return dlen + sizeof(uint64_t);
/*
 * Allocate and initialise a channel object for device.
 *
 * device:     owning device; its NUMA node is used for the allocation
 * relid:      relid of the channel
 * subid:      stored as the subchannel id
 * monitor_id: stored as the monitor id
 * new_chan:   output pointer for the created channel
 */
311 int vmbus_chan_create(const struct rte_vmbus_device *device,
312 uint16_t relid, uint16_t subid, uint8_t monitor_id,
313 struct vmbus_channel **new_chan)
315 struct vmbus_channel *chan;
/* Zeroed, cache-line aligned allocation on the NUMA node of the device */
318 chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
319 device->device.numa_node);
/* Start with an empty subchannel list and record the identifiers */
323 STAILQ_INIT(&chan->subchannel_list);
324 chan->device = device;
325 chan->subchannel_id = subid;
327 chan->monitor_id = monitor_id;
/* Ring mapping is delegated to vmbus_uio_map_rings() */
330 err = vmbus_uio_map_rings(chan);
339 /* Setup the primary channel */
/*
 * Open the primary channel of device.
 *
 * Looks up the UIO resource of the device, creates a channel with
 * subchannel id 0 from the relid and monitor_id of the device, and
 * records the new channel as primary in both the device and the UIO
 * resource.
 *
 * device:   device to open
 * new_chan: output pointer for the created channel
 */
340 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
341 struct vmbus_channel **new_chan)
343 struct mapped_vmbus_resource *uio_res;
346 uio_res = vmbus_uio_find_resource(device);
348 VMBUS_LOG(ERR, "can't find uio resource");
352 err = vmbus_chan_create(device, device->relid, 0,
353 device->monitor_id, new_chan);
355 device->primary = *new_chan;
356 uio_res->primary = *new_chan;
/*
 * Report how many channels device can use. VMBUS_MAX_CHANNELS is
 * returned when vmbus_uio_subchannels_supported() reports support for
 * the primary channel of the device.
 */
362 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
364 if (vmbus_uio_subchannels_supported(device, device->primary))
365 return VMBUS_MAX_CHANNELS;
370 /* Setup secondary channel */
/*
 * Open a subchannel of primary.
 *
 * The subchannel is obtained through vmbus_uio_get_subchan() and is then
 * appended to the subchannel list of primary.
 *
 * primary:  primary channel that owns the subchannel list
 * new_chan: output pointer for the subchannel
 */
371 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
372 struct vmbus_channel **new_chan)
374 struct vmbus_channel *chan;
377 err = vmbus_uio_get_subchan(primary, &chan);
381 STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
/* Return the subchannel id stored in chan. */
386 uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
388 return chan->subchannel_id;
/*
 * Close chan. A channel other than the primary channel of its device is
 * unlinked from the subchannel list of that primary channel.
 *
 * chan: channel to close
 */
391 void rte_vmbus_chan_close(struct vmbus_channel *chan)
393 const struct rte_vmbus_device *device = chan->device;
394 struct vmbus_channel *primary = device->primary;
397 * intentionally leak primary channel because
398 * secondary may still reference it
400 if (chan != primary) {
401 STAILQ_REMOVE(&primary->subchannel_list, chan,
402 vmbus_channel, next);
/*
 * Print the state of one ring buffer to f.
 *
 * f:  output stream
 * id: label printed at the start of the first line
 * br: ring to describe
 *
 * Prints the write and read indexes, interrupt mask, pending-send size
 * and feature bits, then the data size and the space available for
 * writing and reading, and finally fields of the packet header at the
 * head of the ring when vmbus_rxbr_peek() returns 0.
 *
 * NOTE(review): the peek helper is named for the receive ring, but this
 * function is also called for the transmit ring by rte_vmbus_chan_dump().
 */
408 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
410 const struct vmbus_bufring *vbr = br->vbr;
411 struct vmbus_chanpkt_hdr pkt;
413 fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
414 id, vbr->windex, vbr->rindex, vbr->imask,
415 vbr->pending_send, vbr->feature_bits.value);
416 fprintf(f, " size=%u avail write=%u read=%u\n",
417 br->dsize, vmbus_br_availwrite(br, vbr->windex),
418 vmbus_br_availread(br));
420 if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
421 fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
423 pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
424 pkt.flags, pkt.xactid);
/*
 * Print a summary of chan to f: its subchannel id, relid and monitor id,
 * followed by the state of its receive ring and then its transmit ring.
 *
 * f:    output stream
 * chan: channel to describe
 */
427 void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
429 fprintf(f, "channel[%u] relid=%u monitor=%u\n",
430 chan->subchannel_id, chan->relid, chan->monitor_id);
431 vmbus_dump_ring(f, "rxbr", &chan->rxbr);
432 vmbus_dump_ring(f, "txbr", &chan->txbr);