From: Ahmad Fatoum <a.fatoum@pengutronix.de>
To: barebox@lists.infradead.org
Cc: ejo@pengutronix.de, Ahmad Fatoum <a.fatoum@pengutronix.de>
Subject: [PATCH 5/5] virtio: don't use DMA API unless required
Date: Wed,  9 Oct 2024 08:05:11 +0200	[thread overview]
Message-ID: <20241009060511.4121157-6-a.fatoum@pengutronix.de> (raw)
In-Reply-To: <20241009060511.4121157-1-a.fatoum@pengutronix.de>

We have no virtio drivers that make use of the streaming DMA API, but
the virtqueues are currently always allocated using the coherent DMA
API.

The coherent DMA API (dma_alloc_coherent/dma_free_coherent) in barebox
doesn't yet take a device pointer, unlike in Linux, and as such it
unconditionally allocates uncached memory.

When running normally under QEMU (i.e. without KVM), this doesn't
matter. But once we enable KVM, using uncached memory for the
virtqueues has a considerable performance impact.

To avoid this, let's mimic what Linux does and just sidestep the DMA
API if the virtio device tells us that this is ok.
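
For context, whether a device "tells us that this is ok" boils down to
the VIRTIO_F_ACCESS_PLATFORM feature bit. This is the helper as defined
in Linux' include/linux/virtio_config.h; the barebox header is assumed
to carry an equivalent definition:

  static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
  {
          /*
           * Note the reverse polarity of the quirk feature (compared to
           * most other features): this is for compatibility with legacy
           * systems.
           */
          return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
  }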

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
 drivers/virtio/virtio_ring.c | 85 ++++++++++++++++++++++++++++++++----
 include/linux/virtio_ring.h  |  1 +
 2 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0efe1e002506..787b04a766e9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -299,14 +299,81 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	return vq;
 }
 
-static void *vring_alloc_queue(size_t size, dma_addr_t *dma_handle)
+/*
+ * Modern virtio devices have feature bits to specify whether they need a
+ * quirk and bypass the IOMMU. If not there, just use the DMA API.
+ *
+ * If there, the interaction between virtio and DMA API is messy.
+ *
+ * On most systems with virtio, physical addresses match bus addresses,
+ * and it _shouldn't_ particularly matter whether we use the DMA API.
+ *
+ * However, barebox's dma_alloc_coherent doesn't yet take a device pointer
+ * as argument, so even for dma-coherent devices, the virtqueue is mapped
+ * uncached on ARM. This has a considerable impact on virtio performance,
+ * so we really want to avoid using the DMA API if possible for the time being.
+ *
+ * On some systems, including Xen and any system with a physical device
+ * that speaks virtio behind a physical IOMMU, we must use the DMA API
+ * for virtio DMA to work at all.
+ *
+ * On other systems, including SPARC and PPC64, virtio-pci devices are
+ * enumerated as though they are behind an IOMMU, but the virtio host
+ * ignores the IOMMU, so we must either pretend that the IOMMU isn't
+ * there or somehow map everything as the identity.
+ *
+ * For the time being, we preserve historic behavior and bypass the DMA
+ * API.
+ *
+ * TODO: install a per-device DMA ops structure that does the right thing
+ * taking into account all the above quirks, and use the DMA API
+ * unconditionally on data path.
+ */
+
+static bool vring_use_dma_api(const struct virtio_device *vdev)
 {
-	return dma_alloc_coherent(size, dma_handle);
+	return !virtio_has_dma_quirk(vdev);
 }
 
-static void vring_free_queue(size_t size, void *queue, dma_addr_t dma_handle)
+static void *vring_alloc_queue(struct virtio_device *vdev,
+			       size_t size, dma_addr_t *dma_handle)
 {
-	dma_free_coherent(queue, dma_handle, size);
+	if (vring_use_dma_api(vdev)) {
+		return dma_alloc_coherent(size, dma_handle);
+	} else {
+		void *queue = memalign(PAGE_SIZE, PAGE_ALIGN(size));
+
+		if (queue) {
+			phys_addr_t phys_addr = virt_to_phys(queue);
+			*dma_handle = (dma_addr_t)phys_addr;
+
+			/*
+			 * Sanity check: make sure we didn't truncate
+			 * the address.  The only arches I can find that
+			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
+			 * are certain non-highmem MIPS and x86
+			 * configurations, but these configurations
+			 * should never allocate physical pages above 32
+			 * bits, so this is fine.  Just in case, throw a
+			 * warning and abort if we end up with an
+			 * unrepresentable address.
+			 */
+			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
+				free(queue);
+				return NULL;
+			}
+		}
+		return queue;
+	}
+}
+
+static void vring_free_queue(struct virtio_device *vdev,
+			     size_t size, void *queue, dma_addr_t dma_handle)
+{
+	if (vring_use_dma_api(vdev))
+		dma_free_coherent(queue, dma_handle, size);
+	else
+		free(queue);
 }
 
 struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num,
@@ -327,7 +394,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num,
 
 	/* TODO: allocate each queue chunk individually */
 	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
-		queue = vring_alloc_queue(vring_size(num, vring_align), &dma_addr);
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr);
 		if (queue)
 			break;
 	}
@@ -337,7 +404,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num,
 
 	if (!queue) {
 		/* Try to get a single page. You are my only hope! */
-		queue = vring_alloc_queue(vring_size(num, vring_align), &dma_addr);
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr);
 	}
 	if (!queue)
 		return NULL;
@@ -347,7 +414,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num,
 
 	vq = __vring_new_virtqueue(index, vring, vdev);
 	if (!vq) {
-		vring_free_queue(queue_size_in_bytes, queue, dma_addr);
+		vring_free_queue(vdev, queue_size_in_bytes, queue, dma_addr);
 		return NULL;
 	}
 	vq_debug(vq, "created vring @ (virt=%p, phys=%pad) for vq with num %u\n",
@@ -355,13 +422,15 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, unsigned int num,
 
 	vq->queue_dma_addr = dma_addr;
 	vq->queue_size_in_bytes = queue_size_in_bytes;
+	vq->use_dma_api = vring_use_dma_api(vdev);
 
 	return vq;
 }
 
 void vring_del_virtqueue(struct virtqueue *vq)
 {
-	vring_free_queue(vq->queue_size_in_bytes, vq->vring.desc, vq->queue_dma_addr);
+	vring_free_queue(vq->vdev, vq->queue_size_in_bytes,
+			 vq->vring.desc, vq->queue_dma_addr);
 	list_del(&vq->list);
 	free(vq);
 }
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index bdef47b0fa6c..7bf667611938 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -103,6 +103,7 @@ struct virtqueue {
 	unsigned int num_free;
 	struct vring vring;
 	bool event;
+	bool use_dma_api;
 	unsigned int free_head;
 	unsigned int num_added;
 	u16 last_used_idx;
-- 
2.39.5

Thread overview: 10+ messages
2024-10-09  6:05 [PATCH 0/5] ARM64: make barebox compatible with KVM Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 1/5] ARM64: io: implement I/O accessors in assembly Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 2/5] ARM64: board-dt-2nd: grow stack down from start of binary Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 3/5] mtd: cfi-flash: use I/O accessors for reads/writes of MMIO regions Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 4/5] ARM64: mmu: flush cacheable regions prior to remapping Ahmad Fatoum
2024-10-09  6:05 ` Ahmad Fatoum [this message]
2024-10-14 12:48   ` [PATCH 5/5] virtio: don't use DMA API unless required Sascha Hauer
2024-10-14 13:05     ` Ahmad Fatoum
2024-10-14 13:06   ` [PATCH] fixup! " Ahmad Fatoum
2024-10-15  6:54 ` [PATCH 0/5] ARM64: make barebox compatible with KVM Sascha Hauer
