mail archive of the barebox mailing list
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
To: barebox@lists.infradead.org
Cc: ejo@pengutronix.de, Marc Zyngier <maz@kernel.org>,
	Ahmad Fatoum <a.fatoum@pengutronix.de>
Subject: [PATCH 4/5] ARM64: mmu: flush cacheable regions prior to remapping
Date: Wed,  9 Oct 2024 08:05:10 +0200
Message-ID: <20241009060511.4121157-5-a.fatoum@pengutronix.de>
In-Reply-To: <20241009060511.4121157-1-a.fatoum@pengutronix.de>

We currently invalidate memory ranges after remapping, because they may
contain stale dirty cache lines that interfere with further usage: e.g.
when running a memory test on a DRAM area that is remapped cached
again, stale cache lines may remain from an earlier run where the
region was cached.

The invalidation was applied unconditionally to all memory regions being
remapped, even if they weren't previously cacheable. The CPU is within
its rights to upgrade the invalidate to a clean+invalidate, and it does
not take the state of the caches into account when performing the
permission check.

This led barebox, when run under qemu -enable-kvm on ARM64, to trigger
data aborts when executing DC IVAC on the cfi-flash MMIO region after
remapping it.
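
To make the failure mode concrete, the kind of maintenance loop that ends
up being executed looks roughly like the sketch below. This is illustrative
only, not the barebox implementation; the function name and the 64-byte
cache line size are assumptions (real code derives the line size from
CTR_EL0):

  /*
   * Minimal sketch of a by-VA invalidate loop. DC IVAC may be performed
   * as a clean+invalidate, so it is subject to a write permission check;
   * issuing it on the cfi-flash MMIO mapping under KVM is what produced
   * the data aborts described above.
   */
  static void inv_dcache_range_sketch(unsigned long start, unsigned long end)
  {
  	unsigned long line;

  	/* assume 64-byte cache lines for the sake of the example */
  	for (line = start & ~63UL; line < end; line += 64)
  		asm volatile("dc ivac, %0" : : "r" (line) : "memory");

  	asm volatile("dsb sy" : : : "memory");
  }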

Fix this by replacing the unconditional invalidation after remapping
with a flush of only the cacheable pages, done before they are remapped
non-cacheable.

The way we implement it with the previously unused find_pte() function
is less optimal than it could be, but optimizing it further (probably at
the cost of readability) is left as a future exercise.
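
For context, the following usage sketch shows how a caller sees the new
behaviour. remap_range() as the generic entry point, the mmu.h header and
the 2 MiB size are assumptions for illustration only:

  #include <mmu.h>
  #include <linux/sizes.h>

  /*
   * Illustrative only: a memory-test-like caller after this patch.
   * remap_range() is assumed to be the generic wrapper that ends up in
   * arch_remap_range() below.
   */
  static void memtest_remap_example(void *dram)
  {
  	/* cacheable pages in the range are flushed, then remapped uncached */
  	remap_range(dram, SZ_2M, MAP_UNCACHED);

  	/* ... run the test with the caches out of the picture ... */

  	/* remapping back cached needs no prior flush (flags == MAP_CACHED) */
  	remap_range(dram, SZ_2M, MAP_CACHED);
  }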

Cc: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/all/8634l97cfs.wl-maz@kernel.org/
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
---
 arch/arm/cpu/mmu_64.c | 105 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 98 insertions(+), 7 deletions(-)

diff --git a/arch/arm/cpu/mmu_64.c b/arch/arm/cpu/mmu_64.c
index 7854f71f4cb6..94a19b1aec3c 100644
--- a/arch/arm/cpu/mmu_64.c
+++ b/arch/arm/cpu/mmu_64.c
@@ -63,15 +63,13 @@ static uint64_t *alloc_pte(void)
 }
 #endif
 
-static __maybe_unused uint64_t *find_pte(uint64_t addr)
+static uint64_t *__find_pte(uint64_t *ttb, uint64_t addr, int *level)
 {
-	uint64_t *pte;
+	uint64_t *pte = ttb;
 	uint64_t block_shift;
 	uint64_t idx;
 	int i;
 
-	pte = get_ttb();
-
 	for (i = 0; i < 4; i++) {
 		block_shift = level2shift(i);
 		idx = (addr & level2mask(i)) >> block_shift;
@@ -83,9 +81,17 @@ static __maybe_unused uint64_t *find_pte(uint64_t addr)
 			pte = (uint64_t *)(*pte & XLAT_ADDR_MASK);
 	}
 
+	if (level)
+		*level = i;
 	return pte;
 }
 
+/* This is currently unused, but useful for debugging */
+static __maybe_unused uint64_t *find_pte(uint64_t addr)
+{
+	return __find_pte(get_ttb(), addr, NULL);
+}
+
 #define MAX_PTE_ENTRIES 512
 
 /* Splits a block PTE into table with subpages spanning the old block */
@@ -168,6 +174,91 @@ static void create_sections(uint64_t virt, uint64_t phys, uint64_t size,
 	tlb_invalidate();
 }
 
+static size_t granule_size(int level)
+{
+	switch (level) {
+	default:
+	case 0:
+		return L0_XLAT_SIZE;
+	case 1:
+		return L1_XLAT_SIZE;
+	case 2:
+		return L2_XLAT_SIZE;
+	case 3:
+		return L3_XLAT_SIZE;
+	}
+}
+
+static bool pte_is_cacheable(uint64_t pte)
+{
+	return (pte & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
+}
+
+/**
+ * flush_cacheable_pages - Flush only the cacheable pages in a region
+ * @start: Starting virtual address of the range.
+ * @size:  Size of the range in bytes.
+ *
+ * This function walks the page table and flushes the data caches for the
+ * specified range only if the memory is marked as normal cacheable in the
+ * page tables. If a non-cacheable or non-normal page is encountered,
+ * it's skipped.
+ */
+static void flush_cacheable_pages(void *start, size_t size)
+{
+	u64 flush_start = ~0ULL, flush_end = ~0ULL;
+	u64 region_start, region_end;
+	size_t block_size;
+	u64 *ttb;
+
+	region_start = PAGE_ALIGN_DOWN((ulong)start);
+	region_end = PAGE_ALIGN(region_start + size);
+
+	ttb = get_ttb();
+
+	/*
+	 * TODO: This loop could be made more optimal by inlining the page walk,
+	 * so we need not restart address translation from the top every time.
+	 *
+	 * The hope is that with the page tables being cached and the
+	 * windows being remapped being small, the overhead compared to
+	 * actually flushing the ranges isn't too significant.
+	 */
+	for (u64 addr = region_start; addr < region_end; addr += block_size) {
+		int level;
+		u64 *pte = __find_pte(ttb, addr, &level);
+
+		block_size = granule_size(level);
+
+		if (!pte || !pte_is_cacheable(*pte))
+			continue;
+
+		if (flush_end == addr) {
+			/*
+			 * While it's safe to flush the whole block_size,
+			 * it would be a needless waste of time to go beyond region_end.
+			 */
+			flush_end = min(flush_end + block_size, region_end);
+			continue;
+		}
+
+		/*
+		 * We don't have a previous contiguous flush area to append to.
+		 * If we recorded any area before, let's flush it now
+		 */
+		if (flush_start != ~0ULL)
+			v8_flush_dcache_range(flush_start, flush_end);
+
+		/* and start the new contiguous flush area with this page */
+		flush_start = addr;
+		flush_end = min(flush_start + block_size, region_end);
+	}
+
+	/* The previous loop won't flush the last cached range, so do it here */
+	if (flush_start != ~0ULL)
+		v8_flush_dcache_range(flush_start, flush_end);
+}
+
 static unsigned long get_pte_attrs(unsigned flags)
 {
 	switch (flags) {
@@ -201,10 +292,10 @@ int arch_remap_range(void *virt_addr, phys_addr_t phys_addr, size_t size, unsign
 	if (attrs == ~0UL)
 		return -EINVAL;
 
-	create_sections((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs);
+	if (flags != MAP_CACHED)
+		flush_cacheable_pages(virt_addr, size);
 
-	if (flags == MAP_UNCACHED)
-		dma_inv_range(virt_addr, size);
+	create_sections((uint64_t)virt_addr, phys_addr, (uint64_t)size, attrs);
 
 	return 0;
 }
-- 
2.39.5




Thread overview: 10+ messages
2024-10-09  6:05 [PATCH 0/5] ARM64: make barebox compatible with KVM Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 1/5] ARM64: io: implement I/O accessors in assembly Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 2/5] ARM64: board-dt-2nd: grow stack down from start of binary Ahmad Fatoum
2024-10-09  6:05 ` [PATCH 3/5] mtd: cfi-flash: use I/O accessors for reads/writes of MMIO regions Ahmad Fatoum
2024-10-09  6:05 ` Ahmad Fatoum [this message]
2024-10-09  6:05 ` [PATCH 5/5] virtio: don't use DMA API unless required Ahmad Fatoum
2024-10-14 12:48   ` Sascha Hauer
2024-10-14 13:05     ` Ahmad Fatoum
2024-10-14 13:06   ` [PATCH] fixup! " Ahmad Fatoum
2024-10-15  6:54 ` [PATCH 0/5] ARM64: make barebox compatible with KVM Sascha Hauer
