One of the aspects of device access is whether CPU writes to a device
are posted or non-posted.  For non-posted writes, the CPU will wait
for the device to acknowledge that the write has performed.  If the
device sits on a bus far away, this can take a while and slow things
down.  The alternative are so-called posted writes.  The CPU will
"post" the write to the bus without waiting for an acknowledgement.
The CPU may receive an asynchronous notifaction at a later time that
the write didn't succeed or a failing write may be dropped without
further botification.  On most architectures whether writes are posted
or not is a property of the bus between the CPU and the device.  For
example, memory mapped I/O on the PCI bus is always posted and there
is nothing the CPU can do about it.

On the ARM architecture though we can indicate to the CPU whether
writes to a certain address range should be posted or not.  This is
done by specifying certain memory attributes in the mappings used by
the MMU.  The OpenBSD kernel always specifies device access as
non-posted.  On all ARM implementations we have seen so far this seems
to work even for writes to devices connected to a PCIe bus.  There
might be a penalty though, so I need to investigate this a bit
further.

However, on Apple's M1 SoC, this isn't the case.  Non-posted writes to
a bus that uses posted writes fail and vice-versa.  So in order to use
the PCIe bus on these SoCs we need to specify the right memory
attributes.  The diff below implements this by introducing a new
BUS_SPACE_MAP_POSTED flag.  At this point I don't expect generic
drivers to use this flag yet.  So there is no need to add it for other
architectures.  But I don't rule out we may have to use this flag in
sys/dev/fdt sometime in the future.  That is why I posted this to a
wider audience.

ok?


Index: arch/arm64/arm64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/locore.S,v
retrieving revision 1.32
diff -u -p -r1.32 locore.S
--- arch/arm64/arm64/locore.S   19 Oct 2020 17:57:40 -0000      1.32
+++ arch/arm64/arm64/locore.S   14 Feb 2021 21:28:26 -0000
@@ -233,9 +233,10 @@ switch_mmu_kernel:
 mair:
        /* Device | Normal (no cache, write-back, write-through) */
        .quad   MAIR_ATTR(0x00, 0) |    \
-               MAIR_ATTR(0x44, 1) |    \
-               MAIR_ATTR(0xff, 2) |    \
-               MAIR_ATTR(0x88, 3)
+               MAIR_ATTR(0x04, 1) |    \
+               MAIR_ATTR(0x44, 2) |    \
+               MAIR_ATTR(0xff, 3) |    \
+               MAIR_ATTR(0x88, 4)
 tcr:
        .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
            TCR_AS | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
Index: arch/arm64/arm64/locore0.S
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/locore0.S,v
retrieving revision 1.5
diff -u -p -r1.5 locore0.S
--- arch/arm64/arm64/locore0.S  28 May 2019 20:32:30 -0000      1.5
+++ arch/arm64/arm64/locore0.S  14 Feb 2021 21:28:26 -0000
@@ -34,8 +34,8 @@
 #include <machine/pte.h>
 
 #define        DEVICE_MEM      0
-#define        NORMAL_UNCACHED 1
-#define        NORMAL_MEM      2
+#define        NORMAL_UNCACHED 2
+#define        NORMAL_MEM      3
 
 /*
  * We assume:
Index: arch/arm64/arm64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/machdep.c,v
retrieving revision 1.57
diff -u -p -r1.57 machdep.c
--- arch/arm64/arm64/machdep.c  11 Feb 2021 23:55:48 -0000      1.57
+++ arch/arm64/arm64/machdep.c  14 Feb 2021 21:28:27 -0000
@@ -1188,7 +1188,7 @@ pmap_bootstrap_bs_map(bus_space_tag_t t,
 
        for (pa = startpa; pa < endpa; pa += PAGE_SIZE, va += PAGE_SIZE)
                pmap_kenter_cache(va, pa, PROT_READ | PROT_WRITE,
-                   PMAP_CACHE_DEV);
+                   PMAP_CACHE_DEV_NGNRNE);
 
        virtual_avail = va;
 
Index: arch/arm64/arm64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
retrieving revision 1.70
diff -u -p -r1.70 pmap.c
--- arch/arm64/arm64/pmap.c     25 Jan 2021 19:37:17 -0000      1.70
+++ arch/arm64/arm64/pmap.c     14 Feb 2021 21:28:28 -0000
@@ -472,7 +472,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
        if (pa & PMAP_NOCACHE)
                cache = PMAP_CACHE_CI;
        if (pa & PMAP_DEVICE)
-               cache = PMAP_CACHE_DEV;
+               cache = PMAP_CACHE_DEV_NGNRNE;
        pg = PHYS_TO_VM_PAGE(pa);
 
        pmap_lock(pm);
@@ -648,7 +648,7 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa, 
        pmap_pte_insert(pted);
 
        ttlb_flush(pm, va & ~PAGE_MASK);
-       if (cache == PMAP_CACHE_CI || cache == PMAP_CACHE_DEV)
+       if (cache == PMAP_CACHE_CI || cache == PMAP_CACHE_DEV_NGNRNE)
                cpu_idcache_wbinv_range(va & ~PAGE_MASK, PAGE_SIZE);
 }
 
@@ -735,7 +735,9 @@ pmap_fill_pte(pmap_t pm, vaddr_t va, pad
                break;
        case PMAP_CACHE_CI:
                break;
-       case PMAP_CACHE_DEV:
+       case PMAP_CACHE_DEV_NGNRNE:
+               break;
+       case PMAP_CACHE_DEV_NGNRE:
                break;
        default:
                panic("pmap_fill_pte:invalid cache mode");
@@ -1637,8 +1639,12 @@ pmap_pte_update(struct pte_desc *pted, u
                attr |= ATTR_IDX(PTE_ATTR_CI);
                attr |= ATTR_SH(SH_INNER);
                break;
-       case PMAP_CACHE_DEV:
-               attr |= ATTR_IDX(PTE_ATTR_DEV);
+       case PMAP_CACHE_DEV_NGNRNE:
+               attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       case PMAP_CACHE_DEV_NGNRE:
+               attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE);
                attr |= ATTR_SH(SH_INNER);
                break;
        default:
Index: arch/arm64/dev/arm64_bus_space.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/dev/arm64_bus_space.c,v
retrieving revision 1.7
diff -u -p -r1.7 arm64_bus_space.c
--- arch/arm64/dev/arm64_bus_space.c    20 Aug 2018 19:38:07 -0000      1.7
+++ arch/arm64/dev/arm64_bus_space.c    14 Feb 2021 21:28:29 -0000
@@ -191,8 +191,12 @@ generic_space_map(bus_space_tag_t t, bus
 {
        u_long startpa, endpa, pa;
        vaddr_t va;
-       int cache = flags & BUS_SPACE_MAP_CACHEABLE ?
-           PMAP_CACHE_WB : PMAP_CACHE_DEV;
+       int cache = PMAP_CACHE_DEV_NGNRNE;
+
+       if (flags & BUS_SPACE_MAP_CACHEABLE)
+               cache = PMAP_CACHE_WB;
+       if (flags & BUS_SPACE_MAP_POSTED)
+               cache = PMAP_CACHE_DEV_NGNRE;
 
        startpa = trunc_page(offs);
        endpa = round_page(offs + size);
Index: arch/arm64/include/bus.h
===================================================================
RCS file: /cvs/src/sys/arch/arm64/include/bus.h,v
retrieving revision 1.7
diff -u -p -r1.7 bus.h
--- arch/arm64/include/bus.h    13 Apr 2020 21:34:54 -0000      1.7
+++ arch/arm64/include/bus.h    14 Feb 2021 21:28:29 -0000
@@ -130,7 +130,7 @@ struct bus_space {
     (*(t)->_space_subregion)((t), (h), (o), (s), (p))
 
 #define BUS_SPACE_MAP_CACHEABLE                0x01
-#define BUS_SPACE_MAP_KSEG0            0x02
+#define BUS_SPACE_MAP_POSTED           0x02
 #define BUS_SPACE_MAP_LINEAR           0x04
 #define BUS_SPACE_MAP_PREFETCHABLE     0x08
 
Index: arch/arm64/include/pmap.h
===================================================================
RCS file: /cvs/src/sys/arch/arm64/include/pmap.h,v
retrieving revision 1.14
diff -u -p -r1.14 pmap.h
--- arch/arm64/include/pmap.h   21 Oct 2020 21:53:47 -0000      1.14
+++ arch/arm64/include/pmap.h   14 Feb 2021 21:28:29 -0000
@@ -42,7 +42,8 @@
 #define PMAP_CACHE_CI          (PMAP_MD0)              /* cache inhibit */
 #define PMAP_CACHE_WT          (PMAP_MD1)              /* writethru */
 #define PMAP_CACHE_WB          (PMAP_MD1|PMAP_MD0)     /* writeback */
-#define PMAP_CACHE_DEV         (PMAP_MD2)              /* device mapping */
+#define PMAP_CACHE_DEV_NGNRNE  (PMAP_MD2)              /* device nGnRnE */
+#define PMAP_CACHE_DEV_NGNRE   (PMAP_MD2|PMAP_MD0)     /* device nGnRE */
 #define PMAP_CACHE_BITS                (PMAP_MD0|PMAP_MD1|PMAP_MD2)    
 
 #define PTED_VA_MANAGED_M      (PMAP_MD3)
Index: arch/arm64/include/pte.h
===================================================================
RCS file: /cvs/src/sys/arch/arm64/include/pte.h,v
retrieving revision 1.5
diff -u -p -r1.5 pte.h
--- arch/arm64/include/pte.h    13 Apr 2017 23:29:02 -0000      1.5
+++ arch/arm64/include/pte.h    14 Feb 2021 21:28:30 -0000
@@ -53,11 +53,11 @@
 #define                ATTR_IDX(x)     ((x) << 2)
 #define                ATTR_IDX_MASK   (7 << 2)
 
-#define                PTE_ATTR_DEV    0
-#define                PTE_ATTR_CI     1
-#define                PTE_ATTR_WB     2
-#define                PTE_ATTR_WT     3
-
+#define                PTE_ATTR_DEV_NGNRNE     0
+#define                PTE_ATTR_DEV_NGNRE      1
+#define                PTE_ATTR_CI             2
+#define                PTE_ATTR_WB             3
+#define                PTE_ATTR_WT             4
 
 #define                SH_INNER        3
 #define                SH_OUTER        2

Reply via email to