> On 15 Feb 2021, at 07:54, Mark Kettenis <mark.kette...@xs4all.nl> wrote:
> 
> One of the aspects of device access is whether CPU writes to a device
> are posted or non-posted.  For non-posted writes, the CPU will wait
> for the device to acknowledge that the write has performed.  If the
> device sits on a bus far away, this can take a while and slow things
> down.  The alternative are so-called posted writes.  The CPU will
> "post" the write to the bus without waiting for an acknowledgement.
> The CPU may receive an asynchronous notifaction at a later time that
> the write didn't succeed or a failing write may be dropped without
> further botification.  On most architectures whether writes are posted
> or not is a property of the bus between the CPU and the device.  For
> example, memory mapped I/O on the PCI bus is always posted and there
> is nothing the CPU can do about it.
> 
> On the ARM architecture though we can indicate to the CPU whether
> writes to a certain address range should be posted or not.  This is
> done by specifying certain memory attributes in the mappings used by
> the MMU.  The OpenBSD kernel always specifies device access as
> non-posted.  On all ARM implementations we have seen so far this seems
> to work even for writes to devices connected to a PCIe bus.  There
> might be a penalty though, so I need to investigate this a bit
> further.
> 
> However, on Apple's M1 SoC, this isn't the case.  Non-posted writes to
> a bus that uses posted writes fail and vice-versa.  So in order to use
> the PCIe bus on these SoCs we need to specify the right memory
> attributes.  The diff below implements this by introducing a new
> BUS_SPACE_MAP_POSTED flag.  At this point I don't expect generic
> drivers to use this flag yet.  So there is no need to add it for other
> architectures.  But I don't rule out we may have to use this flag in
> sys/dev/fdt sometime in the future.  That is why I posted this to a
> wider audience.

You don't want to (ab)use one of the existing flags? If I squint and read kind 
of quickly I could imagine this is kind of like write combining, like what 
BUS_SPACE_MAP_PREFETCHABLE can do on pci busses.

If this does leak into fdt, would it just be a nop on other archs that use 
those drivers?

dlg

> 
> ok?
> 
> 
> Index: arch/arm64/arm64/locore.S
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/locore.S,v
> retrieving revision 1.32
> diff -u -p -r1.32 locore.S
> --- arch/arm64/arm64/locore.S 19 Oct 2020 17:57:40 -0000      1.32
> +++ arch/arm64/arm64/locore.S 14 Feb 2021 21:28:26 -0000
> @@ -233,9 +233,10 @@ switch_mmu_kernel:
> mair:
>       /* Device | Normal (no cache, write-back, write-through) */
>       .quad   MAIR_ATTR(0x00, 0) |    \
> -             MAIR_ATTR(0x44, 1) |    \
> -             MAIR_ATTR(0xff, 2) |    \
> -             MAIR_ATTR(0x88, 3)
> +             MAIR_ATTR(0x04, 1) |    \
> +             MAIR_ATTR(0x44, 2) |    \
> +             MAIR_ATTR(0xff, 3) |    \
> +             MAIR_ATTR(0x88, 4)
> tcr:
>       .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
>           TCR_AS | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
> Index: arch/arm64/arm64/locore0.S
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/locore0.S,v
> retrieving revision 1.5
> diff -u -p -r1.5 locore0.S
> --- arch/arm64/arm64/locore0.S        28 May 2019 20:32:30 -0000      1.5
> +++ arch/arm64/arm64/locore0.S        14 Feb 2021 21:28:26 -0000
> @@ -34,8 +34,8 @@
> #include <machine/pte.h>
> 
> #define       DEVICE_MEM      0
> -#define      NORMAL_UNCACHED 1
> -#define      NORMAL_MEM      2
> +#define      NORMAL_UNCACHED 2
> +#define      NORMAL_MEM      3
> 
> /*
>  * We assume:
> Index: arch/arm64/arm64/machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/machdep.c,v
> retrieving revision 1.57
> diff -u -p -r1.57 machdep.c
> --- arch/arm64/arm64/machdep.c        11 Feb 2021 23:55:48 -0000      1.57
> +++ arch/arm64/arm64/machdep.c        14 Feb 2021 21:28:27 -0000
> @@ -1188,7 +1188,7 @@ pmap_bootstrap_bs_map(bus_space_tag_t t,
> 
>       for (pa = startpa; pa < endpa; pa += PAGE_SIZE, va += PAGE_SIZE)
>               pmap_kenter_cache(va, pa, PROT_READ | PROT_WRITE,
> -                 PMAP_CACHE_DEV);
> +                 PMAP_CACHE_DEV_NGNRNE);
> 
>       virtual_avail = va;
> 
> Index: arch/arm64/arm64/pmap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 pmap.c
> --- arch/arm64/arm64/pmap.c   25 Jan 2021 19:37:17 -0000      1.70
> +++ arch/arm64/arm64/pmap.c   14 Feb 2021 21:28:28 -0000
> @@ -472,7 +472,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>       if (pa & PMAP_NOCACHE)
>               cache = PMAP_CACHE_CI;
>       if (pa & PMAP_DEVICE)
> -             cache = PMAP_CACHE_DEV;
> +             cache = PMAP_CACHE_DEV_NGNRNE;
>       pg = PHYS_TO_VM_PAGE(pa);
> 
>       pmap_lock(pm);
> @@ -648,7 +648,7 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa, 
>       pmap_pte_insert(pted);
> 
>       ttlb_flush(pm, va & ~PAGE_MASK);
> -     if (cache == PMAP_CACHE_CI || cache == PMAP_CACHE_DEV)
> +     if (cache == PMAP_CACHE_CI || cache == PMAP_CACHE_DEV_NGNRNE)
>               cpu_idcache_wbinv_range(va & ~PAGE_MASK, PAGE_SIZE);
> }
> 
> @@ -735,7 +735,9 @@ pmap_fill_pte(pmap_t pm, vaddr_t va, pad
>               break;
>       case PMAP_CACHE_CI:
>               break;
> -     case PMAP_CACHE_DEV:
> +     case PMAP_CACHE_DEV_NGNRNE:
> +             break;
> +     case PMAP_CACHE_DEV_NGNRE:
>               break;
>       default:
>               panic("pmap_fill_pte:invalid cache mode");
> @@ -1637,8 +1639,12 @@ pmap_pte_update(struct pte_desc *pted, u
>               attr |= ATTR_IDX(PTE_ATTR_CI);
>               attr |= ATTR_SH(SH_INNER);
>               break;
> -     case PMAP_CACHE_DEV:
> -             attr |= ATTR_IDX(PTE_ATTR_DEV);
> +     case PMAP_CACHE_DEV_NGNRNE:
> +             attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE);
> +             attr |= ATTR_SH(SH_INNER);
> +             break;
> +     case PMAP_CACHE_DEV_NGNRE:
> +             attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE);
>               attr |= ATTR_SH(SH_INNER);
>               break;
>       default:
> Index: arch/arm64/dev/arm64_bus_space.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/dev/arm64_bus_space.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 arm64_bus_space.c
> --- arch/arm64/dev/arm64_bus_space.c  20 Aug 2018 19:38:07 -0000      1.7
> +++ arch/arm64/dev/arm64_bus_space.c  14 Feb 2021 21:28:29 -0000
> @@ -191,8 +191,12 @@ generic_space_map(bus_space_tag_t t, bus
> {
>       u_long startpa, endpa, pa;
>       vaddr_t va;
> -     int cache = flags & BUS_SPACE_MAP_CACHEABLE ?
> -         PMAP_CACHE_WB : PMAP_CACHE_DEV;
> +     int cache = PMAP_CACHE_DEV_NGNRNE;
> +
> +     if (flags & BUS_SPACE_MAP_CACHEABLE)
> +             cache = PMAP_CACHE_WB;
> +     if (flags & BUS_SPACE_MAP_POSTED)
> +             cache = PMAP_CACHE_DEV_NGNRE;
> 
>       startpa = trunc_page(offs);
>       endpa = round_page(offs + size);
> Index: arch/arm64/include/bus.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/include/bus.h,v
> retrieving revision 1.7
> diff -u -p -r1.7 bus.h
> --- arch/arm64/include/bus.h  13 Apr 2020 21:34:54 -0000      1.7
> +++ arch/arm64/include/bus.h  14 Feb 2021 21:28:29 -0000
> @@ -130,7 +130,7 @@ struct bus_space {
>     (*(t)->_space_subregion)((t), (h), (o), (s), (p))
> 
> #define BUS_SPACE_MAP_CACHEABLE               0x01
> -#define BUS_SPACE_MAP_KSEG0          0x02
> +#define BUS_SPACE_MAP_POSTED         0x02
> #define BUS_SPACE_MAP_LINEAR          0x04
> #define BUS_SPACE_MAP_PREFETCHABLE    0x08
> 
> Index: arch/arm64/include/pmap.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/include/pmap.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 pmap.h
> --- arch/arm64/include/pmap.h 21 Oct 2020 21:53:47 -0000      1.14
> +++ arch/arm64/include/pmap.h 14 Feb 2021 21:28:29 -0000
> @@ -42,7 +42,8 @@
> #define PMAP_CACHE_CI         (PMAP_MD0)              /* cache inhibit */
> #define PMAP_CACHE_WT         (PMAP_MD1)              /* writethru */
> #define PMAP_CACHE_WB         (PMAP_MD1|PMAP_MD0)     /* writeback */
> -#define PMAP_CACHE_DEV               (PMAP_MD2)              /* device 
> mapping */
> +#define PMAP_CACHE_DEV_NGNRNE        (PMAP_MD2)              /* device 
> nGnRnE */
> +#define PMAP_CACHE_DEV_NGNRE (PMAP_MD2|PMAP_MD0)     /* device nGnRE */
> #define PMAP_CACHE_BITS               (PMAP_MD0|PMAP_MD1|PMAP_MD2)    
> 
> #define PTED_VA_MANAGED_M     (PMAP_MD3)
> Index: arch/arm64/include/pte.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/include/pte.h,v
> retrieving revision 1.5
> diff -u -p -r1.5 pte.h
> --- arch/arm64/include/pte.h  13 Apr 2017 23:29:02 -0000      1.5
> +++ arch/arm64/include/pte.h  14 Feb 2021 21:28:30 -0000
> @@ -53,11 +53,11 @@
> #define               ATTR_IDX(x)     ((x) << 2)
> #define               ATTR_IDX_MASK   (7 << 2)
> 
> -#define              PTE_ATTR_DEV    0
> -#define              PTE_ATTR_CI     1
> -#define              PTE_ATTR_WB     2
> -#define              PTE_ATTR_WT     3
> -
> +#define              PTE_ATTR_DEV_NGNRNE     0
> +#define              PTE_ATTR_DEV_NGNRE      1
> +#define              PTE_ATTR_CI             2
> +#define              PTE_ATTR_WB             3
> +#define              PTE_ATTR_WT             4
> 
> #define               SH_INNER        3
> #define               SH_OUTER        2
> 

Reply via email to