On Fri, Sep 24, 2021 at 11:46:57AM +0200, Jan Beulich wrote:
> While already the case for PVH, there's no reason to treat PV
> differently here, though of course the addresses get taken from another
> source in this case. Except that, to match CPU side mappings, by default
> we permit r/o ones. This then also means we now deal consistently with
> IO-APICs whose MMIO is or is not covered by E820 reserved regions.
> 
> Signed-off-by: Jan Beulich <[email protected]>
> ---
> [integrated] v1: Integrate into series.
> [standalone] v2: Keep IOMMU mappings in sync with CPU ones.
> 
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -253,12 +253,12 @@ void iommu_identity_map_teardown(struct
>      }
>  }
>  
> -static bool __hwdom_init hwdom_iommu_map(const struct domain *d,
> -                                         unsigned long pfn,
> -                                         unsigned long max_pfn)
> +static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
> +                                                 unsigned long pfn,
> +                                                 unsigned long max_pfn)
>  {
>      mfn_t mfn = _mfn(pfn);
> -    unsigned int i, type;
> +    unsigned int i, type, perms = IOMMUF_readable | IOMMUF_writable;
>  
>      /*
>       * Set up 1:1 mapping for dom0. Default to include only conventional RAM
> @@ -267,44 +267,60 @@ static bool __hwdom_init hwdom_iommu_map
>       * that fall in unusable ranges for PV Dom0.
>       */
>      if ( (pfn > max_pfn && !mfn_valid(mfn)) || xen_in_range(pfn) )
> -        return false;
> +        return 0;
>  
>      switch ( type = page_get_ram_type(mfn) )
>      {
>      case RAM_TYPE_UNUSABLE:
> -        return false;
> +        return 0;
>  
>      case RAM_TYPE_CONVENTIONAL:
>          if ( iommu_hwdom_strict )
> -            return false;
> +            return 0;
>          break;
>  
>      default:
>          if ( type & RAM_TYPE_RESERVED )
>          {
>              if ( !iommu_hwdom_inclusive && !iommu_hwdom_reserved )
> -                return false;
> +                perms = 0;
>          }
> -        else if ( is_hvm_domain(d) || !iommu_hwdom_inclusive || pfn > max_pfn )
> -            return false;
> +        else if ( is_hvm_domain(d) )
> +            return 0;
> +        else if ( !iommu_hwdom_inclusive || pfn > max_pfn )
> +            perms = 0;

I'm confused about the reason to set perms = 0 instead of just
returning here. AFAICT perms won't be set to any other value below,
so you might as well just return 0.

>      }
>  
>      /* Check that it doesn't overlap with the Interrupt Address Range. */
>      if ( pfn >= 0xfee00 && pfn <= 0xfeeff )
> -        return false;
> +        return 0;
>      /* ... or the IO-APIC */
> -    for ( i = 0; has_vioapic(d) && i < d->arch.hvm.nr_vioapics; i++ )
> -        if ( pfn == PFN_DOWN(domain_vioapic(d, i)->base_address) )
> -            return false;
> +    if ( has_vioapic(d) )
> +    {
> +        for ( i = 0; i < d->arch.hvm.nr_vioapics; i++ )
> +            if ( pfn == PFN_DOWN(domain_vioapic(d, i)->base_address) )
> +                return 0;
> +    }
> +    else if ( is_pv_domain(d) )
> +    {
> +        /*
> +         * Be consistent with CPU mappings: Dom0 is permitted to establish r/o
> +         * ones there, so it should also have such established for IOMMUs.
> +         */
> +        for ( i = 0; i < nr_ioapics; i++ )
> +            if ( pfn == PFN_DOWN(mp_ioapics[i].mpc_apicaddr) )
> +                return rangeset_contains_singleton(mmio_ro_ranges, pfn)
> +                       ? IOMMUF_readable : 0;
> +    }

Note that the emulated vIO-APICs are mapped over the real ones (i.e.
using the same base addresses), and hence both loops will end up
covering the same regions. I would rather keep them separate anyway,
just in case we decide to somehow change the position of the emulated
ones in the future.

>      /*
>       * ... or the PCIe MCFG regions.
>       * TODO: runtime added MMCFG regions are not checked to make sure they
>       * don't overlap with already mapped regions, thus preventing trapping.
>       */
>      if ( has_vpci(d) && vpci_is_mmcfg_address(d, pfn_to_paddr(pfn)) )
> -        return false;
> +        return 0;
>  
> -    return true;
> +    return perms;
>  }
>  
>  void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> @@ -346,15 +362,19 @@ void __hwdom_init arch_iommu_hwdom_init(
>      for ( ; i < top; i++ )
>      {
>          unsigned long pfn = pdx_to_pfn(i);
> +        unsigned int perms = hwdom_iommu_map(d, pfn, max_pfn);
>          int rc;
>  
> -        if ( !hwdom_iommu_map(d, pfn, max_pfn) )
> +        if ( !perms )
>              rc = 0;
>          else if ( paging_mode_translate(d) )
> -            rc = set_identity_p2m_entry(d, pfn, p2m_access_rw, 0);
> +            rc = set_identity_p2m_entry(d, pfn,
> +                                        perms & IOMMUF_writable ? p2m_access_rw
> +                                                                : p2m_access_r,
> +                                        0);
>          else
>              rc = iommu_map(d, _dfn(pfn), _mfn(pfn), 1ul << PAGE_ORDER_4K,
> -                           IOMMUF_readable | IOMMUF_writable, &flush_flags);
> +                           perms, &flush_flags);

You could just call set_identity_p2m_entry() uniformly here. It will
do the right thing for non-translated guests as well, and then
hwdom_iommu_map() could perhaps return a p2m_access_t?

Thanks, Roger.

Reply via email to