On 03/01/17 13:26, David Gibson wrote: > On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote: >> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which >> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls. >> However it only supports 32bit DMA windows at zero bus offset. >> >> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit >> window size, variable page size and bus offset. >> >> This makes use of the new capability. The kernel headers are already >> updated as the kernel support went in to v4.6. >> >> Signed-off-by: Alexey Kardashevskiy <[email protected]> >> --- >> target-ppc/kvm_ppc.h | 12 +++++++----- >> hw/ppc/spapr_iommu.c | 8 +++++--- >> target-ppc/kvm.c | 48 +++++++++++++++++++++++++++++++++++++----------- >> 3 files changed, 49 insertions(+), 19 deletions(-) >> >> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h >> index bd1d78bfbe..14320c2378 100644 >> --- a/target-ppc/kvm_ppc.h >> +++ b/target-ppc/kvm_ppc.h >> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); >> #ifndef CONFIG_USER_ONLY >> off_t kvmppc_alloc_rma(void **rma); >> bool kvmppc_spapr_use_multitce(void); >> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int >> *pfd, >> - bool need_vfio); >> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, >> + uint64_t bus_offset, uint32_t nb_table, >> + int *pfd, bool need_vfio); >> int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); >> int kvmppc_reset_htab(int shift_hint); >> uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); >> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void) >> return false; >> } >> >> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn, >> - uint32_t window_size, int *fd, >> - bool need_vfio) >> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t >> page_shift, >> + uint64_t bus_offset, >> + uint32_t nb_table, >> 
+ int *pfd, bool need_vfio) >> { >> return NULL; >> } >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >> index ae30bbe30f..29c80bb3c8 100644 >> --- a/hw/ppc/spapr_iommu.c >> +++ b/hw/ppc/spapr_iommu.c >> @@ -79,15 +79,16 @@ static IOMMUAccessFlags >> spapr_tce_iommu_access_flags(uint64_t tce) >> >> static uint64_t *spapr_tce_alloc_table(uint32_t liobn, >> uint32_t page_shift, >> + uint64_t bus_offset, >> uint32_t nb_table, >> int *fd, >> bool need_vfio) >> { >> uint64_t *table = NULL; >> - uint64_t window_size = (uint64_t)nb_table << page_shift; >> >> - if (kvm_enabled() && !(window_size >> 32)) { >> - table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio); >> + if (kvm_enabled()) { > > This is broken. Previously, if we had a >4GiB window, we'd fall back > to managing it in userspace, which would work, albeit slowly. Now, if > you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it > will attempt to allocate it in the kernel, and fail completely.
No, kvmppc_create_spapr_tce() would return NULL, and right after that there
is an "if (!table)" check (it can be seen at the end of this chunk) to handle
the failure.
>
>> + table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset,
>> nb_table,
>> + fd, need_vfio);
>> }
>>
>> if (!table) {
>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
>> tcet->nb_table = nb_table;
>> tcet->table = spapr_tce_alloc_table(tcet->liobn,
>> tcet->page_shift,
>> + tcet->bus_offset,
>> tcet->nb_table,
>> &tcet->fd,
>> tcet->need_vfio);
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 9c4834c4fc..6e91a4d8bb 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -71,6 +71,7 @@ static int cap_booke_sregs;
>> static int cap_ppc_smt;
>> static int cap_ppc_rma;
>> static int cap_spapr_tce;
>> +static int cap_spapr_tce_64;
>> static int cap_spapr_multitce;
>> static int cap_spapr_vfio;
>> static int cap_hior;
>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>> cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
>> cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
>> cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
>> + cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
>> cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
>> cap_spapr_vfio = false;
>> cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void)
>> return cap_spapr_multitce;
>> }
>>
>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int
>> *pfd,
>> - bool need_vfio)
>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
>> + uint64_t bus_offset, uint32_t nb_table,
>> + int *pfd, bool need_vfio)
>> {
>> - struct kvm_create_spapr_tce args = {
>> - .liobn = liobn,
>> - .window_size = window_size,
>> - };
>> long len;
>> int fd;
>> void *table;
>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn,
>> uint32_t window_size, int *pfd,
>> return NULL;
>> }
>>
>> - fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>> - if (fd < 0) {
>> - fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
>> - liobn);
>> + if (cap_spapr_tce_64) {
>> + struct kvm_create_spapr_tce_64 args = {
>> + .liobn = liobn,
>> + .page_shift = page_shift,
>> + .offset = bus_offset >> page_shift,
>> + .size = nb_table,
>> + .flags = 0
>> + };
>> + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
>> + if (fd < 0) {
>> + fprintf(stderr,
>> + "KVM: Failed to create TCE64 table for liobn 0x%x\n",
>> + liobn);
>> + return NULL;
>> + }
>> + } else if (cap_spapr_tce) {
>> + uint64_t window_size = (uint64_t) nb_table << page_shift;
>> + struct kvm_create_spapr_tce args = {
>> + .liobn = liobn,
>> + .window_size = window_size,
>> + };
>> + if ((window_size != args.window_size) || bus_offset) {
>> + return NULL;
>> + }
>> + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
>> + if (fd < 0) {
>> + fprintf(stderr, "KVM: Failed to create TCE table for liobn
>> 0x%x\n",
>> + liobn);
>> + return NULL;
>> + }
>> + } else {
>> return NULL;
>> }
>>
>> - len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
>> + len = nb_table * sizeof(uint64_t);
>> /* FIXME: round this up to page size */
>>
>> table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
>
--
Alexey
signature.asc
Description: OpenPGP digital signature
