On 21.08.2024 18:06, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/sbi.h
> +++ b/xen/arch/riscv/include/asm/sbi.h
> @@ -12,8 +12,41 @@
>  #ifndef __ASM_RISCV_SBI_H__
>  #define __ASM_RISCV_SBI_H__
>  
> +#include <xen/cpumask.h>
> +
>  #define SBI_EXT_0_1_CONSOLE_PUTCHAR          0x1
>  
> +#define SBI_EXT_BASE                    0x10
> +#define SBI_EXT_RFENCE                  0x52464E43
> +
> +/* SBI function IDs for BASE extension */
> +#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
> +#define SBI_EXT_BASE_GET_IMP_ID         0x1
> +#define SBI_EXT_BASE_GET_IMP_VERSION    0x2
> +#define SBI_EXT_BASE_PROBE_EXT          0x3
> +
> +/* SBI function IDs for RFENCE extension */
> +#define SBI_EXT_RFENCE_REMOTE_FENCE_I           0x0
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA        0x1
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID   0x2
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA       0x3
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID  0x4
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA       0x5
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID  0x6
> +
> +#define SBI_SPEC_VERSION_MAJOR_MASK     0x7F000000
> +#define SBI_SPEC_VERSION_MINOR_MASK     0xffffff

Nit: Perhaps neater / more clear as

#define SBI_SPEC_VERSION_MAJOR_MASK     0x7f000000
#define SBI_SPEC_VERSION_MINOR_MASK     0x00ffffff

> @@ -31,4 +64,34 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long 
> fid,
>   */
>  void sbi_console_putchar(int ch);
>  
> +/*
> + * Check underlying SBI implementation has RFENCE
> + *
> + * @return true for supported AND false for not-supported
> + */
> +bool sbi_has_rfence(void);
> +
> +/*
> + * Instructs the remote harts to execute one or more SFENCE.VMA
> + * instructions, covering the range of virtual addresses between
> + * [start_addr, start_addr + size).
> + *
> + * Returns 0 if IPI was sent to all the targeted harts successfully
> + * or negative value if start_addr or size is not valid.
> + *
> + * @hart_mask a cpu mask containing all the target harts.
> + * @param start virtual address start
> + * @param size virtual address range size
> + */
> +int sbi_remote_sfence_vma(const cpumask_t *cpu_mask,
> +                          unsigned long start_addr,
> +                          unsigned long size);

I may have asked before: Why not vaddr_t and size_t respectively?

> @@ -38,7 +51,265 @@ struct sbiret sbi_ecall(unsigned long ext, unsigned long 
> fid,
>      return ret;
>  }
>  
> +static int sbi_err_map_xen_errno(int err)
> +{
> +    switch ( err )
> +    {
> +    case SBI_SUCCESS:
> +        return 0;
> +    case SBI_ERR_DENIED:
> +        return -EACCES;
> +    case SBI_ERR_INVALID_PARAM:
> +        return -EINVAL;
> +    case SBI_ERR_INVALID_ADDRESS:
> +        return -EFAULT;
> +    case SBI_ERR_NOT_SUPPORTED:
> +        return -EOPNOTSUPP;
> +    case SBI_ERR_FAILURE:
> +        fallthrough;
> +    default:

What's the significance of the "fallthrough" here?

> +static unsigned long sbi_major_version(void)
> +{
> +    return MASK_EXTR(sbi_spec_version, SBI_SPEC_VERSION_MAJOR_MASK);
> +}
> +
> +static unsigned long sbi_minor_version(void)
> +{
> +    return MASK_EXTR(sbi_spec_version, SBI_SPEC_VERSION_MINOR_MASK);
> +}

Both functions return less than 32-bit wide values. Why unsigned long
return types?

> +static long sbi_ext_base_func(long fid)
> +{
> +    struct sbiret ret;
> +
> +    ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
> +
> +    /*
> +     * I wasn't able to find a case in the SBI spec where sbiret.value
> +     * could be negative.
> +     *
> +     * Unfortunately, the spec does not specify the possible values of
> +     * sbiret.value, but based on the description of the SBI function,
> +     * ret.value >= 0 when sbiret.error = 0. SPI spec specify only
> +     * possible value for sbiret.error (<= 0 whwere 0 is SBI_SUCCESS ).
> +     *
> +     * Just to be sure that SBI base extension functions one day won't
> +     * start to return a negative value for sbiret.value when
> +     * sbiret.error < 0 BUG_ON() is added.
> +     */
> +    BUG_ON(ret.value < 0);

I'd be careful here and move this ...

> +    if ( !ret.error )
> +        return ret.value;

... into the if() body here, just to avoid the BUG_ON() pointlessly
triggering ...

> +    else
> +        return ret.error;

... when an error is returned anyway. After all, if an error is returned,
ret.value presumably is (deemed) undefined.

> +static int sbi_rfence_v02_real(unsigned long fid,
> +                               unsigned long hmask, unsigned long hbase,
> +                               unsigned long start, unsigned long size,

Again vaddr_t / size_t perhaps? And then again elsewhere as well.

> +                               unsigned long arg4)
> +{
> +    struct sbiret ret = {0};
> +    int result = 0;
> +
> +    switch ( fid )
> +    {
> +    case SBI_EXT_RFENCE_REMOTE_FENCE_I:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                        0, 0, 0, 0);
> +        break;
> +
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                        start, size, 0, 0);
> +        break;
> +
> +    case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
> +    case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
> +        ret = sbi_ecall(SBI_EXT_RFENCE, fid, hmask, hbase,
> +                        start, size, arg4, 0);
> +        break;
> +
> +    default:
> +        printk("%s: unknown function ID [%lu]\n",

I wonder how useful the logging in decimal of (perhaps large) unknown
values is.

> +               __func__, fid);
> +        result = -EINVAL;
> +        break;
> +    };
> +
> +    if ( ret.error )
> +    {
> +        result = sbi_err_map_xen_errno(ret.error);
> +        printk("%s: hbase=%lu hmask=%#lx failed (error %d)\n",
> +               __func__, hbase, hmask, result);

Considering that sbi_err_map_xen_errno() may lose information, I'd
recommend logging ret.error here.

> +static int cf_check sbi_rfence_v02(unsigned long fid,
> +                                   const cpumask_t *cpu_mask,
> +                                   unsigned long start, unsigned long size,
> +                                   unsigned long arg4, unsigned long arg5)
> +{
> +    unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
> +    int result;
> +
> +    /*
> +     * hart_mask_base can be set to -1 to indicate that hart_mask can be
> +     * ignored and all available harts must be considered.
> +     */
> +    if ( !cpu_mask )
> +        return sbi_rfence_v02_real(fid, 0UL, -1UL, start, size, arg4);
> +
> +    for_each_cpu ( cpuid, cpu_mask )
> +    {
> +        /*
> +        * Hart IDs might not necessarily be numbered contiguously in
> +        * a multiprocessor system, but at least one hart must have a
> +        * hart ID of zero.

Does this latter fact matter here in any way?

> +        * This means that it is possible for the hart ID mapping to look 
> like:
> +        *  0, 1, 3, 65, 66, 69
> +        * In such cases, more than one call to sbi_rfence_v02_real() will be
> +        * needed, as a single hmask can only cover sizeof(unsigned long) 
> CPUs:
> +        *  1. sbi_rfence_v02_real(hmask=0b1011, hbase=0)
> +        *  2. sbi_rfence_v02_real(hmask=0b1011, hbase=65)
> +        *
> +        * The algorithm below tries to batch as many harts as possible before
> +        * making an SBI call. However, batching may not always be possible.
> +        * For example, consider the hart ID mapping:
> +        *   0, 64, 1, 65, 2, 66

Just to mention it: Batching is also possible here: First (0,1,2), then
(64,65,66). It just requires a different approach. Whether switching is
worthwhile depends on how numbering is done on real world systems.

> +        */
> +        hartid = cpuid_to_hartid(cpuid);
> +        if ( hmask )
> +        {
> +            if ( hartid + BITS_PER_LONG <= htop ||
> +                 hbase + BITS_PER_LONG <= hartid )
> +            {
> +                result = sbi_rfence_v02_real(fid, hmask, hbase,
> +                                             start, size, arg4);
> +                if ( result )
> +                    return result;
> +                hmask = 0;
> +            }
> +            else if ( hartid < hbase )
> +            {
> +                /* shift the mask to fit lower hartid */
> +                hmask <<= hbase - hartid;
> +                hbase = hartid;
> +            }
> +        }
> +
> +        if ( !hmask )
> +        {
> +            hbase = hartid;
> +            htop = hartid;
> +        }
> +        else if ( hartid > htop )
> +            htop = hartid;
> +
> +        hmask |= BIT(hartid - hbase, UL);
> +    }
> +
> +    if ( hmask )
> +    {
> +        result = sbi_rfence_v02_real(fid, hmask, hbase,
> +                                     start, size, arg4);
> +        if ( result )
> +            return result;

It's a little odd to have this here, rather than ...

> +    }
> +
> +    return 0;
> +}

... making these two a uniform return path. If you wanted you
could even replace the return in the loop with a simple break,
merely requiring the clearing of hmask to come first.

> +static int (* __ro_after_init sbi_rfence)(unsigned long fid,
> +                                          const cpumask_t *cpu_mask,
> +                                          unsigned long start,
> +                                          unsigned long size,
> +                                          unsigned long arg4,
> +                                          unsigned long arg5);
> +
> +int sbi_remote_sfence_vma(const cpumask_t *cpu_mask,
> +                          unsigned long start_addr,

To match other functions, perhaps just "start"?

> +int sbi_probe_extension(long extid)
> +{
> +    struct sbiret ret;
> +
> +    ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
> +                    0, 0, 0, 0, 0);
> +    if ( !ret.error && ret.value )
> +        return ret.value;
> +
> +    return -EOPNOTSUPP;

Any reason not to use sbi_err_map_xen_errno() here?

Jan

Reply via email to