Nikunj A Dadhania <[email protected]> writes:
> This series contains 7 new instructions for POWER9 ISA3.0
> Use newer qemu load/store tcg helpers and optimize stxvw4x and lxvw4x.
>
> GCC was adding an epilogue for every VSX instruction, causing a change in
> behaviour. For testing the load vector instructions, I used mfvsrld/mfvsrd
> to move the VSR contents to registers. For testing the store vector
> instructions, I used mtvsrdd. This helped get rid of the epilogue added by
> gcc. I tried adding the test cases to kvm-unit-tests, but executing VSX
> instructions results in a CPU exception. I will debug that later. I will send
> the test code and steps to execute as a reply to this email.
Source code for stxv_x.c and lxv_x.c is attached, and the following are the
steps to use them:
Compile using IBM Advance toolchain[1]:
=======================================
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 lxv_x.c -o be_lxv_x
/opt/at10.0/bin/powerpc64-linux-gnu-gcc -static -O3 stxv_x.c -o be_stxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 lxv_x.c -o le_lxv_x
/opt/at10.0/bin/powerpc64le-linux-gnu-gcc -static -O3 stxv_x.c -o le_stxv_x
Run the following to test the instructions:
===========================================
for i in lxv_x stxv_x
do
echo "Running ... $i"
echo ">>>>>>>>>>>>>>>> LE LE LE >>>>>>>>>>>>>>"
../qemu/ppc64le-linux-user/qemu-ppc64le -cpu POWER9 le_${i}
echo ">>>>>>>>>>>>>>>> BE BE BE >>>>>>>>>>>>>>"
../qemu/ppc64-linux-user/qemu-ppc64 -cpu POWER9 be_${i}
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
done
Regards
Nikunj
1. ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/redhat/Fedora22
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
/* Print the 16 bytes starting at p as two-digit uppercase hex, on one line. */
static void print16x1(uint8_t *p)
{
    const uint8_t *end = p + 16;

    while (p != end) {
        printf(" %02X ", *p++);
    }
    printf("\n");
}
/*
 * Exercise the stxvw4x, stxvh8x and stxvb16x vector store instructions.
 *
 * For each instruction, mtvsrdd builds a VSR from the two 64-bit values
 * hi and lo, the instruction under test stores that VSR into rb8, and the
 * resulting 16 bytes are printed in memory order.
 *
 * Constraint notes for the asm statements below:
 *  - "memory" clobber: the store writes rb8 through the pointer in %1.
 *    Without the clobber the compiler is not told that rb8 changes and
 *    may treat it as still uninitialized when it is printed.
 *  - "b" for hi: mtvsrdd interprets RA == 0 as the constant 0 rather than
 *    the contents of r0, so hi must live in a base register (never r0).
 */
int main(void) {
    __vector uint8_t vrt8;
    uint8_t rb8[16];
    unsigned long hi = 0x0001020310111213;
    unsigned long lo = 0x2021222330313233;

    /* Store as four 32-bit words. */
    asm volatile("mtvsrdd %x0, %2, %3;"
                 "stxvw4x %x0, 0, %1;"
                 : "=ws"(vrt8)
                 : "r"(rb8), "b"(hi), "r"(lo)
                 : "memory");
    print16x1(rb8);

    /* Store as eight 16-bit halfwords. */
    asm volatile("mtvsrdd %x0, %2, %3;"
                 "stxvh8x %x0, 0, %1;"
                 : "=ws"(vrt8)
                 : "r"(rb8), "b"(hi), "r"(lo)
                 : "memory");
    print16x1(rb8);

    /* Store as sixteen bytes. */
    asm volatile("mtvsrdd %x0, %2, %3;"
                 "stxvb16x %x0, 0, %1;"
                 : "=ws"(vrt8)
                 : "r"(rb8), "b"(hi), "r"(lo)
                 : "memory");
    print16x1(rb8);

    return EXIT_SUCCESS;
}
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
/*
 * Exercise the lxvw4x, lxvh8x and lxvb16x vector load instructions.
 *
 * For each instruction, 16 bytes are loaded from a source array into a
 * VSR, the two 64-bit halves of that VSR are copied to general-purpose
 * registers with mfvsrd (into hi) and mfvsrld (into lo), and both values
 * are printed.
 *
 * Constraint notes for the asm statements below:
 *  - hi and lo are written by the asm, so they are declared as outputs
 *    ("=r"). Declaring them as inputs would read them uninitialized and
 *    would not tell the compiler that they are modified.
 *  - "memory" clobber: the load reads the source array through the
 *    pointer in %3, so the compiler must keep the array initialization.
 */
int main(void) {
    __vector uint8_t vrt8;
    unsigned long lo, hi;
    /*
     * __BYTE_ORDER__ and __ORDER_LITTLE_ENDIAN__ are predefined by the
     * compiler, so the check does not depend on <endian.h> having been
     * included indirectly (an undefined macro pair would compare 0 == 0
     * and silently pick the little-endian tables).
     *
     * The word and halfword inputs are stored byte-swapped per element on
     * little-endian -- presumably so that both builds load the same
     * element values.
     */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    uint8_t rb32[16] = {0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10,
                        0x23, 0x22, 0x21, 0x20, 0x33, 0x32, 0x31, 0x30};
    uint8_t rb16[16] = {0x01, 0x00, 0x11, 0x10, 0x21, 0x20, 0x31, 0x30,
                        0x41, 0x40, 0x51, 0x50, 0x61, 0x60, 0x71, 0x70};
#else
    uint8_t rb32[16] = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
                        0x20, 0x21, 0x22, 0x23, 0x30, 0x31, 0x32, 0x33};
    uint8_t rb16[16] = {0x00, 0x01, 0x10, 0x11, 0x20, 0x21, 0x30, 0x31,
                        0x40, 0x41, 0x50, 0x51, 0x60, 0x61, 0x70, 0x71};
#endif
    uint8_t rb8[16] = {0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
                       0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7};

    /* Load as four 32-bit words. */
    asm volatile("lxvw4x %x0, 0, %3;"
                 "mfvsrd %1, %x0;"
                 "mfvsrld %2, %x0;"
                 : "=ws"(vrt8), "=r"(hi), "=r"(lo)
                 : "r"(rb32)
                 : "memory");
    printf("lxvw4x: hi %016lx lo %016lx \n", hi, lo);

    /* Load as eight 16-bit halfwords. */
    asm volatile("lxvh8x %x0, 0, %3;"
                 "mfvsrd %1, %x0;"
                 "mfvsrld %2, %x0;"
                 : "=ws"(vrt8), "=r"(hi), "=r"(lo)
                 : "r"(rb16)
                 : "memory");
    printf("lxvh8x: hi %016lx lo %016lx \n", hi, lo);

    /* Load as sixteen bytes. */
    asm volatile("lxvb16x %x0, 0, %3;"
                 "mfvsrd %1, %x0;"
                 "mfvsrld %2, %x0;"
                 : "=ws"(vrt8), "=r"(hi), "=r"(lo)
                 : "r"(rb8)
                 : "memory");
    printf("lxvb16x: hi %016lx lo %016lx \n", hi, lo);

    return EXIT_SUCCESS;
}