On 7/16/20 1:36 AM, Maciej Fijalkowski wrote:
Currently, %rax is used to store the jump target when BPF program is
emitting the retpoline instructions that are handling the indirect
tailcall.

There is a plan to use %rax for different purpose, which is storing the
tail call counter. In order to preserve this value across the tailcalls,
use %rcx instead for jump target storage in retpoline instructions.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkow...@intel.com>
---
  arch/x86/include/asm/nospec-branch.h | 16 ++++++++--------
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h
index e7752b4038ff..e491c3d9f227 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
   *    lfence
   *    jmp spec_trap
   *  do_rop:
- *    mov %rax,(%rsp) for x86_64
+ *    mov %rcx,(%rsp) for x86_64
   *    mov %edx,(%esp) for x86_32
   *    retq
   *
   * Without retpolines configured:
   *
- *    jmp *%rax for x86_64
+ *    jmp *%rcx for x86_64
   *    jmp *%edx for x86_32
   */
  #ifdef CONFIG_RETPOLINE
  # ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE   17
-#  define RETPOLINE_RAX_BPF_JIT()                              \
+#  define RETPOLINE_RCX_BPF_JIT_SIZE   17
+#  define RETPOLINE_RCX_BPF_JIT()                              \
  do {                                                          \
        EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
        /* spec_trap: */                                        \
@@ -334,7 +334,7 @@ do {                                                        
        \
        EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
        EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
        /* do_rop: */                                           \
-       EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
+       EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */    \
        EMIT1(0xC3);             /* retq */                     \
  } while (0)
  # else /* !CONFIG_X86_64 */
@@ -352,9 +352,9 @@ do {                                                        
        \
  # endif
  #else /* !CONFIG_RETPOLINE */
  # ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE   2
-#  define RETPOLINE_RAX_BPF_JIT()                              \
-       EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+#  define RETPOLINE_RCX_BPF_JIT_SIZE   2
+#  define RETPOLINE_RCX_BPF_JIT()                              \
+       EMIT2(0xFF, 0xE1);       /* jmp *%rcx */

Hmm, so the target prog gets loaded into rax in emit_bpf_tail_call_indirect()
but then you jump into rcx? What am I missing? This still needs to be 
bisectable.

  # else /* !CONFIG_X86_64 */
  #  define RETPOLINE_EDX_BPF_JIT()                             \
        EMIT2(0xFF, 0xE2)        /* jmp *%edx */


Reply via email to