> -#if defined(USE_HTM_FASTPATH) && !defined(HTM_CUSTOM_FASTPATH) > +#ifdef USE_HTM_FASTPATH > // HTM fastpath. Only chosen in the absence of transaction_cancel to allow > // using an uninstrumented code path. > // The fastpath is enabled only by dispatch_htm's method group, which uses > @@ -187,6 +187,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const > gtm_jmpbuf *jb) > // indeed in serial mode, and HW transactions should never need serial mode > // for any internal changes (e.g., they never abort visibly to the STM code > // and thus do not trigger the standard retry handling). > +#ifndef HTM_CUSTOM_FASTPATH > if (likely(htm_fastpath && (prop & pr_hasNoAbort))) > { > for (uint32_t t = htm_fastpath; t; t--) > @@ -237,6 +238,49 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const > gtm_jmpbuf *jb) > } > } > } > +#else > + // If we have a custom HTM fastpath in ITM_beginTransaction, we implement > + // just the retry policy here. We communicate with the custom fastpath
Don't you want this logic arranged as #ifdef HTM_CUSTOM_FASTPATH ... your new code #elif defined(USE_HTM_FASTPATH) ... existing htm code #endif > + /* Store edi for future HTM fast path retries. */ > + movl %edi, -8(%rsp) > + orl $pr_HTMRetryableAbort, %edi > + /* Let the C++ code handle the retry policy. */ > +no_htm: Note for future porting of this code to i386 or win64 -- there is no redzone. It might just be cleaner to go ahead and allocate the stack frame always, even if we don't store anything into it along the htm fastpath. > - subq $56, %rsp > - cfi_def_cfa_offset(64) > + subq $64, %rsp > + cfi_def_cfa_offset(72) You now have an abi-misaligned stack. Since the return address is pushed by the call, an aligned stack frame allocation is (N + 8) % 16 == 0. > + // Accessed from assembly language, thus the "asm" specifier on > + // the name, avoiding complex name mangling. > +#ifdef __USER_LABEL_PREFIX__ > +#define UPFX1(t) UPFX(t) > +#define UPFX(t) #t > + static gtm_rwlock serial_lock > + __asm__(UPFX1(__USER_LABEL_PREFIX__) "gtm_serial_lock"); > +#else > + static gtm_rwlock serial_lock > + __asm__("gtm_serial_lock"); > +#endif Now that we have 3 copies of this, we should simplify things. E.g. #ifdef __USER_LABEL_PREFIX__ # define UPFX1(X) #X # define UPFX UPFX1(__USER_LABEL_PREFIX__) #else # define UPFX #endif static gtm_rwlock serial_lock __asm__(UPFX "gtm_serial_lock"); or something. r~