https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122610
--- Comment #5 from Alan Wu <gcc at alanwu dot email> ---
Thank you, noinline on RSTRING_PTR instead of -march=amdfam10 made for much
quicker reduction. Using -O1 and select passes from -O2 also helped.
Options:
-std=gnu2x -Wall -Wpedantic -O1 -fipa-modref -fno-strict-aliasing
-ftree-pre -fvect-cost-model=very-cheap
Code:
/* Unsigned integer handle; the functions below cast a VALUE to struct RString * before dereferencing it. */
typedef unsigned long VALUE;
/* Object header: a mutable flags word followed by a const klass word. */
struct RBasic {
VALUE flags; /* bit set; masked by RB_FL_ANY_RAW() and OR-ed into by str_buf_cat4() */
const VALUE klass; /* not accessed by name anywhere in this testcase */
};
/*
 * String object: header, length, and a union `as` with two views of the
 * same storage. rbimpl_rstring_getmem() uses the heap view when
 * RSTRING_NOEMBED is set in basic.flags and the embed view otherwise.
 */
struct RString {
struct RBasic basic;
long len;
union {
struct {
char *ptr; /* data pointer; occupies the same bytes as embed.ary */
union {
long capa;
VALUE shared;
} aux;
} heap;
struct {
char ary[1]; /* start of the inline bytes; shares its address with heap.ptr */
} embed;
} as;
};
/* Global that str_buf_cat4() stores the string's data pointer into. */
char *str_buf_cat4_sptr;
/* Declaration only, no definition here; str_buf_cat4() stores its result into as.heap.ptr. */
void *ruby_xmalloc2(void);
/* Flag bit constants used with RBasic.flags. */
enum {
RUBY_FL_USHIFT = 12,
RUBY_FL_USER1 = 1 << (RUBY_FL_USHIFT+1), /* bit 13, i.e. 0x2000 */
RSTRING_NOEMBED = RUBY_FL_USER1, /* when set, rbimpl_rstring_getmem() copies the whole object (heap view) */
};
/* Returns the bits of `flags` that are set in obj's basic.flags (zero if none are). */
inline VALUE RB_FL_ANY_RAW(VALUE obj, VALUE flags) {
return ((struct RString *)obj)->basic.flags & flags;
}
/*
 * Returns, by value, an RString describing where str's bytes live.
 *
 * If RSTRING_NOEMBED is set in str's flags, the whole object is copied.
 * Otherwise only `len` and `as.heap.ptr` of the result are filled in, with
 * as.heap.ptr pointing at the embedded array inside the original object;
 * every other member of the result is left uninitialized.
 */
inline struct RString rbimpl_rstring_getmem(VALUE str) {
if (RB_FL_ANY_RAW(str, RSTRING_NOEMBED))
return *(struct RString *)str;
struct RString retval;
retval.len = ((struct RString *)str)->len;
/* Address of the inline bytes inside *str, not a copy of them. */
retval.as.heap.ptr = ((struct RString *)str)->as.embed.ary;
return retval;
}
/* Returns str's data pointer: the as.heap.ptr member of what rbimpl_rstring_getmem() yields. */
char *RSTRING_PTR(VALUE str) { return
rbimpl_rstring_getmem(str).as.heap.ptr; }
/*
 * Marks str as RSTRING_NOEMBED, sets its length to the old length plus len,
 * installs the pointer returned by ruby_xmalloc2() as its heap pointer, and
 * copies len bytes from ptr to the data pointer read back via RSTRING_PTR().
 * The global str_buf_cat4_sptr ends up holding that read-back pointer.
 */
void str_buf_cat4(VALUE str, char *ptr, long len) {
struct RString rbimpl_str = *(struct RString *)str; /* snapshot taken before any modification */
str_buf_cat4_sptr = RSTRING_PTR(str); /* pointer before the update; overwritten below */
((struct RString *)str)->basic.flags |= RSTRING_NOEMBED;
((struct RString *)str)->len = rbimpl_str.len + len; /* uses the snapshot's length */
((struct RString *)str)->as.heap.ptr = ruby_xmalloc2();
/*
 * The flag was set above, so this call goes through the whole-object copy
 * and is expected to return the pointer stored on the previous line
 * (assuming ruby_xmalloc2() does not itself modify *str).
 */
str_buf_cat4_sptr = RSTRING_PTR(str);
__builtin_memcpy(str_buf_cat4_sptr, ptr, len);
}
/* Calls str_buf_cat4() on str with str2's data pointer and str2's len field. */
void rb_str_buf_append(VALUE str, VALUE str2) {
str_buf_cat4(str, RSTRING_PTR(str2), ((struct RString *)str2)->len);
}
The issue shows on GCC 12/13/14/15 (https://godbolt.org/z/75hc6jc4P); all of them
fail to forward the pointer that ruby_xmalloc2 returns to memcpy:
call ruby_xmalloc2
mov QWORD PTR [rbx+24], rax
mov QWORD PTR str_buf_cat4_sptr[rip], rbp
mov rdx, r12
mov rsi, r14
mov rdi, rbp
call memcpy