On 2018-08-01 06:37 AM, Richard Biener wrote: > On Tue, Jul 31, 2018 at 7:40 PM Michael Ploujnikov > <michael.ploujni...@oracle.com> wrote: >> >> On 2018-07-26 01:27 PM, Michael Ploujnikov wrote: >>> On 2018-07-24 09:57 AM, Michael Ploujnikov wrote: >>>> On 2018-07-20 06:05 AM, Richard Biener wrote: >>>>>> /* Return a new assembler name for a clone with SUFFIX of a decl named >>>>>> NAME. */ >>>>>> @@ -521,14 +521,13 @@ tree >>>>>> clone_function_name_1 (const char *name, const char *suffix) >>>>> >>>>> pass this function the counter to use.... >>>>> >>>>>> { >>>>>> size_t len = strlen (name); >>>>>> - char *tmp_name, *prefix; >>>>>> + char *prefix; >>>>>> >>>>>> prefix = XALLOCAVEC (char, len + strlen (suffix) + 2); >>>>>> memcpy (prefix, name, len); >>>>>> strcpy (prefix + len + 1, suffix); >>>>>> prefix[len] = symbol_table::symbol_suffix_separator (); >>>>>> - ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++); >>>>> >>>>> and keep using ASM_FORMAT_PRIVATE_NAME here. You need to change >>>>> the lto/lto-partition.c caller (just use zero as counter). >>>>> >>>>>> - return get_identifier (tmp_name); >>>>>> + return get_identifier (prefix); >>>>>> } >>>>>> >>>>>> /* Return a new assembler name for a clone of DECL with SUFFIX. */ >>>>>> @@ -537,7 +536,17 @@ tree >>>>>> clone_function_name (tree decl, const char *suffix) >>>>>> { >>>>>> tree name = DECL_ASSEMBLER_NAME (decl); >>>>>> - return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix); >>>>>> + const char *decl_name = IDENTIFIER_POINTER (name); >>>>>> + char *numbered_name; >>>>>> + unsigned int *suffix_counter; >>>>>> + if (!clone_fn_ids) { >>>>>> + /* Initialize the per-function counter hash table if this is the >>>>>> first call */ >>>>>> + clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64); >>>>>> + } >>>>> >>>>> I still do not like throwing memory at the problem in this way for the >>>>> little benefit >>>>> this change provides :/ >>>>> >>>>> So no approval from me at this point... >>>>> >>>>> Richard. >>>> >>>> Can you give me an idea of the memory constraints that are involved? >>>> >>>> The highest memory usage increase that I could find was when compiling >>>> a source file (from Linux) with roughly 10,000 functions. It showed a 2kB >>>> increase over the before-patch use of 6936kB which is barely 0.03%. >>>> >>>> Using a single counter can result in more confusing namespacing when >>>> you have .bar.clone.4 despite there only being 3 clones of .bar. >>>> >>>> From a practical point of view this change is helpful to anyone >>>> diffing binary output such as forensic analysts, Debian Reproducible >>>> Builds or even someone validating compiler output (before and after an >>>> input >>>> source patch). The extra changes that this patch alleviates are a >>>> distraction and could even be misleading. For example, applying a >>>> source patch to the same Linux source produces the following binary >>>> diff before my change: >>>> >>>> --- /tmp/output.o.objdump >>>> +++ /tmp/patched-output.o.objdump >>>> @@ -1,5 +1,5 @@ >>>> >>>> -/tmp/uverbs_cmd/output.o: file format elf32-i386 >>>> +/tmp/uverbs_cmd/patched-output.o: file format elf32-i386 >>>> >>>> >>>> Disassembly of section .text.get_order: >>>> @@ -265,12 +265,12 @@ >>>> 3: e9 fc ff ff ff jmp 4 <put_cq_read+0x4> >>>> 4: R_386_PC32 .text.put_uobj_read >>>> >>>> -Disassembly of section .text.trace_kmalloc.constprop.3: >>>> +Disassembly of section .text.trace_kmalloc.constprop.4: >>>> >>>> -00000000 <trace_kmalloc.constprop.3>: >>>> +00000000 <trace_kmalloc.constprop.4>: >>>> 0: 83 3d 04 00 00 00 00 cmpl $0x0,0x4 >>>> 2: R_386_32 __tracepoint_kmalloc >>>> - 7: 74 34 je 3d >>>> <trace_kmalloc.constprop.3+0x3d> >>>> + 7: 74 34 je 3d >>>> <trace_kmalloc.constprop.4+0x3d> >>>> 9: 55 push %ebp >>>> a: 89 cd mov %ecx,%ebp >>>> c: 57 push %edi >>>> @@ -281,7 +281,7 @@ >>>> 13: 8b 1d 10 00 00 00 mov 0x10,%ebx >>>> 15: R_386_32 __tracepoint_kmalloc >>>> 19: 85 db test %ebx,%ebx >>>> - 1b: 74 1b je 38 >>>> <trace_kmalloc.constprop.3+0x38> >>>> + 1b: 74 1b je 38 >>>> <trace_kmalloc.constprop.4+0x38> >>>> 1d: 68 d0 00 00 00 push $0xd0 >>>> 22: 89 fa mov %edi,%edx >>>> 24: 89 f0 mov %esi,%eax >>>> @@ -292,7 +292,7 @@ >>>> 31: 58 pop %eax >>>> 32: 83 3b 00 cmpl $0x0,(%ebx) >>>> 35: 5a pop %edx >>>> - 36: eb e3 jmp 1b >>>> <trace_kmalloc.constprop.3+0x1b> >>>> + 36: eb e3 jmp 1b >>>> <trace_kmalloc.constprop.4+0x1b> >>>> 38: 5b pop %ebx >>>> 39: 5e pop %esi >>>> 3a: 5f pop %edi >>>> @@ -846,7 +846,7 @@ >>>> 78: b8 5f 00 00 00 mov $0x5f,%eax >>>> 79: R_386_32 .text.ib_uverbs_alloc_pd >>>> 7d: e8 fc ff ff ff call 7e <ib_uverbs_alloc_pd+0x7e> >>>> - 7e: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 7e: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 82: c7 45 d4 f4 ff ff ff movl $0xfffffff4,-0x2c(%ebp) >>>> 89: 59 pop %ecx >>>> 8a: 85 db test %ebx,%ebx >>>> @@ -1068,7 +1068,7 @@ >>>> 9c: b8 83 00 00 00 mov $0x83,%eax >>>> 9d: R_386_32 .text.ib_uverbs_reg_mr >>>> a1: e8 fc ff ff ff call a2 <ib_uverbs_reg_mr+0xa2> >>>> - a2: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + a2: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> a6: ba f4 ff ff ff mov $0xfffffff4,%edx >>>> ab: 58 pop %eax >>>> ac: 85 db test %ebx,%ebx >>>> @@ -1385,7 +1385,7 @@ >>>> 99: b8 7b 00 00 00 mov $0x7b,%eax >>>> 9a: R_386_32 .text.ib_uverbs_create_cq >>>> 9e: e8 fc ff ff ff call 9f <ib_uverbs_create_cq+0x9f> >>>> - 9f: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 9f: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> a3: 58 pop %eax >>>> a4: 85 db test %ebx,%ebx >>>> a6: 75 0a jne b2 <ib_uverbs_create_cq+0xb2> >>>> @@ -1607,129 +1607,107 @@ >>>> 00000000 <ib_uverbs_poll_cq>: >>>> 0: 55 push %ebp >>>> 1: 57 push %edi >>>> - 2: 89 c7 mov %eax,%edi >>>> - 4: 56 push %esi >>>> - 5: 89 ce mov %ecx,%esi >>>> - 7: b9 10 00 00 00 mov $0x10,%ecx >>>> - c: 53 push %ebx >>>> - d: 83 ec 18 sub $0x18,%esp >>>> - 10: 8d 44 24 08 lea 0x8(%esp),%eax >>>> - 14: e8 fc ff ff ff call 15 <ib_uverbs_poll_cq+0x15> >>>> - 15: R_386_PC32 copy_from_user >>>> - 19: 85 c0 test %eax,%eax >>>> - 1b: 0f 85 34 01 00 00 jne 155 <ib_uverbs_poll_cq+0x155> >>>> - 21: 6b 44 24 14 34 imul $0x34,0x14(%esp),%eax >>>> - 26: ba d0 00 00 00 mov $0xd0,%edx >>>> - 2b: e8 fc ff ff ff call 2c <ib_uverbs_poll_cq+0x2c> >>>> - 2c: R_386_PC32 __kmalloc >>>> - 30: 89 c5 mov %eax,%ebp >>>> - 32: 85 c0 test %eax,%eax >>>> - 34: 0f 84 22 01 00 00 je 15c <ib_uverbs_poll_cq+0x15c> >>>> - 3a: 6b 44 24 14 30 imul $0x30,0x14(%esp),%eax >>>> - 3f: ba d0 00 00 00 mov $0xd0,%edx >>>> - 44: 83 c0 08 add $0x8,%eax >>>> - 47: 89 44 24 04 mov %eax,0x4(%esp) >>>> - 4b: e8 fc ff ff ff call 4c <ib_uverbs_poll_cq+0x4c> >>>> - 4c: R_386_PC32 __kmalloc >>>> - 50: ba f4 ff ff ff mov $0xfffffff4,%edx >>>> - 55: 89 04 24 mov %eax,(%esp) >>>> - 58: 85 c0 test %eax,%eax >>>> - 5a: 0f 84 e1 00 00 00 je 141 <ib_uverbs_poll_cq+0x141> >>>> - 60: 8b 4f 58 mov 0x58(%edi),%ecx >>>> - 63: 6a 00 push $0x0 >>>> - 65: b8 00 00 00 00 mov $0x0,%eax >>>> - 66: R_386_32 ib_uverbs_cq_idr >>>> - 6a: 8b 54 24 14 mov 0x14(%esp),%edx >>>> - 6e: e8 fc ff ff ff call 6f <ib_uverbs_poll_cq+0x6f> >>>> - 6f: R_386_PC32 .text.idr_read_obj >>>> - 73: ba ea ff ff ff mov $0xffffffea,%edx >>>> - 78: 89 c7 mov %eax,%edi >>>> - 7a: 58 pop %eax >>>> - 7b: 85 ff test %edi,%edi >>>> - 7d: 0f 84 ae 00 00 00 je 131 <ib_uverbs_poll_cq+0x131> >>>> - 83: 8b 1f mov (%edi),%ebx >>>> - 85: 8b 54 24 14 mov 0x14(%esp),%edx >>>> - 89: 89 e9 mov %ebp,%ecx >>>> - 8b: 89 f8 mov %edi,%eax >>>> - 8d: ff 93 70 01 00 00 call *0x170(%ebx) >>>> - 93: 8b 1c 24 mov (%esp),%ebx >>>> - 96: 89 03 mov %eax,(%ebx) >>>> - 98: 89 f8 mov %edi,%eax >>>> - 9a: e8 fc ff ff ff call 9b <ib_uverbs_poll_cq+0x9b> >>>> - 9b: R_386_PC32 .text.put_cq_read >>>> - 9f: 8b 1c 24 mov (%esp),%ebx >>>> - a2: 89 e8 mov %ebp,%eax >>>> - a4: 6b 3b 34 imul $0x34,(%ebx),%edi >>>> - a7: 8d 53 08 lea 0x8(%ebx),%edx >>>> - aa: 01 ef add %ebp,%edi >>>> - ac: 39 f8 cmp %edi,%eax >>>> - ae: 74 67 je 117 <ib_uverbs_poll_cq+0x117> >>>> - b0: 8b 08 mov (%eax),%ecx >>>> - b2: 8b 58 04 mov 0x4(%eax),%ebx >>>> - b5: 83 c2 30 add $0x30,%edx >>>> - b8: 83 c0 34 add $0x34,%eax >>>> - bb: 89 4a d0 mov %ecx,-0x30(%edx) >>>> - be: 89 5a d4 mov %ebx,-0x2c(%edx) >>>> - c1: 8b 48 d4 mov -0x2c(%eax),%ecx >>>> - c4: 89 4a d8 mov %ecx,-0x28(%edx) >>>> - c7: 8b 48 d8 mov -0x28(%eax),%ecx >>>> - ca: 89 4a dc mov %ecx,-0x24(%edx) >>>> - cd: 8b 48 dc mov -0x24(%eax),%ecx >>>> - d0: 89 4a e0 mov %ecx,-0x20(%edx) >>>> - d3: 8b 48 e0 mov -0x20(%eax),%ecx >>>> - d6: 89 4a e4 mov %ecx,-0x1c(%edx) >>>> - d9: 8b 48 e8 mov -0x18(%eax),%ecx >>>> - dc: 89 4a e8 mov %ecx,-0x18(%edx) >>>> - df: 8b 48 e4 mov -0x1c(%eax),%ecx >>>> - e2: 8b 49 20 mov 0x20(%ecx),%ecx >>>> - e5: 89 4a ec mov %ecx,-0x14(%edx) >>>> - e8: 8b 48 ec mov -0x14(%eax),%ecx >>>> - eb: 89 4a f0 mov %ecx,-0x10(%edx) >>>> - ee: 8b 48 f0 mov -0x10(%eax),%ecx >>>> - f1: 89 4a f4 mov %ecx,-0xc(%edx) >>>> - f4: 8b 48 f4 mov -0xc(%eax),%ecx >>>> - f7: 66 89 4a f8 mov %cx,-0x8(%edx) >>>> - fb: 66 8b 48 f6 mov -0xa(%eax),%cx >>>> - ff: 66 89 4a fa mov %cx,-0x6(%edx) >>>> - 103: 8a 48 f8 mov -0x8(%eax),%cl >>>> - 106: 88 4a fc mov %cl,-0x4(%edx) >>>> - 109: 8a 48 f9 mov -0x7(%eax),%cl >>>> - 10c: 88 4a fd mov %cl,-0x3(%edx) >>>> - 10f: 8a 48 fa mov -0x6(%eax),%cl >>>> - 112: 88 4a fe mov %cl,-0x2(%edx) >>>> - 115: eb 95 jmp ac <ib_uverbs_poll_cq+0xac> >>>> - 117: 8b 14 24 mov (%esp),%edx >>>> - 11a: 8b 4c 24 04 mov 0x4(%esp),%ecx >>>> - 11e: 8b 44 24 08 mov 0x8(%esp),%eax >>>> - 122: e8 fc ff ff ff call 123 <ib_uverbs_poll_cq+0x123> >>>> - 123: R_386_PC32 copy_to_user >>>> - 127: 83 f8 01 cmp $0x1,%eax >>>> - 12a: 19 d2 sbb %edx,%edx >>>> - 12c: f7 d2 not %edx >>>> - 12e: 83 e2 f2 and $0xfffffff2,%edx >>>> - 131: 8b 04 24 mov (%esp),%eax >>>> - 134: 89 54 24 04 mov %edx,0x4(%esp) >>>> - 138: e8 fc ff ff ff call 139 <ib_uverbs_poll_cq+0x139> >>>> - 139: R_386_PC32 kfree >>>> - 13d: 8b 54 24 04 mov 0x4(%esp),%edx >>>> - 141: 89 e8 mov %ebp,%eax >>>> - 143: 89 14 24 mov %edx,(%esp) >>>> - 146: e8 fc ff ff ff call 147 <ib_uverbs_poll_cq+0x147> >>>> - 147: R_386_PC32 kfree >>>> - 14b: 8b 14 24 mov (%esp),%edx >>>> - 14e: 85 d2 test %edx,%edx >>>> - 150: 0f 45 f2 cmovne %edx,%esi >>>> - 153: eb 0c jmp 161 <ib_uverbs_poll_cq+0x161> >>>> - 155: be f2 ff ff ff mov $0xfffffff2,%esi >>>> - 15a: eb 05 jmp 161 <ib_uverbs_poll_cq+0x161> >>>> - 15c: be f4 ff ff ff mov $0xfffffff4,%esi >>>> - 161: 83 c4 18 add $0x18,%esp >>>> - 164: 89 f0 mov %esi,%eax >>>> - 166: 5b pop %ebx >>>> - 167: 5e pop %esi >>>> - 168: 5f pop %edi >>>> - 169: 5d pop %ebp >>>> - 16a: c3 ret >>>> + 2: bf f2 ff ff ff mov $0xfffffff2,%edi >>>> + 7: 56 push %esi >>>> + 8: 53 push %ebx >>>> + 9: 89 c3 mov %eax,%ebx >>>> + b: 81 ec 84 00 00 00 sub $0x84,%esp >>>> + 11: 89 4c 24 04 mov %ecx,0x4(%esp) >>>> + 15: 8d 44 24 10 lea 0x10(%esp),%eax >>>> + 19: b9 10 00 00 00 mov $0x10,%ecx >>>> + 1e: e8 fc ff ff ff call 1f <ib_uverbs_poll_cq+0x1f> >>>> + 1f: R_386_PC32 copy_from_user >>>> + 23: 89 04 24 mov %eax,(%esp) >>>> + 26: 85 c0 test %eax,%eax >>>> + 28: 0f 85 1f 01 00 00 jne 14d <ib_uverbs_poll_cq+0x14d> >>>> + 2e: 8b 4b 58 mov 0x58(%ebx),%ecx >>>> + 31: 6a 00 push $0x0 >>>> + 33: b8 00 00 00 00 mov $0x0,%eax >>>> + 34: R_386_32 ib_uverbs_cq_idr >>>> + 38: bf ea ff ff ff mov $0xffffffea,%edi >>>> + 3d: 8b 54 24 1c mov 0x1c(%esp),%edx >>>> + 41: e8 fc ff ff ff call 42 <ib_uverbs_poll_cq+0x42> >>>> + 42: R_386_PC32 .text.idr_read_obj >>>> + 46: 89 c3 mov %eax,%ebx >>>> + 48: 58 pop %eax >>>> + 49: 85 db test %ebx,%ebx >>>> + 4b: 0f 84 fc 00 00 00 je 14d <ib_uverbs_poll_cq+0x14d> >>>> + 51: 8b 6c 24 10 mov 0x10(%esp),%ebp >>>> + 55: b9 02 00 00 00 mov $0x2,%ecx >>>> + 5a: 8d 7c 24 08 lea 0x8(%esp),%edi >>>> + 5e: 8b 04 24 mov (%esp),%eax >>>> + 61: 8d 75 08 lea 0x8(%ebp),%esi >>>> + 64: f3 ab rep stos %eax,%es:(%edi) >>>> + 66: 8b 44 24 1c mov 0x1c(%esp),%eax >>>> + 6a: 39 44 24 08 cmp %eax,0x8(%esp) >>>> + 6e: 73 1f jae 8f <ib_uverbs_poll_cq+0x8f> >>>> + 70: 8b 3b mov (%ebx),%edi >>>> + 72: 8d 4c 24 50 lea 0x50(%esp),%ecx >>>> + 76: ba 01 00 00 00 mov $0x1,%edx >>>> + 7b: 89 d8 mov %ebx,%eax >>>> + 7d: ff 97 70 01 00 00 call *0x170(%edi) >>>> + 83: 89 c7 mov %eax,%edi >>>> + 85: 85 c0 test %eax,%eax >>>> + 87: 0f 88 b9 00 00 00 js 146 <ib_uverbs_poll_cq+0x146> >>>> + 8d: 75 21 jne b0 <ib_uverbs_poll_cq+0xb0> >>>> + 8f: b9 08 00 00 00 mov $0x8,%ecx >>>> + 94: 8d 54 24 08 lea 0x8(%esp),%edx >>>> + 98: 89 e8 mov %ebp,%eax >>>> + 9a: bf f2 ff ff ff mov $0xfffffff2,%edi >>>> + 9f: e8 fc ff ff ff call a0 <ib_uverbs_poll_cq+0xa0> >>>> + a0: R_386_PC32 copy_to_user >>>> + a4: 85 c0 test %eax,%eax >>>> + a6: 0f 44 7c 24 04 cmove 0x4(%esp),%edi >>>> + ab: e9 96 00 00 00 jmp 146 <ib_uverbs_poll_cq+0x146> >>>> + b0: 8b 44 24 50 mov 0x50(%esp),%eax >>>> + b4: 8b 54 24 54 mov 0x54(%esp),%edx >>>> + b8: b9 30 00 00 00 mov $0x30,%ecx >>>> + bd: 89 44 24 20 mov %eax,0x20(%esp) >>>> + c1: 8b 44 24 58 mov 0x58(%esp),%eax >>>> + c5: 89 54 24 24 mov %edx,0x24(%esp) >>>> + c9: 8b 54 24 74 mov 0x74(%esp),%edx >>>> + cd: 89 44 24 28 mov %eax,0x28(%esp) >>>> + d1: 8b 44 24 5c mov 0x5c(%esp),%eax >>>> + d5: 89 44 24 2c mov %eax,0x2c(%esp) >>>> + d9: 8b 44 24 60 mov 0x60(%esp),%eax >>>> + dd: 89 44 24 30 mov %eax,0x30(%esp) >>>> + e1: 8b 44 24 64 mov 0x64(%esp),%eax >>>> + e5: 89 44 24 34 mov %eax,0x34(%esp) >>>> + e9: 8b 44 24 6c mov 0x6c(%esp),%eax >>>> + ed: 89 44 24 38 mov %eax,0x38(%esp) >>>> + f1: 8b 44 24 68 mov 0x68(%esp),%eax >>>> + f5: 8b 40 20 mov 0x20(%eax),%eax >>>> + f8: 89 54 24 44 mov %edx,0x44(%esp) >>>> + fc: 8d 54 24 20 lea 0x20(%esp),%edx >>>> + 100: c6 44 24 4f 00 movb $0x0,0x4f(%esp) >>>> + 105: 89 44 24 3c mov %eax,0x3c(%esp) >>>> + 109: 8b 44 24 70 mov 0x70(%esp),%eax >>>> + 10d: 89 44 24 40 mov %eax,0x40(%esp) >>>> + 111: 8b 44 24 78 mov 0x78(%esp),%eax >>>> + 115: 89 44 24 48 mov %eax,0x48(%esp) >>>> + 119: 8b 44 24 7c mov 0x7c(%esp),%eax >>>> + 11d: 66 89 44 24 4c mov %ax,0x4c(%esp) >>>> + 122: 8a 44 24 7e mov 0x7e(%esp),%al >>>> + 126: 88 44 24 4e mov %al,0x4e(%esp) >>>> + 12a: 89 f0 mov %esi,%eax >>>> + 12c: e8 fc ff ff ff call 12d <ib_uverbs_poll_cq+0x12d> >>>> + 12d: R_386_PC32 copy_to_user >>>> + 131: 85 c0 test %eax,%eax >>>> + 133: 75 0c jne 141 <ib_uverbs_poll_cq+0x141> >>>> + 135: 83 c6 30 add $0x30,%esi >>>> + 138: ff 44 24 08 incl 0x8(%esp) >>>> + 13c: e9 25 ff ff ff jmp 66 <ib_uverbs_poll_cq+0x66> >>>> + 141: bf f2 ff ff ff mov $0xfffffff2,%edi >>>> + 146: 89 d8 mov %ebx,%eax >>>> + 148: e8 fc ff ff ff call 149 <ib_uverbs_poll_cq+0x149> >>>> + 149: R_386_PC32 .text.put_cq_read >>>> + 14d: 81 c4 84 00 00 00 add $0x84,%esp >>>> + 153: 89 f8 mov %edi,%eax >>>> + 155: 5b pop %ebx >>>> + 156: 5e pop %esi >>>> + 157: 5f pop %edi >>>> + 158: 5d pop %ebp >>>> + 159: c3 ret >>>> >>>> Disassembly of section .text.ib_uverbs_req_notify_cq: >>>> >>>> @@ -1915,7 +1893,7 @@ >>>> 94: b8 7b 00 00 00 mov $0x7b,%eax >>>> 95: R_386_32 .text.ib_uverbs_create_qp >>>> 99: e8 fc ff ff ff call 9a <ib_uverbs_create_qp+0x9a> >>>> - 9a: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 9a: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 9e: 59 pop %ecx >>>> 9f: c7 85 50 ff ff ff f4 movl $0xfffffff4,-0xb0(%ebp) >>>> a6: ff ff ff >>>> @@ -2241,7 +2219,7 @@ >>>> 68: b8 4f 00 00 00 mov $0x4f,%eax >>>> 69: R_386_32 .text.ib_uverbs_query_qp >>>> 6d: e8 fc ff ff ff call 6e <ib_uverbs_query_qp+0x6e> >>>> - 6e: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 6e: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 72: 59 pop %ecx >>>> 73: c7 85 5c ff ff ff 10 movl $0x10,-0xa4(%ebp) >>>> 7a: 00 00 00 >>>> @@ -2260,7 +2238,7 @@ >>>> a3: b8 86 00 00 00 mov $0x86,%eax >>>> a4: R_386_32 .text.ib_uverbs_query_qp >>>> a8: e8 fc ff ff ff call a9 <ib_uverbs_query_qp+0xa9> >>>> - a9: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + a9: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> ad: 5a pop %edx >>>> ae: 85 db test %ebx,%ebx >>>> b0: 0f 84 c1 01 00 00 je 277 <ib_uverbs_query_qp+0x277> >>>> @@ -2462,7 +2440,7 @@ >>>> 88: b8 6f 00 00 00 mov $0x6f,%eax >>>> 89: R_386_32 .text.ib_uverbs_modify_qp >>>> 8d: e8 fc ff ff ff call 8e <ib_uverbs_modify_qp+0x8e> >>>> - 8e: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 8e: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 92: 5a pop %edx >>>> 93: ba f4 ff ff ff mov $0xfffffff4,%edx >>>> 98: 85 db test %ebx,%ebx >>>> @@ -3129,7 +3107,7 @@ >>>> 6a: b8 4c 00 00 00 mov $0x4c,%eax >>>> 6b: R_386_32 .text.ib_uverbs_create_ah >>>> 6f: e8 fc ff ff ff call 70 <ib_uverbs_create_ah+0x70> >>>> - 70: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 70: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 74: 58 pop %eax >>>> 75: 85 db test %ebx,%ebx >>>> 77: 75 0a jne 83 <ib_uverbs_create_ah+0x83> >>>> @@ -3396,7 +3374,7 @@ >>>> af: b8 91 00 00 00 mov $0x91,%eax >>>> b0: R_386_32 .text.ib_uverbs_attach_mcast >>>> b4: e8 fc ff ff ff call b5 >>>> <ib_uverbs_attach_mcast+0xb5> >>>> - b5: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + b5: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> b9: 58 pop %eax >>>> ba: 85 db test %ebx,%ebx >>>> bc: 75 07 jne c5 >>>> <ib_uverbs_attach_mcast+0xc5> >>>> @@ -3572,7 +3550,7 @@ >>>> 77: b8 5e 00 00 00 mov $0x5e,%eax >>>> 78: R_386_32 .text.ib_uverbs_create_srq >>>> 7c: e8 fc ff ff ff call 7d <ib_uverbs_create_srq+0x7d> >>>> - 7d: R_386_PC32 .text.trace_kmalloc.constprop.3 >>>> + 7d: R_386_PC32 .text.trace_kmalloc.constprop.4 >>>> 81: ba f4 ff ff ff mov $0xfffffff4,%edx >>>> 86: 58 pop %eax >>>> 87: 85 db test %ebx,%ebx >>>> >>>> Needless to say, after my change the diff only shows the truly changed >>>> functions. >>>> >>>> - Michael >>>> >>> >>> Updated patch >>> >>> >>> gcc: >>> 2018-07-26 Michael Ploujnikov <michael.ploujni...@oracle.com> >>> >>> Make function clone name numbering independent. >>> * cgraph.h (clone_function_name_1): Add clone_num argument >>> * cgraphclones.c: Replace clone_fn_id_num with clone_fn_ids. >>> (clone_function_name): Use counters from the hashtable. >>> >>> lto: >>> 2018-07-26 Michael Ploujnikov <michael.ploujni...@oracle.com> >>> >>> * lto-partition.c (privatize_symbol_name_1): Pass 0 to >>> clone_function_name_1. >>> >>> >>> testsuite: >>> 2018-07-26 Michael Ploujnikov <michael.ploujni...@oracle.com> >>> >>> Clone numbering should be independent for each function. >>> * gcc/testsuite/gcc.dg/independent-cloneids-1.c: New test. >>> >>> --- >>> gcc/cgraph.h | 2 +- >>> gcc/cgraphclones.c | 16 ++++++++--- >>> gcc/lto/lto-partition.c | 2 +- >>> gcc/testsuite/gcc.dg/independent-cloneids-1.c | 38 >>> +++++++++++++++++++++++++++ >>> 4 files changed, 52 insertions(+), 6 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.dg/independent-cloneids-1.c >>> >>> diff --git gcc/cgraph.h gcc/cgraph.h >>> index a8b1b4c..fe44bd0 100644 >>> --- gcc/cgraph.h >>> +++ gcc/cgraph.h >>> @@ -2368,7 +2368,7 @@ basic_block init_lowered_empty_function (tree, bool, >>> profile_count); >>> tree thunk_adjust (gimple_stmt_iterator *, tree, bool, HOST_WIDE_INT, >>> tree); >>> /* In cgraphclones.c */ >>> >>> -tree clone_function_name_1 (const char *, const char *); >>> +tree clone_function_name_1 (const char *, const char *, unsigned int); >>> tree clone_function_name (tree decl, const char *); >>> >>> void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *, >>> diff --git gcc/cgraphclones.c gcc/cgraphclones.c >>> index 6e84a31..7de7a65 100644 >>> --- gcc/cgraphclones.c >>> +++ gcc/cgraphclones.c >>> @@ -512,13 +512,13 @@ cgraph_node::create_clone (tree new_decl, >>> profile_count >>> prof_count, >>> return new_node; >>> } >>> >>> -static GTY(()) unsigned int clone_fn_id_num; >>> +static GTY(()) hash_map<const char *, unsigned> *clone_fn_ids; >>> >>> /* Return a new assembler name for a clone with SUFFIX of a decl named >>> NAME. */ >>> >>> tree >>> -clone_function_name_1 (const char *name, const char *suffix) >>> +clone_function_name_1 (const char *name, const char *suffix, unsigned int >>> clone_num) >>> { >>> size_t len = strlen (name); >>> char *tmp_name, *prefix; >>> @@ -527,7 +527,7 @@ clone_function_name_1 (const char *name, const char >>> *suffix) >>> memcpy (prefix, name, len); >>> strcpy (prefix + len + 1, suffix); >>> prefix[len] = symbol_table::symbol_suffix_separator (); >>> - ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++); >>> + ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_num); >>> return get_identifier (tmp_name); >>> } >>> >>> @@ -537,7 +537,15 @@ tree >>> clone_function_name (tree decl, const char *suffix) >>> { >>> tree name = DECL_ASSEMBLER_NAME (decl); >>> - return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix); >>> + const char *decl_name = IDENTIFIER_POINTER (name); >>> + unsigned int *suffix_counter; >>> + if (!clone_fn_ids) { >>> + /* Initialize the per-function counter hash table on first call */ >>> + clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64); >>> + } >>> + suffix_counter = &clone_fn_ids->get_or_insert(decl_name); >>> + *suffix_counter = *suffix_counter + 1; >>> + return clone_function_name_1 (decl_name, suffix, *suffix_counter); >>> } >>> >>> >>> diff --git gcc/lto/lto-partition.c gcc/lto/lto-partition.c >>> index c7a5710..4b15232 100644 >>> --- gcc/lto/lto-partition.c >>> +++ gcc/lto/lto-partition.c >>> @@ -965,7 +965,7 @@ privatize_symbol_name_1 (symtab_node *node, tree decl) >>> name = maybe_rewrite_identifier (name); >>> symtab->change_decl_assembler_name (decl, >>> clone_function_name_1 (name, >>> - "lto_priv")); >>> + "lto_priv", 0)); >>> >>> if (node->lto_file_data) >>> lto_record_renamed_decl (node->lto_file_data, name, >>> diff --git gcc/testsuite/gcc.dg/independent-cloneids-1.c >>> gcc/testsuite/gcc.dg/independent-cloneids-1.c >>> new file mode 100644 >>> index 0000000..d723e20 >>> --- /dev/null >>> +++ gcc/testsuite/gcc.dg/independent-cloneids-1.c >>> @@ -0,0 +1,38 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-O3 -fipa-cp -fipa-cp-clone -fdump-ipa-cp" } */ >>> + >>> +extern int printf (const char *, ...); >>> + >>> +static int __attribute__ ((noinline)) >>> +foo (int arg) >>> +{ >>> + return 7 * arg; >>> +} >>> + >>> +static int __attribute__ ((noinline)) >>> +bar (int arg) >>> +{ >>> + return arg * arg; >>> +} >>> + >>> +int >>> +baz (int arg) >>> +{ >>> + printf("%d\n", bar (3)); >>> + printf("%d\n", bar (4)); >>> + printf("%d\n", foo (5)); >>> + printf("%d\n", foo (6)); >>> + /* adding or removing the following call should not affect foo >>> + function's clone numbering */ >>> + printf("%d\n", bar (7)); >>> + return foo (8); >>> +} >>> + >>> +/* { dg-final { scan-ipa-dump "Function bar.constprop.0" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump "Function bar.constprop.1" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump "Function bar.constprop.3" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump "Function foo.constprop.0" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump "Function foo.constprop.1" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump "Function foo.constprop.2" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.3" "cp" } } */ >>> +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.4" "cp" } } */ >>> >> >> Ping? Could someone else please take a look at this since Richard appears to >> be >> overloaded at the moment? > > The implementation is now how I suggested. I'm still not 100% agreeing to > the solution of using a hash-map here as I hoped that most callers have > a good idea themselves what "number" of clone they are creating. Thus > most of them should be explicit in passing the number. But I didn't > investigate > any of them (but the LTO one which I think doesn't need the suffix at all). > > So if anybody besides me is fine with this approach feel free to approve. > > One minor nit: > >>> + if (!clone_fn_ids) { >>> + /* Initialize the per-function counter hash table on first call */ >>> + clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64); >>> + } > > omit {} around single stmts, the comment doesn't add much information but > if you want to keep it move it before the if(). > > Thanks and sorry for the delay (slowly wading through older patches...), > Richard.
No worries, and thank you for the review. I dug a little deeper and found that LTO does need the numbered suffixes. Otherwise I get errors in a lot of tests that look like: ... spawn -ignore SIGHUP /gcc/build/gcc/xgcc -B/gcc/build/gcc/ c_lto_pr60820_0.o c_lto_pr60820_1.o -fno-diagnostics-show-caret -fdiagnostics-color=never -flto -r -nostdlib -O2 -flinker-output=nolto-rel -o gcc-dg-lto-pr60820-01.exe pid is 52067 -52067 lto1: error: two or more sections for .gnu.lto_local_in6addr_any.lto_priv.0.lto_priv.0.3f1a2975b4946e93 lto1: internal compiler error: cannot read LTO decls from /tmp/ccwo9PaB.ltrans0.o ... which makes sense because lto-partition.c:rename_statics calls privatize_symbol_name in a loop over symbols with the same asm name. So I'm planning to go back to the version where clone_function_name_1 just takes two strings and I'm wondering if I should do: name = IDENTIFIER_POINTER (get_identifier (maybe_rewrite_identifier (name))); in privatize_symbol_name_1 to guarantee that name always persists. - Michael
From 03e9191e18e3935171f0281b588b0c6191e46253 Mon Sep 17 00:00:00 2001 From: Michael Ploujnikov <michael.ploujni...@oracle.com> Date: Mon, 16 Jul 2018 12:55:49 -0400 Subject: [PATCH] Make function assembly more independent. gcc: 2018-08-02 Michael Ploujnikov <michael.ploujni...@oracle.com> Make function clone name numbering independent. * cgraphclones.c: Replace clone_fn_id_num with clone_fn_ids. (clone_function_name_1): Use it. lto: 2018-08-02 Michael Ploujnikov <michael.ploujni...@oracle.com> * lto-partition.c (privatize_symbol_name_1): Pass a persistent identifier string to clone_function_name_1. testsuite: 2018-08-02 Michael Ploujnikov <michael.ploujni...@oracle.com> Clone id counters should be completely independent from one another. * gcc/testsuite/gcc.dg/independent-cloneids-1.c: New test. --- gcc/cgraphclones.c | 10 +++++-- gcc/lto/lto-partition.c | 2 +- gcc/testsuite/gcc.dg/independent-cloneids-1.c | 38 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/independent-cloneids-1.c diff --git gcc/cgraphclones.c gcc/cgraphclones.c index 6e84a31..dfed9e5 100644 --- gcc/cgraphclones.c +++ gcc/cgraphclones.c @@ -512,7 +512,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, return new_node; } -static GTY(()) unsigned int clone_fn_id_num; +static GTY(()) hash_map<const char *, unsigned> *clone_fn_ids; /* Return a new assembler name for a clone with SUFFIX of a decl named NAME. */ @@ -522,12 +522,18 @@ clone_function_name_1 (const char *name, const char *suffix) { size_t len = strlen (name); char *tmp_name, *prefix; + unsigned int *suffix_counter; prefix = XALLOCAVEC (char, len + strlen (suffix) + 2); memcpy (prefix, name, len); strcpy (prefix + len + 1, suffix); prefix[len] = symbol_table::symbol_suffix_separator (); - ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++); + /* Initialize the per-function counter hash table on first call */ + if (!clone_fn_ids) + clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64); + suffix_counter = &clone_fn_ids->get_or_insert(name); + *suffix_counter = *suffix_counter + 1; + ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, *suffix_counter); return get_identifier (tmp_name); } diff --git gcc/lto/lto-partition.c gcc/lto/lto-partition.c index c7a5710..fc8514a 100644 --- gcc/lto/lto-partition.c +++ gcc/lto/lto-partition.c @@ -962,7 +962,7 @@ privatize_symbol_name_1 (symtab_node *node, tree decl) if (must_not_rename (node, name)) return false; - name = maybe_rewrite_identifier (name); + name = IDENTIFIER_POINTER (get_identifier (maybe_rewrite_identifier (name))); symtab->change_decl_assembler_name (decl, clone_function_name_1 (name, "lto_priv")); diff --git gcc/testsuite/gcc.dg/independent-cloneids-1.c gcc/testsuite/gcc.dg/independent-cloneids-1.c new file mode 100644 index 0000000..d723e20 --- /dev/null +++ gcc/testsuite/gcc.dg/independent-cloneids-1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fipa-cp -fipa-cp-clone -fdump-ipa-cp" } */ + +extern int printf (const char *, ...); + +static int __attribute__ ((noinline)) +foo (int arg) +{ + return 7 * arg; +} + +static int __attribute__ ((noinline)) +bar (int arg) +{ + return arg * arg; +} + +int +baz (int arg) +{ + printf("%d\n", bar (3)); + printf("%d\n", bar (4)); + printf("%d\n", foo (5)); + printf("%d\n", foo (6)); + /* adding or removing the following call should not affect foo + function's clone numbering */ + printf("%d\n", bar (7)); + return foo (8); +} + +/* { dg-final { scan-ipa-dump "Function bar.constprop.0" "cp" } } */ +/* { dg-final { scan-ipa-dump "Function bar.constprop.1" "cp" } } */ +/* { dg-final { scan-ipa-dump "Function bar.constprop.3" "cp" } } */ +/* { dg-final { scan-ipa-dump "Function foo.constprop.0" "cp" } } */ +/* { dg-final { scan-ipa-dump "Function foo.constprop.1" "cp" } } */ +/* { dg-final { scan-ipa-dump "Function foo.constprop.2" "cp" } } */ +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.3" "cp" } } */ +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.4" "cp" } } */ -- 2.7.4
signature.asc
Description: OpenPGP digital signature