[Bug tree-optimization/108418] New: gcc does not optimize trivial code

2023-01-16 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108418

Bug ID: 108418
   Summary: gcc does not optimize trivial code
   Product: gcc
   Version: 13.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: socketpair at gmail dot com
  Target Milestone: ---

https://godbolt.org/z/s3j8jK6ca

```
#include <stdint.h>

/*
 * Test case for bug 108418: seven early-return checks that all share the
 * sub-condition (ip_proto == 17 && qwe == 42) and differ only in the
 * dst_port constant. Returns 1 if any check matches, 0 otherwise.
 */
int firewall1(const uint8_t *restrict data) {
const uint8_t ip_proto = *data;
/* The cast binds before the addition, so 32 and 64 are counted in
 * uint16_t elements, not in bytes. */
const uint16_t dst_port = *((const uint16_t *)data + 32);
const uint16_t qwe = *((const uint16_t *)data + 64);

/* Each line repeats the ip_proto and qwe tests; only dst_port varies. */
if (ip_proto == 17 && dst_port == 17 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 23 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 45 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 63 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 0 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 2 && qwe == 42) return 1;
if (ip_proto == 17 && dst_port == 3 && qwe == 42) return 1;

return 0;
}

/*
 * Simpler variant used for comparison in bug 108418: the same seven
 * dst_port constants, but each check only shares (ip_proto == 17).
 * Returns 1 if any check matches, 0 otherwise.
 */
int firewall2(const uint8_t *restrict data) {
const uint8_t ip_proto = *data;
/* The cast binds before the addition, so 32 and 64 are counted in
 * uint16_t elements, not in bytes. */
const uint16_t dst_port = *((const uint16_t *)data + 32);
/* qwe is loaded here but not referenced by any check below. */
const uint16_t qwe = *((const uint16_t *)data + 64);

if (ip_proto == 17 && dst_port == 17) return 1;
if (ip_proto == 17 && dst_port == 23) return 1;
if (ip_proto == 17 && dst_port == 45) return 1;
if (ip_proto == 17 && dst_port == 63) return 1;
if (ip_proto == 17 && dst_port == 0) return 1;
if (ip_proto == 17 && dst_port == 2) return 1;
if (ip_proto == 17 && dst_port == 3) return 1;

return 0;
}
```

GCC fails to factor out the common condition (ip_proto == 17 && qwe == 42) in firewall1.

It does factor out the common condition in the simpler case, firewall2.

See the assembler output on Godbolt.

[Bug tree-optimization/108418] gcc does not optimize trivial code

2023-01-16 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108418

--- Comment #1 from Коренберг Марк  ---
Sorry, but code like this arises as a result of automatic C code
generation.

[Bug regression/107767] New: GCC has some problems in optimizer of trivial case

2022-11-20 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107767

Bug ID: 107767
   Summary: GCC has some problems in optimizer of trivial case
   Product: gcc
   Version: 13.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: regression
  Assignee: unassigned at gcc dot gnu.org
  Reporter: socketpair at gmail dot com
  Target Milestone: ---

See https://godbolt.org/z/rTfTondfP

```
#include <stdint.h>

/*
 * Test case for bug 107767: eight early-return checks that all share
 * (ip_proto == 17) and differ only in the dst_port constant.
 * Returns 1 if any check matches, 0 otherwise.
 */
int firewall(const uint8_t *restrict data) {
const uint8_t ip_proto = *data;
/* The cast binds before the addition, so 32 is counted in uint16_t
 * elements, not in bytes. */
const uint16_t dst_port = *((const uint16_t *)data + 32);

if (ip_proto == 17 && dst_port == 15) return 1;
if (ip_proto == 17 && dst_port == 23) return 1;
if (ip_proto == 17 && dst_port == 47) return 1;
if (ip_proto == 17 && dst_port == 45) return 1;
if (ip_proto == 17 && dst_port == 42) return 1;
if (ip_proto == 17 && dst_port == 1) return 1;
if (ip_proto == 17 && dst_port == 2) return 1;
if (ip_proto == 17 && dst_port == 3) return 1;

return 0;
}

/*
 * Second test case for bug 107767: the same eight dst_port constants as
 * firewall(), tested without the ip_proto condition.
 * Returns 1 if dst_port equals any of them, 0 otherwise.
 */
int firewall2(const uint8_t *restrict data) {
/* The cast binds before the addition, so 32 is counted in uint16_t
 * elements, not in bytes. */
const uint16_t dst_port = *((const uint16_t *)data + 32);

if (dst_port == 15) return 1;
if (dst_port == 23) return 1;
if (dst_port == 47) return 1;
if (dst_port == 45) return 1;
if (dst_port == 42) return 1;
if (dst_port == 1) return 1;
if (dst_port == 2) return 1;
if (dst_port == 3) return 1;

return 0;
}
```

Compile with -Os.

The second function is obviously NOT minimal. This is a bug; GCC 12.2 does not
have it.

[Bug regression/107767] GCC has some problems in optimizer of trivial case

2022-11-20 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107767

--- Comment #1 from Коренберг Марк  ---
See the assembler output for firewall2(). It is not optimized for size as -Os
requires (compare with firewall(), which is OK):


```
firewall:
movw    64(%rdi), %ax
cmpb    $17, (%rdi)
sete    %cl
leal    -15(%rax), %edx
testw   $-9, %dx
movb    $1, %dl
sete    %sil
cmpw    $47, %ax
ja      .L2
movabsq $-180319906955279, %rdx
btq     %rax, %rdx
setc    %dl
.L2:
movl    %edx, %eax
xorl    $1, %eax
orl     %esi, %eax
andl    %ecx, %eax
movzbl  %al, %eax
ret
firewall2:
movw    64(%rdi), %ax
xorl    %edx, %edx
decl    %eax
cmpw    $46, %ax
ja      .L5
movzwl  %ax, %eax
movsbl  CSWTCH.2(%rax), %edx
.L5:
movl    %edx, %eax
ret
CSWTCH.2:
.byte   1
.byte   1
.byte   1
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   1
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   1
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   0
.byte   1
.byte   0
.byte   0
.byte   1
.byte   0
.byte   1
```

[Bug regression/107767] GCC has some problems in optimizer of trivial case

2022-11-21 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107767

--- Comment #3 from Коренберг Марк  ---
I forgot to add: with GCC 12.2 everything is OK; GCC 13 has the bug.

[Bug tree-optimization/107767] [13 Regression] switch to table conversion happening even though using btq is better

2022-12-02 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107767

--- Comment #5 from Коренберг Марк  ---
This is not only an -Os problem. I think -O3 in GCC 12.2 will produce faster
code than -O3 in GCC 13 (for this case). This code should not be treated as
if-else-if-else-if. GCC 12 does its job right.

[Bug tree-optimization/107767] [13 Regression] switch to table conversion happening even though using btq is better

2022-12-02 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107767

--- Comment #8 from Коренберг Марк  ---
Okay, but why is the switch-case not handled with the fast mask-based
implementation (when the difference between the smallest and biggest integer
is <= 64)?

See the first function in my first message where it works as expected.

It seems the problem is not the conversion to switch-case, but a missing
optimisation for the switch-case itself.

[Bug tree-optimization/108215] New: Does not optimize trivial case with bit operations

2022-12-23 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108215

Bug ID: 108215
   Summary: Does not optimize trivial case with bit operations
   Product: gcc
   Version: 13.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: socketpair at gmail dot com
  Target Milestone: ---

https://godbolt.org/z/5e3eKqPqs

```C
#include <stdint.h>

/*
 * Test case for bug 108215: returns 1 when the 32-bit word loaded from
 * data matches either masked pattern, 0 otherwise.
 *
 * The hex constants are restored from the compiler output quoted in the
 * report (xorw %ax,%ax clears the low 16 bits; 287440896 == 0x11220000,
 * 287453952 == 0x11223300).
 */
int firewall3(const uint8_t *restrict data) {
const uint32_t src = *((const uint32_t *)data);
/* Upper 16 bits equal 0x1122. */
if ((src & 0xFFFF0000) == 0x11220000) return 1;
/* Upper 24 bits equal 0x112233. Every such word already passes the check
 * above, so this test is redundant in this order. */
if ((src & 0xFFFFFF00) == 0x11223300) return 1;
return 0;
}

/*
 * Test case for bug 108215: the same two masked comparisons as firewall3,
 * written in the opposite order. Returns 1 when the 32-bit word loaded from
 * data matches either pattern, 0 otherwise.
 *
 * The hex constants are restored from the compiler output quoted in the
 * report (xorb %cl,%cl clears the low 8 bits; 287453952 == 0x11223300,
 * 287440896 == 0x11220000).
 */
int firewall4(const uint8_t *restrict data) {
const uint32_t src = *((const uint32_t *)data);
/* Upper 24 bits equal 0x112233. Every word that passes here would also
 * pass the broader check below. */
if ((src & 0xFFFFFF00) == 0x11223300) return 1;
/* Upper 16 bits equal 0x1122. */
if ((src & 0xFFFF0000) == 0x11220000) return 1;
return 0;
}
```

```
firewall3:
movl    (%rdi), %eax
xorw    %ax, %ax
cmpl    $287440896, %eax
sete    %al
movzbl  %al, %eax
ret
firewall4:
movl    (%rdi), %eax
movl    $1, %edx
movl    %eax, %ecx
xorb    %cl, %cl
cmpl    $287453952, %ecx
je      .L3
xorw    %ax, %ax
xorl    %edx, %edx
cmpl    $287440896, %eax
sete    %dl
.L3:
movl    %edx, %eax
ret
```

firewall3(): Excellent!
firewall4(): FAIL!

It's obvious that the order of the comparisons in this example does not matter,
so I think the missed optimisation in firewall4() is a bug.

[Bug c++/111577] New: -Os gives significantly bigger code than -O0

2023-09-24 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111577

Bug ID: 111577
   Summary: -Os gives significantly bigger code than -O0
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: socketpair at gmail dot com
  Target Milestone: ---

Yes, I saw #35806, #41175 and others.

See https://godbolt.org/z/Pnh89Y3Yb

```
#include <string>

using namespace std;

// Test case for bug 111577: builds three std::string objects from argv,
// assigns the concatenation of two of them to the third, and returns the
// resulting length.
int main(int argc, char* argv[]) {
// Return early unless both argv[0] and argv[1] are non-null.
if (argv[0] == nullptr || argv[1] == nullptr) return 0;

string zxc(argv[0]);
string qwe(argv[1]);
// NOTE(review): argv[2] is not covered by the null check above; it is null
// when the program is run with exactly one argument - confirm intended.
string asd(argv[2]);

zxc = qwe + asd;

// size() is implicitly narrowed to int by the return type.
return zxc.size();
}
```

-Os -std=c++2b -march=skylake -m64 (615 bytes)

Compare the size with the same code, but built with these options:

-O0 -std=c++2b -march=skylake -m64 (409 bytes)

The -O0 build is much SMALLER (!) in bytes. I think it's a bug.

[Bug tree-optimization/116518] New: GCC does not optimize-out useless operations. Clang does.

2024-08-28 Thread socketpair at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116518

Bug ID: 116518
   Summary: GCC does not optimize-out useless operations. Clang
does.
   Product: gcc
   Version: 15.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: socketpair at gmail dot com
  Target Milestone: ---

https://godbolt.org/z/d46448vqa

-std=c++23 -O3 -fno-exceptions

#include <array>
#include <string>
#include <vector>
#include 
#include 

using namespace std;

namespace {
consteval string gen() { return "xxx"; }
class qwe {
   public:
consteval qwe(const string& str) : s(str.size()) { str.copy(x.data(), s); }
constexpr operator const vector() const {
return {x.cbegin(), x.cbegin() + s};
}

   private:
// 1048576 is a clang limit
array x{};
size_t s;
};

}  // namespace

int fun1() {
const vector v2{qwe(gen())};

return v2.size();
}

-
fun1():
subq    $10024, %rsp
movl    $10008, %edx
xorl    %esi, %esi
movq    %rsp, %rdi
call    memset
movl    $3, %edi
movl    $7895160, (%rsp)
call    operator new(unsigned long)
movzwl  (%rsp), %edx
movl    $3, %esi
movq    %rax, %rdi
movw    %dx, (%rax)
movzbl  2(%rsp), %edx
movb    %dl, 2(%rax)
call    operator delete(void*, unsigned long)
movl    $3, %eax
addq    $10024, %rsp
ret
-

Clang:


fun1():
mov eax, 3
ret