------- Comment #20 from ubizjak at gmail dot com 2009-09-10 17:29 ------- I have compared the dumps of two functions:
--function 1, WORKS-- extern void abort (void); int foo_arg; long x; void f4 (int i, ...) { __builtin_va_list ap; __builtin_va_start(ap,i); x = __builtin_va_arg(ap,double); foo_arg = __builtin_va_arg(ap,int); __builtin_va_end(ap); } --stdarg dump of function 1-- ;; Function f4 (f4) f4: va_list escapes 0, needs to save 16 GPR units and 3 FPR units. f4 (int i) { struct ap; int foo_arg.2; int D.2032; int * D.2031; long unsigned int D.2030; int * D.2029; long int D.2028; long int x.1; double D.2026; int D.2025; double * D.2024; long int D.2022; long unsigned int iftmp.0; double * D.2018; long int D.2017; int D.2016; void * D.2015; <bb 2>: __builtin_va_start (&ap, 0); D.2015_2 = ap.__base; D.2016_3 = ap.__offset; D.2017_4 = (long int) D.2016_3; D.2018_5 = (double *) D.2015_2; if (D.2017_4 <= 47) goto <bb 3>; else goto <bb 4>; <bb 3>: D.2022_6 = D.2017_4 + -48; iftmp.0_7 = (long unsigned int) D.2022_6; goto <bb 5>; <bb 4>: iftmp.0_8 = (long unsigned int) D.2017_4; <bb 5>: # iftmp.0_1 = PHI <iftmp.0_7(3), iftmp.0_8(4)> D.2024_9 = D.2018_5 + iftmp.0_1; D.2017_10 = D.2017_4 + 8; D.2025_11 = (int) D.2017_10; ap.__offset = D.2025_11; D.2026_12 = *D.2024_9; x.1_13 = (long int) D.2026_12; x = x.1_13; D.2028_16 = (long int) D.2025_11; D.2029_17 = (int *) D.2015_2; D.2030_18 = (long unsigned int) D.2028_16; D.2031_19 = D.2029_17 + D.2030_18; D.2028_20 = D.2028_16 + 8; D.2032_21 = (int) D.2028_20; ap.__offset = D.2032_21; foo_arg.2_22 = *D.2031_19; foo_arg = foo_arg.2_22; __builtin_va_end (&ap); return; } --function 2, FAILS-- extern void abort (void); int foo_arg; long x; static void foo (int v, __builtin_va_list ap) { switch (v) { case 5: foo_arg = __builtin_va_arg(ap,int); break; default: abort (); } } void f4 (int i, ...) { __builtin_va_list ap; __builtin_va_start(ap,i); x = __builtin_va_arg(ap,double); foo (i, ap); __builtin_va_end(ap); } --stdarg dump of function 2-- ;; Function f4 (f4) f4: va_list escapes 0, needs to save 8 GPR units and 3 FPR units. 
f4 (int i) { long int D.2051; int * D.2050; long unsigned int D.2049; int * D.2048; int foo_arg.2; struct ap; long int x.1; double D.2033; int D.2032; double * D.2031; long int D.2029; long unsigned int iftmp.0; double * D.2025; long int D.2024; int D.2023; void * D.2022; <bb 2>: __builtin_va_start (&ap, 0); D.2022_2 = ap.__base; D.2023_3 = ap.__offset; D.2024_4 = (long int) D.2023_3; D.2025_5 = (double *) D.2022_2; if (D.2024_4 <= 47) goto <bb 3>; else goto <bb 4>; <bb 3>: D.2029_6 = D.2024_4 + -48; iftmp.0_7 = (long unsigned int) D.2029_6; goto <bb 5>; <bb 4>: iftmp.0_8 = (long unsigned int) D.2024_4; <bb 5>: # iftmp.0_1 = PHI <iftmp.0_7(3), iftmp.0_8(4)> D.2031_9 = D.2025_5 + iftmp.0_1; D.2024_10 = D.2024_4 + 8; D.2032_11 = (int) D.2024_10; ap.__offset = D.2032_11; D.2033_12 = *D.2031_9; x.1_13 = (long int) D.2033_12; x = x.1_13; switch (i_14(D)) <default: <L4>, case 5: <L3>> <L3>: D.2051_23 = (long int) D.2032_11; D.2050_24 = (int *) D.2022_2; D.2049_25 = (long unsigned int) D.2051_23; D.2048_26 = D.2050_24 + D.2049_25; foo_arg.2_27 = *D.2048_26; foo_arg = foo_arg.2_27; __builtin_va_end (&ap); return; <L4>: abort (); } Tracing through tree-stdarg.c, the difference is in the number of calls to va_list_counter_struct_op made from this place (the call marked >>>): { if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) == GIMPLE_SINGLE_RHS) { /* Check for ap[0].field = temp. */ >>> if (va_list_counter_struct_op (&si, lhs, rhs, true)) continue; /* Check for temp = ap[0].field. */ else if (va_list_counter_struct_op (&si, rhs, lhs, false)) continue; } So indeed, in the failing case there is only one assignment to ap.__offset: the second va_arg, inlined from foo, loads the argument in <L3> but never stores the incremented offset back, so the pass reports only 8 GPR units. In the working case there are two assignments to ap.__offset, resulting in the correct number of GPR bytes (16, i.e. 2 registers) being saved. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41089