Hi,

I've hit some issue trying to unwind a clang compiled program with fpo
enabled on x86. I've found a fix that I would have cleaned up and
submitted a pull request if not because I'm partially confused by the
comment there.

The issue is very similar to the one worked around in
https://github.com/libunwind/libunwind/commit/3d9a694de85f2ba10368b4fbc2aff1c6b8b76f58
and happens to me when there's a cfi right after the call instruction
of a noreturn function. GCC usually emits a `restore_state` in that
case which is what the above patch works around. However, that is
definately not mandated and clang actually emits normal
`def_cfi_offset`s which cause libunwind to mess up badly....

A few notes about the code that reproduces the issue,
FPO plays a role since otherwise the cfa is ebp and doesn't need
changing all the time. X86 is especially susceptible to the problem
since basically all calls need to adjust the sp (for arguments). The
program to reproduce is attached (unw.c) and can be compiled with `
-O3 -lunwind -fomit-frame-pointer -fasynchronous-unwind-tables -fno-P
IE -g -Wl,--export-dynamic -fno-stack-protector
` options with gcc to see the expected behavior and with clang to see the issue.

The patch above claims that the signal frame handling can be broken
when fixing this issue which I can reproduce and although I didn't
track it doesn directly I guess it's caused by `fetch_proc_info`
updating the c->use_prev_instr to that for the next frame too early. I
could reproduce the failure with the naive fix and I can fix that
failure by delaying that update (however, those tests doesn't seems to
be particularly reliable and the main difference is how often it
fails. The difference may or may not be statistically significant...).

That leave the final question of what the comment around the old logic
mean. The comment

  /* Process everything up to and including the current 'end_ip',
     including all the DW_CFA_advance_loc instructions.  See
     'c->use_prev_instr' use in 'fetch_proc_info' for details. */

and

     For execution resume, we need to do the exact opposite and look
     up using the current 'ip' value.  That is where execution will
     continue, and it's important we get this right, as 'ip' could be
     right at the function entry and hence FDE edge, or at instruction
     that manipulates CFA (push/pop). */

Suggests that this is intended. I can see it's reasoning when applying
for unwinding the stack for continuing execution of the program (more
like early return?) and I guess for callee cleanup calling convention
the call instruction can indeed affect the stack. However, I don't
think that argument applies for obtaining back traces and since I only
use libunwind for stack traces I'll need some input of if this fix
actually affect that or what will be a better fix.

The patch is also attached.
From 0abca47e977bdb882beae6c56194f2f005e895e3 Mon Sep 17 00:00:00 2001
From: Yichao Yu <[email protected]>
Date: Fri, 27 Oct 2017 13:50:58 +0000
Subject: [PATCH] .....

---
 src/dwarf/Gparser.c | 50 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
index 70e690f..438a78a 100644
--- a/src/dwarf/Gparser.c
+++ b/src/dwarf/Gparser.c
@@ -115,7 +115,7 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr,
   /* Process everything up to and including the current 'end_ip',
      including all the DW_CFA_advance_loc instructions.  See
      'c->use_prev_instr' use in 'fetch_proc_info' for details. */
-  while (*ip <= end_ip && *addr < end_addr && ret >= 0)
+  while (*ip + c->use_prev_instr <= end_ip && *addr < end_addr && ret >= 0)
     {
       unw_word_t operand = 0, regnum, val, len;
       uint8_t u8, op;
@@ -289,10 +289,8 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr,
               ret = -UNW_EINVAL;
               break;
             }
-          if (*ip < end_ip) {
-            memcpy (&sr->rs_current, &(*rs_stack)->state, sizeof (sr->rs_current));
-            pop_rstate_stack(rs_stack);
-          }
+          memcpy (&sr->rs_current, &(*rs_stack)->state, sizeof (sr->rs_current));
+          pop_rstate_stack(rs_stack);
           Debug (15, "CFA_restore_state\n");
           break;
 
@@ -384,11 +382,8 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr,
         case DW_CFA_GNU_args_size:
           if ((ret = dwarf_read_uleb128 (as, a, addr, &val, arg)) < 0)
             break;
-          if (*ip < end_ip)
-            {
-              sr->args_size = val;
-              Debug (15, "CFA_GNU_args_size %lu\n", (long) val);
-            }
+          sr->args_size = val;
+          Debug (15, "CFA_GNU_args_size %lu\n", (long) val);
           break;
 
         case DW_CFA_GNU_negative_offset_extended:
@@ -429,7 +424,7 @@ run_cfi_program (struct dwarf_cursor *c, dwarf_state_record_t *sr,
 }
 
 static int
-fetch_proc_info (struct dwarf_cursor *c, unw_word_t ip, int need_unwind_info)
+fetch_proc_info (struct dwarf_cursor *c, unw_word_t ip)
 {
   int ret, dynamic = 1;
 
@@ -475,14 +470,6 @@ fetch_proc_info (struct dwarf_cursor *c, unw_word_t ip, int need_unwind_info)
   if (ret >= 0)
     tdep_fetch_frame (c, ip, 1);
 
-  /* Update use_prev_instr for the next frame. */
-  if (need_unwind_info)
-  {
-    assert(c->pi.unwind_info);
-    struct dwarf_cie_info *dci = c->pi.unwind_info;
-    c->use_prev_instr = ! dci->signal_frame;
-  }
-
   return ret;
 }
 
@@ -914,10 +901,17 @@ find_reg_state (struct dwarf_cursor *c, dwarf_state_record_t *sr)
     }
   else
     {
-      ret = fetch_proc_info (c, c->ip, 1);
-      if (ret >= 0)
+      ret = fetch_proc_info (c, c->ip);
+      int next_use_prev_instr = c->use_prev_instr;
+      if (ret >= 0) {
+	/* Update use_prev_instr for the next frame. */
+	assert(c->pi.unwind_info);
+	struct dwarf_cie_info *dci = c->pi.unwind_info;
+	next_use_prev_instr = ! dci->signal_frame;
 	ret = create_state_record_for (c, sr, c->ip);
+      }
       put_unwind_info (c, &c->pi);
+      c->use_prev_instr = next_use_prev_instr;
 
       if (cache && ret >= 0)
 	{
@@ -972,7 +966,7 @@ dwarf_make_proc_info (struct dwarf_cursor *c)
   int ret;
 
   /* Lookup it up the slow way... */
-  ret = fetch_proc_info (c, c->ip, 0);
+  ret = fetch_proc_info (c, c->ip);
   if (ret >= 0)
       ret = create_state_record_for (c, &sr, c->ip);
   put_unwind_info (c, &c->pi);
@@ -1006,8 +1000,8 @@ dwarf_reg_states_table_iterate(struct dwarf_cursor *c,
   while (ret >= 0 && curr_ip < c->pi.end_ip && addr < dci->fde_instr_end)
     {
       unw_word_t prev_ip = curr_ip;
-      ret = run_cfi_program (c, &sr, &curr_ip, prev_ip, &addr, dci->fde_instr_end,
-			     &rs_stack, dci);
+      ret = run_cfi_program (c, &sr, &curr_ip, prev_ip + c->use_prev_instr, &addr,
+			     dci->fde_instr_end, &rs_stack, dci);
       if (ret >= 0 && prev_ip < curr_ip)
 	ret = cb(token, &sr.rs_current, sizeof(sr.rs_current), prev_ip, curr_ip);
     }
@@ -1029,8 +1023,13 @@ dwarf_reg_states_iterate(struct dwarf_cursor *c,
 			 unw_reg_states_callback cb,
 			 void *token)
 {
-  int ret = fetch_proc_info (c, c->ip, 1);
+  int ret = fetch_proc_info (c, c->ip);
+  int next_use_prev_instr = c->use_prev_instr;
   if (ret >= 0)
+    /* Update use_prev_instr for the next frame. */
+    assert(c->pi.unwind_info);
+    struct dwarf_cie_info *dci = c->pi.unwind_info;
+    next_use_prev_instr = ! dci->signal_frame;
     switch (c->pi.format)
       {
       case UNW_INFO_FORMAT_TABLE:
@@ -1047,6 +1046,7 @@ dwarf_reg_states_iterate(struct dwarf_cursor *c,
 	ret = -UNW_EINVAL;
       }
   put_unwind_info (c, &c->pi);
+  c->use_prev_instr = next_use_prev_instr;
   return ret;
 }
 
-- 
2.14.3

//

#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <execinfo.h>
#include <setjmp.h>

__attribute__((noinline)) void unw(void)
{
    printf("Unwinding.\n");
    unw_context_t context;
    memset(&context, 0, sizeof(context));
    unw_getcontext(&context);
    unw_cursor_t cursor;
    if (unw_init_local(&cursor, &context) != 0) {
        printf("Init failed\n");
        return;
    }
    while (1) {
        unw_word_t reg;
        if (unw_get_reg(&cursor, UNW_REG_IP, &reg) < 0) {
            printf("Can't get RIP\n");
            return;
        }
        // printf("  RIP: %p\n", (void*)(uintptr_t)reg);
        /* reg--; */
        backtrace_symbols_fd((void *const*)&reg, 1, 1);
        /* if (unw_get_reg(&cursor, UNW_REG_SP, &reg) < 0) { */
        /*     printf("Can't get RSP\n"); */
        /*     return; */
        /* } */
        /* printf("RSP: %p\n", (void*)(uintptr_t)reg); */
        if (unw_step(&cursor) <= 0) {
            printf("Done\n");
            return;
        }
    }
}

jmp_buf start;

__attribute__((noinline,noreturn)) void throw(void *p, void *p2, void *p3, void *p4, int a)
{
    unw();
    asm volatile ("" :: "r"(p), "r"(p2), "r"(p3), "r"(p4) : "memory");
    if (a) {
        longjmp(start, 1);
    }
    else {
        _exit(0);
    }
}

__attribute__((noinline)) void not_throw(void *p)
{
    /* unw(); */
    asm volatile ("" :: "r"(p) : "memory");
}

__attribute__((noinline)) void f(int a, int b)
{
    if (a) {
        uint32_t p = a;
        not_throw(&p);
        throw(&p, &a, &b, &p, a);
    }
    else {
        not_throw(&a);
        int p = 2;
        throw(&b, &p, &a, &b, a);
    }
}

__attribute__((noinline)) void g(int a, int b)
{
    f(a, b - a);
}

__attribute__((noinline)) void h(int a, int b)
{
    g(a + b, -a);
}

volatile int flag = 1;
volatile int flag2 = 0;

int main()
{
    if (setjmp(start) == 0) {
        h(flag, flag2);
    }
    else {
        h(0, flag2);
    }
    return 0;
}
_______________________________________________
Libunwind-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/libunwind-devel

Reply via email to