[Lldb-commits] [PATCH] D122867: [trace][intel pt] Handle better tsc in the decoder

walter erquinigo via Phabricator via lldb-commits Thu, 31 Mar 2022 22:42:52 -0700

wallace created this revision.
wallace added reviewers: jj10306, zrthxn.
Herald added a project: All.
wallace requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.


A problem that I introduced in the decoder is that I was considering TSC 
decoding
errors as actual instruction errors, which mean that the trace has a gap. This 
is
wrong because a TSC decoding error doesn't mean that there's a gap in the trace.
Instead, now I'm just counting how many of these errors happened and I'm using
the `dump info` command to check for this number.

Besides that, I refactored the decoder a little bit to make it simpler, more
readable, and to handle TSCs in a cleaner way.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D122867

Files:
  lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
  lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
  lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
  lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
  lldb/test/API/commands/trace/TestTraceDumpInfo.py

Index: lldb/test/API/commands/trace/TestTraceDumpInfo.py
===================================================================
--- lldb/test/API/commands/trace/TestTraceDumpInfo.py
+++ lldb/test/API/commands/trace/TestTraceDumpInfo.py
@@ -41,4 +41,5 @@
   Raw trace size: 4 KiB
   Total number of instructions: 21
   Total approximate memory usage: 0.98 KiB
-  Average memory usage per instruction: 48.00 bytes'''])
+  Average memory usage per instruction: 48.00 bytes
+  Number of TSC decoding errors: 0'''])
Index: lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
===================================================================
--- lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -111,9 +111,9 @@
     return;
   }
   s.Printf("\n");
-
-  size_t insn_len = Decode(thread)->GetInstructions().size();
-  size_t mem_used = Decode(thread)->CalculateApproximateMemoryUsage();
+  DecodedThreadSP decoded_trace_sp = Decode(thread);
+  size_t insn_len = decoded_trace_sp->GetInstructions().size();
+  size_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage();
 
   s.Printf("  Raw trace size: %zu KiB\n", *raw_size / 1024);
   s.Printf("  Total number of instructions: %zu\n", insn_len);
@@ -122,6 +122,8 @@
   if (insn_len != 0)
     s.Printf("  Average memory usage per instruction: %0.2lf bytes\n",
              (double)mem_used / insn_len);
+  s.Printf("  Number of TSC decoding errors: %d\n",
+           decoded_trace_sp->GetTscErrorsCount());
   return;
 }
 
Index: lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
===================================================================
--- lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
+++ lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp
@@ -90,6 +90,59 @@
   return 0;
 }
 
+// Simple struct used by the decoder to keep the state of the most
+// recent TSC and a flag indicating whether TSCs are enabled, not enabled
+// or we just don't yet.
+struct TscInfo {
+  uint64_t tsc = 0;
+  LazyBool has_tsc = eLazyBoolCalculate;
+
+  explicit operator bool() const { return has_tsc == eLazyBoolYes; }
+};
+
+/// Query the decoder for the most recent TSC timestamp and update
+/// tsc_info accordingly.
+void RefreshTscInfo(TscInfo &tsc_info, pt_insn_decoder &decoder,
+                    DecodedThreadSP decoded_thread_sp) {
+  if (tsc_info.has_tsc == eLazyBoolNo)
+    return;
+
+  uint64_t new_tsc;
+  if (int tsc_error = pt_insn_time(&decoder, &new_tsc, nullptr, nullptr)) {
+    if (tsc_error == -pte_no_time) {
+      // We now know that the trace doesn't support TSC, so we won't try again.
+      // See
+      // https://github.com/intel/libipt/blob/master/doc/man/pt_qry_time.3.md
+      tsc_info.has_tsc = eLazyBoolNo;
+    } else {
+      // We don't add TSC decoding errors in the decoded trace itself to prevent
+      // creating unnecessary gaps, but we can count how many of these errors
+      // happened. In this case we reuse the previous correct TSC we saw, as
+      // it's better than no TSC at all.
+      decoded_thread_sp->ReportTscError(tsc_error);
+    }
+  } else {
+    tsc_info.tsc = new_tsc;
+    tsc_info.has_tsc = eLazyBoolYes;
+  }
+}
+
+static void AppendError(DecodedThreadSP &decoded_thread_sp, Error &&error,
+                        TscInfo &tsc_info) {
+  if (tsc_info)
+    decoded_thread_sp->AppendError(std::move(error), tsc_info.tsc);
+  else
+    decoded_thread_sp->AppendError(std::move(error));
+}
+
+static void AppendInstruction(DecodedThreadSP &decoded_thread_sp,
+                              const pt_insn &insn, TscInfo &tsc_info) {
+  if (tsc_info)
+    decoded_thread_sp->AppendInstruction(insn, tsc_info.tsc);
+  else
+    decoded_thread_sp->AppendInstruction(insn);
+}
+
 /// Decode all the instructions from a configured decoder.
 /// The decoding flow is based on
 /// https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#the-instruction-flow-decode-loop
@@ -98,60 +151,50 @@
 /// Error codes returned by libipt while decoding are:
 /// - negative: actual errors
 /// - positive or zero: not an error, but a list of bits signaling the status of
-/// the decoder
+/// the decoder, e.g. whether there are events that need to be decoded or not
 ///
 /// \param[in] decoder
 ///   A configured libipt \a pt_insn_decoder.
 static void DecodeInstructions(pt_insn_decoder &decoder,
-                               DecodedThreadSP &decoded_thread_sp) {
-  while (true) {
-    int errcode = FindNextSynchronizationPoint(decoder);
-    if (errcode == -pte_eos)
-      break;
+                               DecodedThreadSP decoded_thread_sp) {
 
-    if (errcode < 0) {
-      decoded_thread_sp->AppendError(make_error<IntelPTError>(errcode));
+  TscInfo tsc_info;
+  // We have this "global" errcode because if it's positive, we'll need
+  // its bits later to process events.
+  int errcode;
+
+  while (true) {
+    if ((errcode = FindNextSynchronizationPoint(decoder)) < 0) {
+      // We signal a gap only if it's not "end of stream"
+      if (errcode != -pte_eos)
+        AppendError(decoded_thread_sp, make_error<IntelPTError>(errcode),
+                    tsc_info);
       break;
     }
 
     // We have synchronized, so we can start decoding
     // instructions and events.
     while (true) {
-      errcode = ProcessPTEvents(decoder, errcode);
-      if (errcode < 0) {
-        decoded_thread_sp->AppendError(make_error<IntelPTError>(errcode));
+      if ((errcode = ProcessPTEvents(decoder, errcode)) < 0) {
+        AppendError(decoded_thread_sp, make_error<IntelPTError>(errcode),
+                    tsc_info);
         break;
       }
 
-      pt_insn insn;
-      errcode = pt_insn_next(&decoder, &insn, sizeof(insn));
-      if (errcode == -pte_eos)
-        break;
-
-      if (errcode < 0) {
-        decoded_thread_sp->AppendError(
-            make_error<IntelPTError>(errcode, insn.ip));
-        break;
-      }
+      // We refresh the TSC that might have changed after processing the events.
+      // See
+      // https://github.com/intel/libipt/blob/master/doc/man/pt_evt_next.3.md
+      RefreshTscInfo(tsc_info, decoder, decoded_thread_sp);
 
-      uint64_t time;
-      int time_error = pt_insn_time(&decoder, &time, nullptr, nullptr);
-      if (time_error == -pte_invalid) {
-        // This happens if we invoke the pt_insn_time method incorrectly,
-        // but the instruction is good though.
-        decoded_thread_sp->AppendError(
-            make_error<IntelPTError>(time_error, insn.ip));
-        decoded_thread_sp->AppendInstruction(insn);
+      pt_insn insn;
+      if ((errcode = pt_insn_next(&decoder, &insn, sizeof(insn))) < 0) {
+        // We signal a gap only if it's not "end of stream"
+        if (errcode != -pte_eos)
+          AppendError(decoded_thread_sp,
+                      make_error<IntelPTError>(errcode, insn.ip), tsc_info);
         break;
       }
-
-      if (time_error == -pte_no_time) {
-        // We simply don't have time information, i.e. None of TSC, MTC or CYC
-        // was enabled.
-        decoded_thread_sp->AppendInstruction(insn);
-      } else {
-        decoded_thread_sp->AppendInstruction(insn, time);
-      }
+      AppendInstruction(decoded_thread_sp, insn, tsc_info);
     }
   }
 }
Index: lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
===================================================================
--- lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
+++ lldb/source/Plugins/Trace/intel-pt/DecodedThread.h
@@ -195,6 +195,17 @@
   ///   points to a valid instruction.
   const char *GetErrorByInstructionIndex(size_t ins_idx);
 
+  /// Append a decoding error with a corresponding TSC.
+  void AppendError(llvm::Error &&error, uint64_t TSC);
+
+  /// Report an error decoding a TSC timestamp.
+  ///
+  /// See \a GetTscErrorsCount() for more documentation.
+  ///
+  /// \param[in] libipt_error_code
+  ///   An error returned by the libipt library.
+  void ReportTscError(int libipt_error_code);
+
   /// Get a new cursor for the decoded thread.
   lldb::TraceCursorUP GetCursor();
 
@@ -207,6 +218,14 @@
   ///   The size of the trace, or \b llvm::None if not available.
   llvm::Optional<size_t> GetRawTraceSize() const;
 
+  /// Return he number of TSC decoding errors that happened. A TSC error
+  /// is not a fatal error and doesn't create gaps in the trace. Instead
+  /// we only keep track of them as a statistic.
+  ///
+  /// \return
+  ///   The number of TSC decoding errors.
+  int GetTscErrorsCount() const;
+
   /// The approximate size in bytes used by this instance,
   /// including all the already decoded instructions.
   size_t CalculateApproximateMemoryUsage() const;
@@ -214,6 +233,10 @@
   lldb::ThreadSP GetThread();
 
 private:
+  /// Notify this class that the last added instruction or error has
+  /// an associated TSC.
+  void ReportTscForLastInstruction(uint64_t tsc);
+
   /// When adding new members to this class, make sure
   /// to update \a CalculateApproximateMemoryUsage() accordingly.
   lldb::ThreadSP m_thread_sp;
@@ -235,6 +258,7 @@
   /// The size in bytes of the raw buffer before decoding. It might be None if
   /// the decoding failed.
   llvm::Optional<size_t> m_raw_trace_size;
+  int m_tsc_errors = 0;
 };
 
 using DecodedThreadSP = std::shared_ptr<DecodedThread>;
Index: lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
===================================================================
--- lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
+++ lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp
@@ -86,12 +86,7 @@
 
 ThreadSP DecodedThread::GetThread() { return m_thread_sp; }
 
-void DecodedThread::AppendInstruction(const pt_insn &insn) {
-  m_instructions.emplace_back(insn);
-}
-
-void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
-  m_instructions.emplace_back(insn);
+void DecodedThread::ReportTscForLastInstruction(uint64_t tsc) {
   if (!m_last_tsc || *m_last_tsc != tsc) {
     // In case the first instructions are errors or did not have a TSC, we'll
     // get a first valid TSC not in position 0. We can safely force these error
@@ -103,11 +98,29 @@
   }
 }
 
+void DecodedThread::AppendInstruction(const pt_insn &insn) {
+  m_instructions.emplace_back(insn);
+}
+
+void DecodedThread::AppendInstruction(const pt_insn &insn, uint64_t tsc) {
+  AppendInstruction(insn);
+  ReportTscForLastInstruction(tsc);
+}
+
 void DecodedThread::AppendError(llvm::Error &&error) {
   m_errors.try_emplace(m_instructions.size(), toString(std::move(error)));
   m_instructions.emplace_back();
 }
 
+void DecodedThread::AppendError(llvm::Error &&error, uint64_t tsc) {
+  AppendError(std::move(error));
+  ReportTscForLastInstruction(tsc);
+}
+
+void DecodedThread::ReportTscError(int libipt_error_code) { m_tsc_errors++; }
+
+int DecodedThread::GetTscErrorsCount() const { return m_tsc_errors; }
+
 ArrayRef<IntelPTInstruction> DecodedThread::GetInstructions() const {
   return makeArrayRef(m_instructions);
 }
@@ -194,4 +207,4 @@
   auto prev_it = m_it;
   --prev_it;
   return TscRange(prev_it, *m_decoded_thread);
-}
\ No newline at end of file
+}

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

[Lldb-commits] [PATCH] D122867: [trace][intel pt] Handle better tsc in the decoder

Reply via email to