wallace created this revision.
wallace added a reviewer: jj10306.
Herald added a project: All.
wallace requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.
This should be the final patch to support Intel PT decoding per CPU. It mostly does its job by correlating the subtraces delimited by PSBs with the context switch traces. Sadly, I'm finding some anomalies, like a user-space PSB showing up during a kernel execution according to the context switch trace:

  tid = 0, core id = 29, in_tsc = 38890678707555985, out_tsc = 38890678718659020, psb_tsc = 38890678718646466

I still have to figure out how to fix these issues, but I'm leaving this patch here for reference.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126394

Files:
  lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp
  lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h
  lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp
  lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h
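To make the correlation concrete for reviewers, here is a minimal standalone sketch of the idea. The types below are simplified stand-ins for IntelPTContinuousExecution and ThreadContinuousExecution (Complete variant only); they are illustrative, not the classes added by this patch.

  // Simplified sketch of the per-core correlation step.
  #include <cstdint>
  #include <vector>

  struct PSBSubtrace {      // stand-in for IntelPTContinuousExecution
    uint64_t tsc;           // TSC of the PSB packet that opens the subtrace
    uint64_t psb_offset;    // offset of that PSB in the core's trace buffer
  };

  struct Execution {        // stand-in for a Complete ThreadContinuousExecution
    uint64_t tid;
    uint64_t start_tsc;     // context-switch-in TSC
    uint64_t end_tsc;       // context-switch-out TSC
    std::vector<PSBSubtrace> subtraces; // PSBs that fall inside [start, end]
  };

  // Attach to each context-switch execution of a core the PSB subtraces of the
  // same core whose TSC falls inside the execution's interval. Both inputs are
  // assumed to be in chronological (TSC) order, as per-core traces are.
  void Correlate(std::vector<Execution> &executions,
                 const std::vector<PSBSubtrace> &subtraces) {
    auto it = subtraces.begin();
    for (Execution &exec : executions) {
      // Skip subtraces that happened before this execution was switched in.
      while (it != subtraces.end() && it->tsc < exec.start_tsc)
        ++it;
      // Take every subtrace that starts before the execution is switched out.
      for (; it != subtraces.end() && it->tsc < exec.end_tsc; ++it)
        exec.subtraces.push_back(*it);
    }
  }

Decoding a thread then amounts to replaying, per execution, the attached subtraces on the decoder of the owning core: sync to psb_offset and stop at the next PSB (ResetDecoderOffset / DecodeUntilOffset in the patch).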
Index: lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h =================================================================== --- lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h +++ lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h @@ -9,96 +9,12 @@ #ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H #define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H +#include "LibiptDecoder.h" #include "ThreadDecoder.h" namespace lldb_private { namespace trace_intel_pt { -/// This class indicates the time interval in which a thread was running -/// continuously on a cpu core. -/// -/// In most cases both endpoints of the intervals can be accurately recovered -/// from a context switch trace, but in some cases one of these endpoints might -/// be guessed or not known at all, due to contention problems in the trace or -/// because tracing was interrupted. -/// -/// Note: we use the terms CPU and cores interchangeably. -struct ThreadContinuousExecution { - enum class Variant { - /// Both endpoints are known - Complete, - /// The end is known and we have a guess for the start - HintedStart, - /// The start is known and we have a guess for the end - HintedEnd, - /// We only know the start. This might be the last entry of a core trace. - OnlyStart, - /// We only know the end. This might be the first entry or a core trace. - OnlyEnd, - } variant; - - union { - struct { - uint64_t start; - uint64_t end; - } complete; - struct { - uint64_t start; - } only_start; - struct { - uint64_t end; - } only_end; - /// The following 'hinted' structures are useful when there are contention - /// problems in the trace - struct { - uint64_t hinted_start; - uint64_t end; - } hinted_start; - struct { - uint64_t start; - uint64_t hinted_end; - } hinted_end; - } tscs; - - lldb::core_id_t core_id; - lldb::tid_t tid; - - /// \return - /// A tsc that we are certain of, either the start or the end. - uint64_t GetErrorFreeTSC() const; - - /// Constructors for the different variants of this object - /// - /// \{ - static ThreadContinuousExecution - CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start, uint64_t end); - - static ThreadContinuousExecution - CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t hinted_start, uint64_t end); - - static ThreadContinuousExecution - CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start, uint64_t hinted_end); - - static ThreadContinuousExecution - CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t end); - - static ThreadContinuousExecution - CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start); - /// \} - - /// Comparator by TSCs - bool operator<(const ThreadContinuousExecution &o) const; - -private: - ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid) - : core_id(core_id), tid(tid) {} -}; - /// Class used to decode a multi-core Intel PT trace. It assumes that each /// thread could have potentially been executed on different cores. It uses a /// context switch trace per CPU with timestamps to identify which thread owns @@ -143,6 +59,11 @@ /// by thread. 
llvm::Error DecodeContextSwitchTraces(); + void DecodeFromCore( + std::set<lldb::core_id_t>::iterator core_id, + llvm::DenseMap<lldb::core_id_t, llvm::ArrayRef<uint8_t>> &buffers, + DecodedThread &decoded_thread, Thread &thread); + TraceIntelPT &m_trace; std::set<lldb::core_id_t> m_cores; std::set<lldb::tid_t> m_tids; Index: lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp =================================================================== --- lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp +++ lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp @@ -101,6 +101,11 @@ return o; } +void ThreadContinuousExecution::AddIntelPTExecution( + const IntelPTContinuousExecution &intel_pt_execution) { + m_intel_pt_executions.push_back(intel_pt_execution); +} + bool ThreadContinuousExecution::operator<( const ThreadContinuousExecution &o) const { // We can compare by GetErrorFreeTSC because context switches across CPUs can @@ -181,7 +186,7 @@ static Error DecodePerfContextSwitchTrace( ArrayRef<uint8_t> data, core_id_t core_id, const LinuxPerfZeroTscConversion &tsc_conversion, - std::function<void(ThreadContinuousExecution &&execution)> + std::function<void(ThreadContinuousExecution execution)> on_new_thread_execution) { auto CreateError = [&](size_t offset, auto error) -> Error { return createStringError(inconvertibleErrorCode(), @@ -241,14 +246,36 @@ return m_tids.count(tid); } +void TraceIntelPTMultiCoreDecoder::DecodeFromCore( + std::set<core_id_t>::iterator core_id, + DenseMap<core_id_t, ArrayRef<uint8_t>> &buffers, + DecodedThread &decoded_thread, Thread &thread) { + if (core_id == m_cores.end()) { + DecodeTrace( + decoded_thread, m_trace, buffers, + m_continuous_executions_per_thread->find(thread.GetID())->second); + return; + } + cantFail(m_trace.OnCoreBinaryDataRead( + *core_id, IntelPTDataKinds::kTraceBuffer, + [&](ArrayRef<uint8_t> data) -> Error { + buffers.try_emplace(*core_id, data); + auto next_id = core_id; + next_id++; + DecodeFromCore(next_id, buffers, decoded_thread, thread); + return Error::success(); + })); +} + DecodedThreadSP TraceIntelPTMultiCoreDecoder::Decode(Thread &thread) { if (Error err = DecodeContextSwitchTraces()) return std::make_shared<DecodedThread>(thread.shared_from_this(), std::move(err)); - - return std::make_shared<DecodedThread>( - thread.shared_from_this(), - createStringError(inconvertibleErrorCode(), "unimplemented")); + DecodedThreadSP decoded_thread_sp = + std::make_shared<DecodedThread>(thread.shared_from_this()); + DenseMap<core_id_t, ArrayRef<uint8_t>> buffers; + DecodeFromCore(m_cores.begin(), buffers, *decoded_thread_sp, thread); + return decoded_thread_sp; } Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() { @@ -260,36 +287,80 @@ m_continuous_executions_per_thread.emplace(); - auto do_decode = [&]() -> Error { - // We'll decode all context switch traces, identify continuous executions - // and group them by thread. 
+ auto correlate_context_switches_and_intel_pt_traces = [&]() -> Error { for (core_id_t core_id : m_cores) { + std::vector<IntelPTContinuousExecution> intel_pt_executions; + Error err = m_trace.OnCoreBinaryDataRead( + core_id, IntelPTDataKinds::kTraceBuffer, + [&](ArrayRef<uint8_t> data) -> Error { + Expected<std::vector<IntelPTContinuousExecution>> split_trace = + SplitTraceInContinuousExecutions(m_trace, data); + if (!split_trace) + return split_trace.takeError(); + intel_pt_executions = std::move(*split_trace); + + return Error::success(); + }); + if (err) + return err; + + auto it = intel_pt_executions.begin(); + auto on_new_thread_execution = [&](ThreadContinuousExecution execution) { + if (execution.variant == ThreadContinuousExecution::Variant::Complete) { + for (; it != intel_pt_executions.end() && + it->tsc < execution.tscs.complete.end; + it++) { + if (it->tsc > execution.tscs.complete.start) { + execution.AddIntelPTExecution(*it); + } + } + } + (*m_continuous_executions_per_thread)[execution.tid].push_back( + std::move(execution)); + }; + err = m_trace.OnCoreBinaryDataRead( core_id, IntelPTDataKinds::kPerfContextSwitchTrace, [&](ArrayRef<uint8_t> data) -> Error { - return DecodePerfContextSwitchTrace( - data, core_id, m_tsc_conversion, - [&](const ThreadContinuousExecution &execution) { - (*m_continuous_executions_per_thread)[execution.tid] - .push_back(execution); - }); + return DecodePerfContextSwitchTrace(data, core_id, m_tsc_conversion, + on_new_thread_execution); }); - if (err) { - m_setup_error = toString(std::move(err)); - return createStringError(inconvertibleErrorCode(), - m_setup_error->c_str()); - } + if (err) + return err; } - // We now sort the executions of each to have them ready for instruction - // decoding + // We now sort the executions of each thread to have them ready for + // instruction decoding for (auto &tid_executions : *m_continuous_executions_per_thread) std::sort(tid_executions.second.begin(), tid_executions.second.end()); + for (auto &tid_executions : *m_continuous_executions_per_thread) { + for (auto &thread_execution : tid_executions.second) { + for (auto &intel_pt_execution : + thread_execution.m_intel_pt_executions) { + printf("%s\n", formatv("tid = {0}, core id = {1}, offset = {2}, " + "in_tsc = {3}, out_tsc = {4}, psb_tsc = {5}", + thread_execution.tid, thread_execution.core_id, + intel_pt_execution.psb_offset, + thread_execution.tscs.complete.start, + thread_execution.tscs.complete.end, + intel_pt_execution.tsc) + .str() + .c_str()); + } + } + } + return Error::success(); }; - return m_trace.GetTimer().ForGlobal().TimeTask<Error>( - "Context switch trace decoding", do_decode); + Error err = m_trace.GetTimer().ForGlobal().TimeTask<Error>( + "Context switch and Intel PT traces correlation", + correlate_context_switches_and_intel_pt_traces); + if (err) { + m_setup_error = toString(std::move(err)); + return createStringError(inconvertibleErrorCode(), m_setup_error->c_str()); + } + return Error::success(); } size_t TraceIntelPTMultiCoreDecoder::GetNumContinuousExecutionsForThread( Index: lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h =================================================================== --- lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h +++ lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h @@ -12,17 +12,124 @@ #include "intel-pt.h" #include "DecodedThread.h" + #include "forward-declarations.h" namespace lldb_private { namespace trace_intel_pt { +struct IntelPTContinuousExecution { + uint64_t tsc; + lldb::core_id_t core_id; + 
uint64_t psb_offset; + uint64_t next_psb_offset; +}; + +/// This class indicates the time interval in which a thread was running +/// continuously on a cpu core. +/// +/// In most cases both endpoints of the intervals can be accurately recovered +/// from a context switch trace, but in some cases one of these endpoints might +/// be guessed or not known at all, due to contention problems in the trace or +/// because tracing was interrupted. +/// +/// Note: we use the terms CPU and cores interchangeably. +struct ThreadContinuousExecution { + enum class Variant { + /// Both endpoints are known + Complete, + /// The end is known and we have a guess for the start + HintedStart, + /// The start is known and we have a guess for the end + HintedEnd, + /// We only know the start. This might be the last entry of a core trace. + OnlyStart, + /// We only know the end. This might be the first entry or a core trace. + OnlyEnd, + } variant; + + union { + struct { + uint64_t start; + uint64_t end; + } complete; + struct { + uint64_t start; + } only_start; + struct { + uint64_t end; + } only_end; + /// The following 'hinted' structures are useful when there are contention + /// problems in the trace + struct { + uint64_t hinted_start; + uint64_t end; + } hinted_start; + struct { + uint64_t start; + uint64_t hinted_end; + } hinted_end; + } tscs; + + lldb::core_id_t core_id; + lldb::tid_t tid; + + /// \return + /// A tsc that we are certain of, either the start or the end. + uint64_t GetErrorFreeTSC() const; + + /// Constructors for the different variants of this object + /// + /// \{ + static ThreadContinuousExecution + CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start, uint64_t end); + + static ThreadContinuousExecution + CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t hinted_start, uint64_t end); + + static ThreadContinuousExecution + CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start, uint64_t hinted_end); + + static ThreadContinuousExecution + CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t end); + + static ThreadContinuousExecution + CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start); + /// \} + + void + AddIntelPTExecution(const IntelPTContinuousExecution &intel_pt_execution); + + /// Comparator by TSCs + bool operator<(const ThreadContinuousExecution &o) const; + + std::vector<IntelPTContinuousExecution> m_intel_pt_executions; + +private: + ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid) + : core_id(core_id), tid(tid) {} +}; + /// Decode a raw Intel PT trace given in \p buffer and append the decoded /// instructions and errors in \p decoded_thread. It uses the low level libipt /// library underneath. 
void DecodeTrace(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, llvm::ArrayRef<uint8_t> buffer); +void DecodeTrace( + DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, + const llvm::DenseMap<lldb::core_id_t, llvm::ArrayRef<uint8_t>> &buffers, + const std::vector<ThreadContinuousExecution> &executions); + +llvm::Expected<std::vector<IntelPTContinuousExecution>> +SplitTraceInContinuousExecutions(TraceIntelPT &trace_intel_pt, + llvm::ArrayRef<uint8_t> buffer); + } // namespace trace_intel_pt } // namespace lldb_private Index: lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp =================================================================== --- lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -26,6 +26,96 @@ explicit operator bool() const { return has_tsc == eLazyBoolYes; } }; +class LibiptSplitter { +public: + LibiptSplitter(pt_insn_decoder &decoder) : m_decoder(decoder) {} + + std::vector<IntelPTContinuousExecution> SplitTraceInContinuousExecutions() { + int status = pte_ok; + std::vector<IntelPTContinuousExecution> executions; + while (!IsLibiptError(status = FindNextSynchronizationPoint())) { + if (IsLibiptError(status = ProcessPTEvents(status))) + continue; + + Optional<uint64_t> tsc = FetchTsc(); + if (!tsc) + continue; + uint64_t psb_offset = 0; + if (!IsLibiptError(pt_insn_get_sync_offset(&m_decoder, &psb_offset))) { + executions.push_back({ + *tsc, + 0, // core id + psb_offset, // uint64_t offset; + 0, // next_psb_offset + }); + } + } + for (size_t i = 0; i + 1 < executions.size(); i++) + executions[i].next_psb_offset = executions[i + 1].psb_offset; + return executions; + } + +private: + Optional<uint64_t> FetchTsc() { + uint64_t tsc; + int tsc_status; + if (IsLibiptError(tsc_status = + pt_insn_time(&m_decoder, &tsc, nullptr, nullptr))) { + return None; + } + return tsc; + } + + int ProcessPTEvents(int status) { + while (status & pts_event_pending) { + pt_event event; + status = pt_insn_event(&m_decoder, &event, sizeof(event)); + if (IsLibiptError(status)) { + return status; + } + } + return pte_ok; + } + int FindNextSynchronizationPoint() { + // Try to sync the decoder. If it fails, then get the decoder_offset and + // try to sync again from the next synchronization point. If the + // new_decoder_offset is same as decoder_offset then we can't move to the + // next synchronization point. Otherwise, keep resyncing until either end + // of trace stream (eos) is reached or pt_insn_sync_forward() passes. + int status = pt_insn_sync_forward(&m_decoder); + + if (!IsEndOfStream(status) && IsLibiptError(status)) { + uint64_t decoder_offset = 0; + int errcode_off = pt_insn_get_offset(&m_decoder, &decoder_offset); + if (!IsLibiptError(errcode_off)) { // we could get the offset + while (true) { + status = pt_insn_sync_forward(&m_decoder); + if (!IsLibiptError(status) || IsEndOfStream(status)) + break; + + uint64_t new_decoder_offset = 0; + errcode_off = pt_insn_get_offset(&m_decoder, &new_decoder_offset); + if (IsLibiptError(errcode_off)) + break; // We can't further synchronize. + else if (new_decoder_offset <= decoder_offset) { + // We tried resyncing the decoder and it didn't make any progress + // because the offset didn't change. We will not make any further + // progress. Hence, we stop in this situation. + break; + } + // We'll try again starting from a new offset. 
+ decoder_offset = new_decoder_offset; + } + } + } + + return status; + } + + pt_insn_decoder &m_decoder; + TscInfo m_tsc_info; +}; + /// Class that decodes a raw buffer for a single thread using the low level /// libipt library. /// @@ -62,6 +152,26 @@ } } + void ResetDecoderOffset(uint64_t offset) { + int error = pte_ok; + if (IsLibiptError(error = pt_insn_sync_set(&m_decoder, offset))) + m_decoded_thread.Append(DecodedInstruction(error)); + } + + void DecodeUntilOffset(uint64_t end_offset) { + int status = pte_ok; + while (!IsLibiptError(status = FindNextSynchronizationPoint())) { + uint64_t cur_offset; + pt_insn_get_offset(&m_decoder, &cur_offset); + if (cur_offset > end_offset) + break; + // We have synchronized, so we can start decoding instructions and + // events. + // Multiple loops indicate gaps in the trace. + DecodeInstructionsAndEvents(status, end_offset); + } + } + private: /// Invoke the low level function \a pt_insn_next and store the decoded /// instruction in the given \a DecodedInstruction. @@ -80,8 +190,15 @@ /// /// \param[in] status /// The status that was result of synchronizing to the most recent PSB. - void DecodeInstructionsAndEvents(int status) { + void DecodeInstructionsAndEvents(int status, + Optional<uint64_t> end_offset = None) { while (DecodedInstruction insn = ProcessPTEvents(status)) { + if (end_offset) { + uint64_t cur_offset; + pt_insn_get_offset(&m_decoder, &cur_offset); + if (cur_offset > *end_offset) + break; + } // The status returned by DecodeNextInstruction will need to be processed // by ProcessPTEvents in the next loop if it is not an error. if (IsLibiptError(status = DecodeNextInstruction(insn))) { @@ -264,8 +381,7 @@ std::unique_ptr<pt_insn_decoder, decltype(DecoderDeleter)>; static Expected<PtInsnDecoderUP> -CreateInstructionDecoder(DecodedThread &decoded_thread, - TraceIntelPT &trace_intel_pt, +CreateInstructionDecoder(TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer) { Expected<pt_cpu> cpu_info = trace_intel_pt.GetCPUInfo(); if (!cpu_info) @@ -287,25 +403,73 @@ pt_insn_decoder *decoder_ptr = pt_insn_alloc_decoder(&config); if (!decoder_ptr) return make_error<IntelPTError>(-pte_nomem); - PtInsnDecoderUP decoder_up(decoder_ptr, DecoderDeleter); - pt_image *image = pt_insn_get_image(decoder_ptr); - Process *process = decoded_thread.GetThread()->GetProcess().get(); + return PtInsnDecoderUP(decoder_ptr, DecoderDeleter); +} + +static Error SetupMemoryImages(PtInsnDecoderUP &decoder_up, Process &process) { + pt_image *image = pt_insn_get_image(decoder_up.get()); + int status = pte_ok; if (IsLibiptError( - status = pt_image_set_callback(image, ReadProcessMemory, process))) + status = pt_image_set_callback(image, ReadProcessMemory, &process))) return make_error<IntelPTError>(status); - return decoder_up; + return Error::success(); } void lldb_private::trace_intel_pt::DecodeTrace(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, ArrayRef<uint8_t> buffer) { Expected<PtInsnDecoderUP> decoder_up = - CreateInstructionDecoder(decoded_thread, trace_intel_pt, buffer); + CreateInstructionDecoder(trace_intel_pt, buffer); if (!decoder_up) return decoded_thread.SetAsFailed(decoder_up.takeError()); + if (Error err = SetupMemoryImages(*decoder_up, + *decoded_thread.GetThread()->GetProcess())) + return decoded_thread.SetAsFailed(std::move(err)); + LibiptDecoder libipt_decoder(*decoder_up.get(), decoded_thread); libipt_decoder.DecodeUntilEndOfTrace(); } + +void lldb_private::trace_intel_pt::DecodeTrace( + DecodedThread &decoded_thread, TraceIntelPT 
&trace_intel_pt, + const DenseMap<lldb::core_id_t, llvm::ArrayRef<uint8_t>> &buffers, + const std::vector<ThreadContinuousExecution> &executions) { + DenseMap<lldb::core_id_t, LibiptDecoder> decoders; + for (auto &core_id_buffer : buffers) { + Expected<PtInsnDecoderUP> decoder_up = + CreateInstructionDecoder(trace_intel_pt, core_id_buffer.second); + if (!decoder_up) + return decoded_thread.SetAsFailed(decoder_up.takeError()); + + if (Error err = SetupMemoryImages( + *decoder_up, *decoded_thread.GetThread()->GetProcess())) + return decoded_thread.SetAsFailed(std::move(err)); + + decoders.try_emplace(core_id_buffer.first, + LibiptDecoder(*decoder_up.get(), decoded_thread)); + } + + for (const ThreadContinuousExecution &execution : executions) { + LibiptDecoder &decoder = decoders.find(execution.core_id)->second; + for (const IntelPTContinuousExecution &intel_pt_execution : + execution.m_intel_pt_executions) { + decoder.ResetDecoderOffset(intel_pt_execution.psb_offset); + decoder.DecodeUntilOffset(intel_pt_execution.next_psb_offset); + } + } +} + +Expected<std::vector<IntelPTContinuousExecution>> +lldb_private::trace_intel_pt::SplitTraceInContinuousExecutions( + TraceIntelPT &trace_intel_pt, llvm::ArrayRef<uint8_t> buffer) { + Expected<PtInsnDecoderUP> decoder_up = + CreateInstructionDecoder(trace_intel_pt, buffer); + if (!decoder_up) + return decoder_up.takeError(); + + LibiptSplitter splitter(*decoder_up.get()); + return splitter.SplitTraceInContinuousExecutions(); +}
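As a side note on the anomaly quoted in the description: if I read the sample right, the PSB's TSC really does fall inside the interval that the context switch trace attributes to tid 0, i.e. the swapper/idle task, which is why a user-space PSB there looks wrong. A tiny standalone check with the numbers copied from the sample above:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t in_tsc  = 38890678707555985ULL; // switch-in of tid 0
    const uint64_t out_tsc = 38890678718659020ULL; // switch-out of tid 0
    const uint64_t psb_tsc = 38890678718646466ULL; // TSC of the user-space PSB
    const bool inside = in_tsc <= psb_tsc && psb_tsc <= out_tsc;
    std::printf("PSB inside the tid 0 interval: %s\n",
                inside ? "yes" : "no"); // prints "yes"
  }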