Skip to content

Commit ca17aa8

Browse files
authored
[ATT] [CI fix] warmup async copies before starting the trace (#5684)
## Motivation Adds a warmup to async copies before starting thread trace. This avoids data loss in the first buffer flip. ## Technical Details <!-- Explain the changes along with any relevant GitHub links. --> ## JIRA ID <!-- If applicable, mention the JIRA ID resolved by this PR (Example: Resolves SWDEV-12345). --> <!-- Do not post any JIRA links here. --> ## Test Plan <!-- Explain any relevant testing done to verify this PR. --> ## Test Result <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [ ] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
1 parent 05d2704 commit ca17aa8

3 files changed

Lines changed: 19 additions & 4 deletions

File tree

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/core.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,21 @@ ThreadTracerAgent::start_thread_trace(std::shared_ptr<std::atomic<int>> _flag)
258258
control_packet_copy->populate_before();
259259
control_packet_copy->populate_after();
260260

261+
// Warm up the async copy so we don't wait too long for the flip.
262+
if(params.triple_buffering)
263+
{
264+
auto& buffer = queue->triple_buffer_memory;
265+
auto signal = signal_create();
266+
copy_data_sync(buffer.at(0),
267+
buffer.at(1),
268+
queue->near_cpu,
269+
queue->hsa_agent,
270+
MIN_BUFFER_SIZE,
271+
&signal);
272+
signal_wait(signal);
273+
signal_destroy(signal);
274+
}
275+
261276
// Submit the start packets without waiting: the producer thread (triple-buffer
262277
// path) and DeviceThreadTracer::start_context (single-buffer path) wait on the
263278
// returned signal so multiple agents can be launched in parallel.

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/hsa_util.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ att_queue_submit_signal_last(const att_queue_t& q, VecType& vec)
115115
/// waiting on the last signal guarantees the entire batch has drained.
116116
template <typename VecType>
117117
signal_ptr_t
118-
att_queue_submit_and_signal_last(const att_queue_t& q, VecType& vec)
118+
att_queue_submit_and_wait_last(const att_queue_t& q, VecType& vec)
119119
{
120120
auto sig = att_queue_submit_signal_last(q, vec);
121121
if(sig) signal_wait(*sig);

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/threading.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ producer_loop(
203203

204204
auto stop_trace = [&]() {
205205
ROCP_INFO << "Stopping the trace";
206-
att_queue_submit_and_signal_last(queue, parameters.control_packet->after_krn_pkt);
206+
att_queue_submit_and_wait_last(queue, parameters.control_packet->after_krn_pkt);
207207
};
208208

209209
auto iterate_trace = [&]() {
@@ -265,8 +265,8 @@ producer_loop(
265265
ROCPROFILER_THREAD_TRACE_SHADER_DATA_FLAGS_NONE,
266266
true);
267267

268-
att_queue_submit_and_signal_last(queue,
269-
parameters.control_packet->before_krn_pkt);
268+
att_queue_submit_and_wait_last(queue,
269+
parameters.control_packet->before_krn_pkt);
270270
}
271271
}
272272
// The status_query test verifies we immediately poll again after consuming a

0 commit comments

Comments
 (0)