Skip to content

Commit bfdde0c

Browse files
committed
sync: Fix regression due to old refactor
Questionable refactor: #8702
1 parent 0032cee commit bfdde0c

6 files changed

Lines changed: 94 additions & 62 deletions

File tree

layers/sync/sync_validation.cpp

Lines changed: 41 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2560,7 +2560,7 @@ bool SyncValidator::PropagateTimelineSignals(SignalsUpdate& signals_update, cons
25602560
// Each iteration uses registered timeline signals to resolve existing unresolved batches.
25612561
// Each resolved batch can generate new timeline signals which can resolve more unresolved batches on the next iteration.
25622562
// This finishes when all unresolved batches are resolved or when iteration does not generate new timeline signals.
2563-
while (PropagateTimelineSignalsIteration(queues, signals_update, skip, error_obj)) {
2563+
while (ProcessUnresolvedBatches(queues, signals_update, skip, error_obj)) {
25642564
;
25652565
}
25662566

@@ -2573,21 +2573,55 @@ bool SyncValidator::PropagateTimelineSignals(SignalsUpdate& signals_update, cons
25732573
return skip;
25742574
}
25752575

2576-
bool SyncValidator::PropagateTimelineSignalsIteration(std::vector<UnresolvedQueue>& queues, SignalsUpdate& signals_update,
2577-
bool& skip, const ErrorObject& error_obj) const {
2576+
bool SyncValidator::ProcessUnresolvedBatches(std::vector<UnresolvedQueue>& queues, SignalsUpdate& signals_update, bool& skip,
2577+
const ErrorObject& error_obj) const {
25782578
bool has_new_timeline_signals = false;
25792579
for (auto& queue : queues) {
25802580
if (queue.unresolved_batches.empty()) {
25812581
continue; // all batches for this queue were resolved by previous iterations
25822582
}
2583+
// Resolve waits that have matching signal
2584+
for (UnresolvedBatch& unresolved_batch : queue.unresolved_batches) {
2585+
auto it = unresolved_batch.unresolved_waits.begin();
2586+
while (it != unresolved_batch.unresolved_waits.end()) {
2587+
const VkSemaphoreSubmitInfo& wait_info = *it;
2588+
auto resolving_signal = signals_update.OnTimelineWait(wait_info.semaphore, wait_info.value);
2589+
if (!resolving_signal.has_value()) {
2590+
++it;
2591+
continue; // resolving signal not found, the wait stays unresolved
2592+
}
2593+
if (resolving_signal->batch) { // null for host signals
2594+
unresolved_batch.batch->ResolveSubmitSemaphoreWait(*resolving_signal, wait_info.stageMask);
2595+
unresolved_batch.batch->ImportTags(*resolving_signal->batch);
2596+
unresolved_batch.resolved_dependencies.emplace_back(resolving_signal->batch);
2597+
}
2598+
it = unresolved_batch.unresolved_waits.erase(it);
2599+
}
2600+
}
25832601

25842602
BatchContextPtr last_batch = queue.queue_state->LastBatch();
25852603
const BatchContextPtr initial_last_batch = last_batch;
25862604

2587-
while (!queue.unresolved_batches.empty()) {
2588-
auto& unresolved_batch = queue.unresolved_batches.front();
2605+
// Process batches that do not have unresolved waits anymore.
2606+
// Stop when find a batch with unresolved waits or when all the batches are processed.
2607+
while (!queue.unresolved_batches.empty() && queue.unresolved_batches.front().unresolved_waits.empty()) {
2608+
UnresolvedBatch& ready_batch = queue.unresolved_batches.front();
2609+
2610+
// Import the previous batch information
2611+
if (last_batch && !vvl::Contains(ready_batch.resolved_dependencies, last_batch)) {
2612+
ready_batch.batch->ResolveLastBatch(last_batch);
2613+
ready_batch.resolved_dependencies.emplace_back(std::move(last_batch));
2614+
}
2615+
last_batch = ready_batch.batch;
2616+
2617+
const auto async_batches = ready_batch.batch->RegisterAsyncContexts(ready_batch.resolved_dependencies);
2618+
2619+
skip |= ready_batch.batch->ValidateSubmit(ready_batch.command_buffers, ready_batch.submit_index,
2620+
ready_batch.batch_index, ready_batch.label_stack, error_obj);
25892621

2590-
has_new_timeline_signals |= ProcessUnresolvedBatch(unresolved_batch, signals_update, last_batch, skip, error_obj);
2622+
// Process signals. New timeline signals can resolve more batches on the next iteration
2623+
const auto submit_signals = vvl::make_span(ready_batch.signals.data(), ready_batch.signals.size());
2624+
has_new_timeline_signals |= signals_update.RegisterSignals(ready_batch.batch, submit_signals);
25912625

25922626
// Remove processed batch from the (local) unresolved list
25932627
queue.unresolved_batches.erase(queue.unresolved_batches.begin());
@@ -2597,54 +2631,14 @@ bool SyncValidator::PropagateTimelineSignalsIteration(std::vector<UnresolvedQueu
25972631

25982632
stats.RemoveUnresolvedBatch();
25992633
}
2634+
26002635
if (last_batch != initial_last_batch) {
26012636
queue.queue_state->SetLastBatch(std::move(last_batch));
26022637
}
26032638
}
26042639
return has_new_timeline_signals;
26052640
}
26062641

2607-
bool SyncValidator::ProcessUnresolvedBatch(UnresolvedBatch& unresolved_batch, SignalsUpdate& signals_update,
2608-
BatchContextPtr& last_batch, bool& skip, const ErrorObject& error_obj) const {
2609-
// Resolve waits that have matching signal
2610-
auto it = unresolved_batch.unresolved_waits.begin();
2611-
while (it != unresolved_batch.unresolved_waits.end()) {
2612-
const VkSemaphoreSubmitInfo& wait_info = *it;
2613-
auto resolving_signal = signals_update.OnTimelineWait(wait_info.semaphore, wait_info.value);
2614-
if (!resolving_signal.has_value()) {
2615-
++it;
2616-
continue; // resolving signal not found, the wait stays unresolved
2617-
}
2618-
if (resolving_signal->batch) { // null for host signals
2619-
unresolved_batch.batch->ResolveSubmitSemaphoreWait(*resolving_signal, wait_info.stageMask);
2620-
unresolved_batch.batch->ImportTags(*resolving_signal->batch);
2621-
unresolved_batch.resolved_dependencies.emplace_back(resolving_signal->batch);
2622-
}
2623-
it = unresolved_batch.unresolved_waits.erase(it);
2624-
}
2625-
2626-
// This batch still has unresolved waits
2627-
if (!unresolved_batch.unresolved_waits.empty()) {
2628-
return false; // no new timeline signals were registered
2629-
}
2630-
2631-
// Process fully resolved batch
2632-
UnresolvedBatch& ready_batch = unresolved_batch;
2633-
if (last_batch && !vvl::Contains(ready_batch.resolved_dependencies, last_batch)) {
2634-
ready_batch.batch->ResolveLastBatch(last_batch);
2635-
ready_batch.resolved_dependencies.emplace_back(std::move(last_batch));
2636-
}
2637-
last_batch = ready_batch.batch;
2638-
2639-
const auto async_batches = ready_batch.batch->RegisterAsyncContexts(ready_batch.resolved_dependencies);
2640-
2641-
skip |= ready_batch.batch->ValidateSubmit(ready_batch.command_buffers, ready_batch.submit_index, ready_batch.batch_index,
2642-
ready_batch.label_stack, error_obj);
2643-
2644-
const auto submit_signals = vvl::make_span(ready_batch.signals.data(), ready_batch.signals.size());
2645-
return signals_update.RegisterSignals(ready_batch.batch, submit_signals);
2646-
}
2647-
26482642
void SyncValidator::PostCallRecordGetFenceStatus(VkDevice device, VkFence fence, const RecordObject& record_obj) {
26492643
if (!syncval_settings.submit_time_validation) return;
26502644
if (record_obj.result == VK_SUCCESS) {

layers/sync/sync_validation.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,8 @@ class SyncValidator : public vvl::DeviceProxy {
9797
bool PropagateTimelineSignals(SignalsUpdate& signals_update, const ErrorObject& error_obj);
9898

9999
// Return true if new timeline signals were generated by resolved batches
100-
bool PropagateTimelineSignalsIteration(std::vector<UnresolvedQueue> &queues, SignalsUpdate &signals_update, bool &skip,
101-
const ErrorObject &error_ob) const;
102-
bool ProcessUnresolvedBatch(UnresolvedBatch &unresolved_batch, SignalsUpdate &signals_update, BatchContextPtr &last_batch,
103-
bool &skip, const ErrorObject &error_obj) const;
100+
bool ProcessUnresolvedBatches(std::vector<UnresolvedQueue>& queues, SignalsUpdate& signals_update, bool& skip,
101+
const ErrorObject& error_ob) const;
104102

105103
void ApplyTaggedWait(QueueId queue_id, ResourceUsageTag tag, const LastSynchronizedPresent &last_synchronized_present,
106104
const std::vector<ResourceUsageTag> &queue_sync_tags);

tests/framework/sync_val_tests.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@ class VkSyncValTest : public VkLayerTest {
4444
std::unique_ptr<vkt::rt::Pipeline> GetTraceRaysPipeline(VkAccelerationStructureKHR as);
4545
std::unique_ptr<vkt::as::BuildGeometryInfoKHR> BuildBLAS();
4646
std::unique_ptr<vkt::as::BuildGeometryInfoKHR> BuildTLAS(const vkt::as::AccelerationStructureKHR &blas);
47+
48+
static std::pair<vkt::Queue*, vkt::Queue*> GetTwoQueuesFromSameFamily(const std::vector<vkt ::Queue*>& queues);
4749
};

tests/unit/sync_val.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4762,17 +4762,6 @@ TEST_F(NegativeSyncVal, QSBufferCopyVsFence) {
47624762
m_default_queue->Wait();
47634763
}
47644764

4765-
static std::pair<vkt::Queue*, vkt::Queue*> GetTwoQueuesFromSameFamily(const std::vector<vkt ::Queue*>& queues) {
4766-
for (size_t i = 0; i < queues.size(); i++) {
4767-
for (size_t k = i + 1; k < queues.size(); k++) {
4768-
if (queues[i]->family_index == queues[k]->family_index) {
4769-
return {queues[i], queues[k]};
4770-
}
4771-
}
4772-
}
4773-
return {};
4774-
}
4775-
47764765
TEST_F(NegativeSyncVal, QSBufferCopyQSORules) {
47774766
all_queue_count_ = true;
47784767
RETURN_IF_SKIP(InitSyncVal());

tests/unit/sync_val_positive.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ void VkSyncValTest::InitRayTracing() {
9797
RETURN_IF_SKIP(InitSyncVal());
9898
}
9999

100+
std::pair<vkt::Queue*, vkt::Queue*> VkSyncValTest::GetTwoQueuesFromSameFamily(const std::vector<vkt ::Queue*>& queues) {
101+
for (size_t i = 0; i < queues.size(); i++) {
102+
for (size_t k = i + 1; k < queues.size(); k++) {
103+
if (queues[i]->family_index == queues[k]->family_index) {
104+
return {queues[i], queues[k]};
105+
}
106+
}
107+
}
108+
return {};
109+
}
110+
100111
TEST_F(PositiveSyncVal, BufferCopyNonOverlappedRegions) {
101112
TEST_DESCRIPTION("Copy to non-overlapped regions of the same buffer");
102113
RETURN_IF_SKIP(InitSyncVal());

tests/unit/sync_val_semaphore_positive.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,3 +795,41 @@ TEST_F(PositiveSyncValTimelineSemaphore, ConcurrentHostSignalAndSubmit) {
795795
}
796796
thread.join();
797797
}
798+
799+
TEST_F(PositiveSyncValTimelineSemaphore, WaitBeforeSinglaBlocksAnotherSubmit) {
800+
TEST_DESCRIPTION("Test that a batch blocked by a wait-before-signal batch is not validated");
801+
all_queue_count_ = true;
802+
RETURN_IF_SKIP(InitTimelineSemaphore());
803+
804+
auto [queue0, queue1] = GetTwoQueuesFromSameFamily(m_device->QueuesWithTransferCapability());
805+
if (!queue0) {
806+
GTEST_SKIP() << "Test requires two queues with transfer capabilities from the same queue family";
807+
}
808+
809+
vkt::Buffer buffer_a(*m_device, 1024, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
810+
vkt::Buffer buffer_b(*m_device, 1024, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
811+
812+
vkt::CommandPool command_pool(*m_device, queue0->family_index);
813+
vkt::CommandBuffer command_buffer(*m_device, command_pool);
814+
command_buffer.Begin(VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
815+
command_buffer.Copy(buffer_a, buffer_b);
816+
command_buffer.End();
817+
818+
vkt::Semaphore timeline(*m_device, VK_SEMAPHORE_TYPE_TIMELINE);
819+
820+
queue0->Submit(vkt::no_cmd, vkt::TimelineWait(timeline, 2));
821+
queue0->Submit(command_buffer); // This will be block until the above wait is resolved
822+
823+
// This seemingly useless submit tests that syncval stops processing pending batches when
824+
// it encounters the first batch blocked by wait-before-signal.
825+
//
826+
// Signal=1 does not resolve wait=2 in the first submit above, so the following submit on
827+
// the same queue should not be processed, even though it does not wait on the semaphore directly.
828+
//
829+
// We had a bug where syncval iterated over all pending batches instead of stopping at the
830+
// first blocked one. This introduced false positives.
831+
queue1->Submit(command_buffer, vkt::TimelineSignal(timeline, 1));
832+
833+
queue1->Submit(vkt::no_cmd, vkt::TimelineSignal(timeline, 2));
834+
m_device->Wait();
835+
}

0 commit comments

Comments
 (0)