@@ -827,7 +827,8 @@ static void printZeEventList(_pi_ze_event_list_t &PiZeEventList) {
827
827
zePrint (" \n " );
828
828
}
829
829
830
- pi_result _pi_ze_event_list_t::releaseAndDestroyPiZeEventList () {
830
+ pi_result _pi_ze_event_list_t::collectEventsForReleaseAndDestroyPiZeEventList (
831
+ std::list<pi_event> &EventsToBeReleased) {
831
832
// acquire a lock before reading the length and list fields.
832
833
// Acquire the lock, copy the needed data locally, and reset
833
834
// the fields, then release the lock.
@@ -854,7 +855,8 @@ pi_result _pi_ze_event_list_t::releaseAndDestroyPiZeEventList() {
854
855
}
855
856
856
857
for (pi_uint32 I = 0 ; I < LocLength; I++) {
857
- piEventRelease (LocPiEventList[I]);
858
+ // Add the event to be released to the list
859
+ EventsToBeReleased.push_back (LocPiEventList[I]);
858
860
}
859
861
860
862
if (LocZeEventList != nullptr ) {
@@ -3755,6 +3757,7 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
3755
3757
// This currently recycles the associated command list, and also makes
3756
3758
// sure to release any kernel that may have been used by the event.
3757
3759
static void cleanupAfterEvent (pi_event Event) {
3760
+ zePrint (" cleanupAfterEvent entry\n " );
3758
3761
// The implementation of this is slightly tricky. The same event
3759
3762
// can be referred to by multiple threads, so it is possible to
3760
3763
// have a race condition between the read of fields of the event,
@@ -3798,11 +3801,27 @@ static void cleanupAfterEvent(pi_event Event) {
3798
3801
}
3799
3802
}
3800
3803
3801
- // Had to make sure lock of Event's Queue has been released before
3802
- // the code starts to release the event wait list, as that could
3803
- // potentially cause recursive calls to cleanupAfterEvent, and
3804
- // run into a problem trying to do multiple locks on the same Queue.
3805
- Event->WaitList .releaseAndDestroyPiZeEventList ();
3804
+ // Make a list of all the dependent events that must have signalled
3805
+ // because this event was dependent on them. This list will be appended
3806
+ // to as we walk it so that this algorithm doesn't go recursive
3807
+ // due to dependent events themselves being dependent on other events
3808
+ // forming a potentially very deep tree, and deep recursion. That
3809
+ // turned out to be a significant problem with the recursive code
3810
+ // that preceded this implementation.
3811
+
3812
+ std::list<pi_event> EventsToBeReleased;
3813
+
3814
+ Event->WaitList .collectEventsForReleaseAndDestroyPiZeEventList (
3815
+ EventsToBeReleased);
3816
+
3817
+ while (!EventsToBeReleased.empty ()) {
3818
+ pi_event DepEvent = EventsToBeReleased.front ();
3819
+ EventsToBeReleased.pop_front ();
3820
+
3821
+ DepEvent->WaitList .collectEventsForReleaseAndDestroyPiZeEventList (
3822
+ EventsToBeReleased);
3823
+ piEventRelease (DepEvent);
3824
+ }
3806
3825
zePrint (" cleanupAfterEvent exit\n " );
3807
3826
}
3808
3827
0 commit comments