Skip to content

Commit

Permalink
[SYCL][XPTI] Refactoring framework to use 128-bit keys for collision …
Browse files Browse the repository at this point in the history
…elimination (#14467)

Previous implementation of the XPTI framework used 64-bit hash values to
represent trace points in the code and this has led to a few of hash
collisions.This refactoring moves to a 128-bit key to guarantee
uniqueness. The changes needed to SYCL runtime to fully migrate to newer
APIs will be pushed as a **separate Part 2 pull request**. Current pull
request include changes to the XPTI framework and minor changes to SYCL
runtime to reflect the transition to 128-bit keys and ensure validity of
the tests.
 - 128-bit keys for internal storage and lookups
 - Support 64-bit universal IDs for backward compatibility
 - Updated tests to handle legacy API and new APIs for correctness tests
- Updated performance tests to report metrics for both 64-bit and 28-bit
native APIs
- Updated SYCL instrumentation to return a new trace event for each
instance of a trace point. Earlier implementation always returned the
same trace event for a give trace point as the metadata associated with
a trace event was deemed to be invariant. However, with the need for
mutable metadata, this change is required.
 - Minor updates to documentation

**NOTE**: Since more events are generated due to the creation of a new
trace event for each trace point instance, some tests that rely on event
sequences may have to be updated.

---------

Signed-off-by: Vasanth Tovinkere <[email protected]>
Signed-off-by: Tikhomirova, Kseniya <[email protected]>
Co-authored-by: Tikhomirova, Kseniya <[email protected]>
Co-authored-by: Artur Gainullin <[email protected]>
  • Loading branch information
3 people committed Aug 27, 2024
1 parent f995f55 commit 283073a
Show file tree
Hide file tree
Showing 28 changed files with 6,375 additions and 1,036 deletions.
154 changes: 77 additions & 77 deletions sycl/doc/design/SYCLInstrumentationUsingXPTI.md

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,6 @@ void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
return TraceEvent;
// Use a thread-safe counter to get a unique instance ID for the wait() on the
// event
static std::atomic<uint64_t> InstanceID = {1};
xpti::trace_event_data_t *WaitEvent = nullptr;

// Create a string with the event address so it
Expand All @@ -195,11 +192,20 @@ void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
Command *Cmd = (Command *)MCommand;
WaitEvent = Cmd->MTraceEvent ? static_cast<xpti_td *>(Cmd->MTraceEvent)
: GSYCLGraphEvent;
} else
WaitEvent = GSYCLGraphEvent;

} else {
// If queue.wait() is used, we want to make sure the information about the
// queue is available with the wait events. We check to see if the
// TraceEvent is available in the Queue object.
void *TraceEvent = nullptr;
if (QueueImplPtr Queue = MQueue.lock()) {
TraceEvent = Queue->getTraceEvent();
WaitEvent =
(TraceEvent ? static_cast<xpti_td *>(TraceEvent) : GSYCLGraphEvent);
} else
WaitEvent = GSYCLGraphEvent;
}
// Record the current instance ID for use by Epilog
IId = InstanceID++;
IId = xptiGetUniqueId();
xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, WaitEvent,
IId, static_cast<const void *>(Name.c_str()));
TraceEvent = (void *)WaitEvent;
Expand Down
62 changes: 60 additions & 2 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
// information
XPTIScope PrepareNotify((void *)this,
(uint16_t)xpti::trace_point_type_t::node_create,
SYCL_STREAM_NAME, "memory_transfer_node");
SYCL_STREAM_NAME, "memory_transfer_node::memset");
PrepareNotify.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice->getHandleRef()));
Expand Down Expand Up @@ -202,7 +202,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
// pointer.
XPTIScope PrepareNotify((void *)this,
(uint16_t)xpti::trace_point_type_t::node_create,
SYCL_STREAM_NAME, "memory_transfer_node");
SYCL_STREAM_NAME, "memory_transfer_node::memcpy");
PrepareNotify.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice->getHandleRef()));
Expand Down Expand Up @@ -631,6 +631,64 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
#endif
}

void queue_impl::constructorNotification() {
#if XPTI_ENABLE_INSTRUMENTATION
if (xptiTraceEnabled()) {
MStreamID = xptiRegisterStream(SYCL_STREAM_NAME);
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) {
xpti::utils::StringHelper SH;
std::string AddrStr = SH.addressAsString<size_t>(MQueueID);
std::string QueueName = SH.nameWithAddressString("queue", AddrStr);
// Create a payload for the queue create event as we do not get code
// location for the queue create event
xpti::payload_t QPayload(QueueName.c_str());
MInstanceID = xptiGetUniqueId();
uint64_t RetInstanceNo;
xpti_td *TEvent =
xptiMakeEvent("queue_create", &QPayload,
(uint16_t)xpti::trace_event_type_t::algorithm,
xpti_at::active, &RetInstanceNo);
// Cache the trace event, stream id and instance IDs for the destructor
MTraceEvent = (void *)TEvent;

xpti::addMetadata(TEvent, "sycl_context",
reinterpret_cast<size_t>(MContext->getHandleRef()));
if (MDevice) {
xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName());
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice->getHandleRef()));
}
xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
xpti::addMetadata(TEvent, "queue_handle",
reinterpret_cast<size_t>(getHandleRef()));
// Also publish to TLS before notification
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
xptiNotifySubscribers(
MStreamID, (uint16_t)xpti::trace_point_type_t::queue_create, nullptr,
TEvent, MInstanceID, static_cast<const void *>("queue_create"));
}
}
#endif
}

void queue_impl::destructorNotification() {
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_destroy);
if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) {
// Use the cached trace event, stream id and instance IDs for the
// destructor
xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr,
(xpti::trace_event_data_t *)MTraceEvent, MInstanceID,
static_cast<const void *>("queue_destroy"));
xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent);
}
#endif
}

ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
const PluginPtr &Plugin = getPlugin();
if (getContextImplPtr()->getBackend() == backend::opencl)
Expand Down
94 changes: 21 additions & 73 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,37 +173,10 @@ class queue_impl {
// We enable XPTI tracing events using the TLS mechanism; if the code
// location data is available, then the tracing data will be rich.
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
// Using the instance override constructor for use with queues as queues
// maintain instance IDs in the object
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, MQueueID, "queue_create");
// Cache the trace event, stream id and instance IDs for the destructor
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
MTraceEvent = (void *)PrepareNotify.traceEvent();
MStreamID = PrepareNotify.streamID();
MInstanceID = PrepareNotify.instanceID();
// Add the function to capture meta data for the XPTI trace event
PrepareNotify.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_context",
reinterpret_cast<size_t>(MContext->getHandleRef()));
if (MDevice) {
xpti::addMetadata(TEvent, "sycl_device_name",
MDevice->getDeviceName());
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice->getHandleRef()));
}
xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
xpti::addMetadata(TEvent, "queue_handle",
reinterpret_cast<size_t>(getHandleRef()));
});
// Also publish to TLS
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
// Emit a trace event for queue creation; we currently do not get code
// location information, so all queueus will have the same UID with a
// different instance ID until this gets added.
constructorNotification();
#endif
}

Expand Down Expand Up @@ -236,35 +209,10 @@ class queue_impl {
// is the prolog section and the epilog section will initiate the
// notification.
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, MQueueID, "queue_create");
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
// Cache the trace event, stream id and instance IDs for the destructor
MTraceEvent = (void *)PrepareNotify.traceEvent();
MStreamID = PrepareNotify.streamID();
MInstanceID = PrepareNotify.instanceID();

// Add the function to capture meta data for the XPTI trace event
PrepareNotify.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_context",
reinterpret_cast<size_t>(MContext->getHandleRef()));
if (MDevice) {
xpti::addMetadata(TEvent, "sycl_device_name",
MDevice->getDeviceName());
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice->getHandleRef()));
}
xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
xpti::addMetadata(TEvent, "queue_handle", getHandleRef());
});
// Also publish to TLS before notification
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
// Emit a trace event for queue creation; we currently do not get code
// location information, so all queueus will have the same UID with a
// different instance ID until this gets added.
constructorNotification();
#endif
}

Expand Down Expand Up @@ -306,20 +254,11 @@ class queue_impl {

~queue_impl() {
try {
// The trace event created in the constructor should be active through the
// lifetime of the queue object as member variables when ABI breakage is
// allowed. This example shows MTraceEvent as a member variable.
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_destroy);
if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) {
// Used cached information in member variables
xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr,
(xpti::trace_event_data_t *)MTraceEvent,
MInstanceID,
static_cast<const void *>("queue_destroy"));
xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent);
}
// The trace event created in the constructor should be active through the
// lifetime of the queue object as member variable. We will send a
// notification and destroy the trace event for this queue.
destructorNotification();
#endif
throw_asynchronous();
cleanup_fusion_cmd();
Expand Down Expand Up @@ -748,6 +687,8 @@ class queue_impl {

unsigned long long getQueueID() { return MQueueID; }

void *getTraceEvent() { return MTraceEvent; }

void setExternalEvent(const event &Event) {
std::lock_guard<std::mutex> Lock(MInOrderExternalEventMtx);
MInOrderExternalEvent = Event;
Expand Down Expand Up @@ -937,6 +878,13 @@ class queue_impl {
void instrumentationEpilog(void *TelementryEvent, std::string &Name,
int32_t StreamID, uint64_t IId);

// We need to emit a queue_create notification when a queue object is created
void constructorNotification();

// We need to emit a queue_destroy notification when a queue object is
// destroyed
void destructorNotification();

/// queue_impl.addEvent tracks events with weak pointers
/// but some events have no other owners. addSharedEvent()
/// follows events with a shared pointer.
Expand Down
Loading

0 comments on commit 283073a

Please sign in to comment.