Skip to content

Commit

Permalink
[SYCL][SCLA] Add aligned_private_alloca support (intel#13490)
Browse files Browse the repository at this point in the history
Add support for `aligned_private_alloca`. Use new
`__builtin_intel_sycl_alloca_with_align` builtin in implementation.

Revamp diagnostic messages to support the new builtin.

Add SPIR-V integration tests.

---------

Signed-off-by: Victor Perez <[email protected]>
  • Loading branch information
victor-eds committed Apr 29, 2024
1 parent de7413f commit 004efa3
Show file tree
Hide file tree
Showing 22 changed files with 553 additions and 82 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4722,6 +4722,12 @@ def IntelSYCLAlloca : Builtin {
let Prototype = "void *(void &)";
}

def IntelSYCLAllocaWithAlign : Builtin {
let Spellings = ["__builtin_intel_sycl_alloca_with_align"];
let Attributes = [NoThrow, CustomTypeChecking];
let Prototype = "void *(void &)";
}

// Builtins for Intel FPGA
def IntelSYCLFPGAReg : Builtin {
let Spellings = ["__builtin_intel_fpga_reg"];
Expand Down
32 changes: 14 additions & 18 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -178,29 +178,25 @@ def err_intel_sycl_ptr_annotation_mismatch
"|a string literal or constexpr const char*}0">;

def err_intel_sycl_alloca_no_alias
: Error<"__builtin_intel_sycl_alloca cannot be used in source code. "
"Use the private_alloca alias instead">;
def err_intel_sycl_alloca_wrong_arg_count
: Error<"__builtin_intel_sycl_alloca expects to be passed a single "
"argument. Got %0">;
: Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 cannot be used in "
"source code. Use the %select{|aligned_}0private_alloca alias "
"instead">;
def err_intel_sycl_alloca_wrong_template_arg_count
: Error<"__builtin_intel_sycl_alloca expects to be passed three template "
"arguments. Got %0">;
: Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 expects to be "
"passed %select{3|4}0 template arguments. Got %1">;
def err_intel_sycl_alloca_wrong_arg
: Error<"__builtin_intel_sycl_alloca expects to be passed an argument of type "
"'sycl::kernel_handler &'. Got %0">;
: Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 expects to be "
"passed an argument of type 'sycl::kernel_handler &'. Got %1">;
def err_intel_sycl_alloca_wrong_type
: Error<"__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' "
"to a cv-unqualified trivial type. Got %0">;
: Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 can only return "
"'sycl::private_ptr' to a cv-unqualified trivial type. Got %1">;
def err_intel_sycl_alloca_wrong_size
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
"constant of integral value type as a template argument. Got %1 (%0)">;
def err_intel_sycl_alloca_no_size
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
"constant of integral value type as a template argument. Got %0">;
: Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 must be passed "
"a specialization constant of integral value type as a template "
"argument. Got %1">;
def warn_intel_sycl_alloca_bad_default_value : Warning<
"__builtin_intel_sycl_alloca expects a specialization constant with a "
"default value of at least one as an argument. Got %0">,
"__builtin_intel_sycl_alloca%select{|_with_align}0 expects a specialization "
"constant with a default value of at least one as an argument. Got %1">,
InGroup<SyclPrivateAllocaPositiveSize>;

// C99 variable-length arrays
Expand Down
37 changes: 25 additions & 12 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6128,7 +6128,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_intel_sycl_ptr_annotation:
return EmitIntelSYCLPtrAnnotationBuiltin(E);
case Builtin::BI__builtin_intel_sycl_alloca:
return EmitIntelSYCLAllocaBuiltin(E, ReturnValue);
case Builtin::BI__builtin_intel_sycl_alloca_with_align:
return EmitIntelSYCLAllocaBuiltin(BuiltinID, E, ReturnValue);
case Builtin::BI__builtin_get_device_side_mangled_name: {
auto Name = CGM.getCUDARuntime().getDeviceSideName(
cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
Expand Down Expand Up @@ -23913,17 +23914,30 @@ RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) {
return RValue::get(Ann);
}

RValue
CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
ReturnValueSlot ReturnValue) {
RValue CodeGenFunction::EmitIntelSYCLAllocaBuiltin(
unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) {
assert((BuiltinID == Builtin::BI__builtin_intel_sycl_alloca ||
BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align) &&
"Unexpected builtin");

bool IsAlignedAlloca =
BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align;

constexpr unsigned InvalidIndex = -1;
constexpr unsigned ElementTypeIndex = 0;
const unsigned AlignmentIndex = IsAlignedAlloca ? 1 : InvalidIndex;
const unsigned SpecNameIndex = IsAlignedAlloca ? 2 : 1;
const unsigned DecorateAddressIndex = IsAlignedAlloca ? 3 : 2;

const FunctionDecl *FD = E->getDirectCallee();
assert(FD && "Expecting direct call to builtin");

SourceLocation Loc = E->getExprLoc();

// Get specialization constant ID.
ValueDecl *SpecConst =
FD->getTemplateSpecializationArgs()->get(1).getAsDecl();
const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
assert(TAL && "Expecting template argument list");
ValueDecl *SpecConst = TAL->get(SpecNameIndex).getAsDecl();
DeclRefExpr *Ref = DeclRefExpr::Create(
getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst,
/*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(),
Expand All @@ -23942,18 +23956,17 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
cast<llvm::PointerType>(SpecConstPtr->getType()));

// Get allocation type.
const TemplateArgumentList &TAL =
cast<ClassTemplateSpecializationDecl>(E->getType()->getAsCXXRecordDecl())
->getTemplateArgs();
QualType AllocaType = TAL.get(0).getAsType();
QualType AllocaType = TAL->get(ElementTypeIndex).getAsType();
llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType);
unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace();
llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS);

llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty);

llvm::Constant *Align = Builder.getInt64(
getContext().getTypeAlignInChars(AllocaType).getAsAlign().value());
IsAlignedAlloca
? TAL->get(AlignmentIndex).getAsIntegral().getZExtValue()
: getContext().getTypeAlignInChars(AllocaType).getAsAlign().value());

llvm::Value *Allocation = [&]() {
// To implement automatic storage duration of the underlying memory object,
Expand Down Expand Up @@ -23985,7 +23998,7 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
// Perform AS cast if needed.

constexpr int NoDecorated = 0;
llvm::APInt Decorated = TAL.get(2).getAsIntegral();
llvm::APInt Decorated = TAL->get(DecorateAddressIndex).getAsIntegral();
// Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private
// AS) pointer type. Perform cast if 'sycl::access::decorated::no'.
if (Decorated == NoDecorated) {
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4588,7 +4588,7 @@ class CodeGenFunction : public CodeGenTypeCache {
RValue EmitIntelFPGAMemBuiltin(const CallExpr *E);

RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E);
RValue EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
RValue EmitIntelSYCLAllocaBuiltin(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);

llvm::CallInst *
Expand Down
73 changes: 56 additions & 17 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3013,9 +3013,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
return ExprError();
break;
case Builtin::BI__builtin_intel_sycl_alloca:
case Builtin::BI__builtin_intel_sycl_alloca_with_align:
if (!Context.getLangOpts().SYCLIsDevice) {
Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language)
<< "__builtin_intel_sycl_alloca"
<< (BuiltinID == Builtin::BI__builtin_intel_sycl_alloca
? "__builtin_intel_sycl_alloca"
: "__builtin_intel_sycl_alloca_with_align")
<< "SYCL device";
return ExprError();
}
Expand Down Expand Up @@ -7817,18 +7820,31 @@ static llvm::APSInt getSYCLAllocaDefaultSize(const ASTContext &Ctx,
return Default.getInt();
}

bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *Call) {
assert(getLangOpts().SYCLIsDevice &&
"Builtin can only be used in SYCL device code");

assert((BuiltinID == Builtin::BI__builtin_intel_sycl_alloca ||
BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align) &&
"Unexpected builtin");

bool IsAlignedAlloca =
BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align;

constexpr unsigned InvalidIndex = -1;
constexpr unsigned ElementTypeIndex = 0;
const unsigned AlignmentIndex = IsAlignedAlloca ? 1 : InvalidIndex;
const unsigned SpecNameIndex = IsAlignedAlloca ? 2 : 1;

SourceLocation Loc = Call->getBeginLoc();

// This builtin cannot be called directly. As it needs to pass template
// arguments, this is always an alias.
const FunctionDecl *FD = Call->getDirectCallee();
assert(FD && "Builtin cannot be called from a function pointer");
if (!FD->hasAttr<BuiltinAliasAttr>()) {
Diag(Loc, diag::err_intel_sycl_alloca_no_alias);
Diag(Loc, diag::err_intel_sycl_alloca_no_alias) << IsAlignedAlloca;
return true;
}

Expand All @@ -7837,10 +7853,11 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
return true;

// Check three template arguments are passed
if (const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
!TAL || TAL->size() != 3) {
unsigned DesiredTemplateArgumentsCount = IsAlignedAlloca ? 4 : 3;
const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs();
if (!CST || CST->size() != DesiredTemplateArgumentsCount) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_template_arg_count)
<< (TAL ? TAL->size() : 0);
<< IsAlignedAlloca << (CST ? CST->size() : 0);
return true;
}

Expand All @@ -7854,7 +7871,7 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
};
if (CheckArg(FD->getParamDecl(0)->getType())) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_arg)
<< FD->getParamDecl(0)->getType();
<< IsAlignedAlloca << FD->getParamDecl(0)->getType();
return true;
}

Expand All @@ -7876,14 +7893,18 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
return TAL.get(1).getAsIntegral() != PrivateAS;
};
if (CheckType(FD->getReturnType(), getASTContext())) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) << FD->getReturnType();
Diag(Loc, diag::err_intel_sycl_alloca_wrong_type)
<< IsAlignedAlloca << FD->getReturnType();
return true;
}

// Check size is passed as a specialization constant
const auto CheckSize = [this](const ASTContext &Ctx, SourceLocation Loc,
const TemplateArgumentList *CST) {
QualType Ty = CST->get(1).getNonTypeTemplateArgumentType();
const auto CheckSize = [this, IsAlignedAlloca, ElementTypeIndex,
SpecNameIndex](const ASTContext &Ctx,
SourceLocation Loc,
const TemplateArgumentList *CST) {
TemplateArgument TA = CST->get(SpecNameIndex);
QualType Ty = TA.getNonTypeTemplateArgumentType();
if (Ty.isNull() || !Ty->isReferenceType())
return true;
Ty = Ty->getPointeeType();
Expand All @@ -7895,23 +7916,41 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
if (!TAL.get(0).getAsType()->isIntegralType(Ctx))
return true;
llvm::APSInt DefaultSize =
getSYCLAllocaDefaultSize(Ctx, cast<VarDecl>(CST->get(1).getAsDecl()));
getSYCLAllocaDefaultSize(Ctx, cast<VarDecl>(TA.getAsDecl()));
if (DefaultSize < 1)
Diag(Loc, diag::warn_intel_sycl_alloca_bad_default_value)
<< DefaultSize.getSExtValue();
<< IsAlignedAlloca << DefaultSize.getSExtValue();
return false;
};
const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs();
if (CheckSize(getASTContext(), Loc, CST)) {
TemplateArgument TA = CST->get(1);
TemplateArgument TA = CST->get(SpecNameIndex);
QualType Ty = TA.getNonTypeTemplateArgumentType();
const SemaDiagnosticBuilder &D =
Diag(Loc, diag::err_intel_sycl_alloca_wrong_size);
D << IsAlignedAlloca;
if (Ty.isNull())
Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA;
D << TA;
else
Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty;
D << Ty;
return true;
}

if (IsAlignedAlloca) {
TemplateArgument AlignmentArg = CST->get(AlignmentIndex);
llvm::APSInt RequestedAlign = AlignmentArg.getAsIntegral();
if (!RequestedAlign.isPowerOf2())
return Diag(Loc, diag::err_alignment_not_power_of_two);
constexpr int32_t MaxAllowedAlign = std::numeric_limits<int32_t>::max() / 8;
if (RequestedAlign > MaxAllowedAlign)
return Diag(Loc, diag::err_alignment_too_big) << MaxAllowedAlign;
QualType AllocaType = CST->get(ElementTypeIndex).getAsType();
int64_t AllocaRequiredAlignment =
Context.getTypeAlignInChars(AllocaType).getQuantity();
if (RequestedAlign < AllocaRequiredAlignment)
return Diag(Loc, diag::err_alignas_underaligned)
<< AllocaType << AllocaRequiredAlignment;
}

return false;
}
/// Given a FunctionDecl's FormatAttr, attempts to populate the FomatStringInfo
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGenSYCL/Inputs/private_alloca.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca)
multi_ptr<ElementType, access::address_space::private_space,
DecorateAddress> private_alloca(kernel_handler &h);

template <typename ElementType, size_t Alignment, auto &Size,
access::decorated DecorateAddress>
__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align)
[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_private_alloca)]]
multi_ptr<ElementType, access::address_space::private_space,
DecorateAddress> aligned_private_alloca(kernel_handler &h);
} // namespace experimental
} // namesapce oneapi
} // namespace ext
Expand Down
28 changes: 27 additions & 1 deletion clang/test/CodeGenSYCL/builtin-alloca.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s \
// RUN: | FileCheck %s

// Test codegen for __builtin_intel_sycl_alloca.
// Test codegen for __builtin_intel_sycl_alloca and __builtin_intel_sycl_alloca_with_align.

#include <stddef.h>

Expand Down Expand Up @@ -53,5 +53,31 @@ SYCL_EXTERNAL void test(sycl::kernel_handler &kh) {

// CHECK: declare !sycl_used_aspects ![[#USED_ASPECTS]] ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs

// CHECK-LABEL: define dso_local spir_func void @_Z12test_alignedRN4sycl3_V114kernel_handlerE(
// CHECK-SAME: ptr addrspace(4) noundef align 1 dereferenceable(1) [[KH:%.*]])
// CHECK-NEXT: entry:
// CHECK-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8
// CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 16)
// CHECK-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 8)
// CHECK-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8
// CHECK-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 4)
// CHECK-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4)
// CHECK-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4)
// CHECK-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4)
// CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8
// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8
// CHECK-NEXT: ret void
SYCL_EXTERNAL void test_aligned(sycl::kernel_handler &kh) {
auto ptr0 = sycl::ext::oneapi::experimental::aligned_private_alloca<double, alignof(double) * 2, size, sycl::access::decorated::yes>(kh);
auto ptr1 = sycl::ext::oneapi::experimental::aligned_private_alloca<int, alignof(long), intSize, sycl::access::decorated::legacy>(kh);
auto ptr2 = sycl::ext::oneapi::experimental::aligned_private_alloca<myStruct, alignof(myStruct) * 4, intSize, sycl::access::decorated::no>(kh);
}

// CHECK-DAG: ![[#USED_ASPECTS]] = !{i32 [[#PRIVATE_ALLOCA_ASPECT:]]}
// CHECK-DAG: !{!"ext_oneapi_private_alloca", i32 [[#PRIVATE_ALLOCA_ASPECT]]}
7 changes: 7 additions & 0 deletions clang/test/SemaSYCL/Inputs/private_alloca.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "./sycl.hpp"

#include <stddef.h>

namespace sycl {
inline namespace _V1 {
namespace ext {
Expand All @@ -13,6 +15,11 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca)
multi_ptr<ElementType, access::address_space::private_space,
DecorateAddress> private_alloca(kernel_handler &h);

template <typename ElementType, size_t Alignment, auto &Size,
access::decorated DecorateAddress>
__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align)
multi_ptr<ElementType, access::address_space::private_space,
DecorateAddress> aligned_private_alloca(kernel_handler &h);
} // namespace experimental
} // namesapce oneapi
} // namespace ext
Expand Down
Loading

0 comments on commit 004efa3

Please sign in to comment.