diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 408c9c0654e5..b348c6add447 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4722,6 +4722,12 @@ def IntelSYCLAlloca : Builtin { let Prototype = "void *(void &)"; } +def IntelSYCLAllocaWithAlign : Builtin { + let Spellings = ["__builtin_intel_sycl_alloca_with_align"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void *(void &)"; +} + // Builtins for Intel FPGA def IntelSYCLFPGAReg : Builtin { let Spellings = ["__builtin_intel_fpga_reg"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c27668991731..340f44cb2e25 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -178,29 +178,25 @@ def err_intel_sycl_ptr_annotation_mismatch "|a string literal or constexpr const char*}0">; def err_intel_sycl_alloca_no_alias - : Error<"__builtin_intel_sycl_alloca cannot be used in source code. " - "Use the private_alloca alias instead">; -def err_intel_sycl_alloca_wrong_arg_count - : Error<"__builtin_intel_sycl_alloca expects to be passed a single " - "argument. Got %0">; + : Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 cannot be used in " + "source code. Use the %select{|aligned_}0private_alloca alias " + "instead">; def err_intel_sycl_alloca_wrong_template_arg_count - : Error<"__builtin_intel_sycl_alloca expects to be passed three template " - "arguments. Got %0">; + : Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 expects to be " + "passed %select{3|4}0 template arguments. Got %1">; def err_intel_sycl_alloca_wrong_arg - : Error<"__builtin_intel_sycl_alloca expects to be passed an argument of type " - "'sycl::kernel_handler &'. Got %0">; + : Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 expects to be " + "passed an argument of type 'sycl::kernel_handler &'. Got %1">; def err_intel_sycl_alloca_wrong_type - : Error<"__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' " - "to a cv-unqualified trivial type. Got %0">; + : Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 can only return " + "'sycl::private_ptr' to a cv-unqualified trivial type. Got %1">; def err_intel_sycl_alloca_wrong_size - : Error<"__builtin_intel_sycl_alloca must be passed a specialization " - "constant of integral value type as a template argument. Got %1 (%0)">; -def err_intel_sycl_alloca_no_size - : Error<"__builtin_intel_sycl_alloca must be passed a specialization " - "constant of integral value type as a template argument. Got %0">; + : Error<"__builtin_intel_sycl_alloca%select{|_with_align}0 must be passed " + "a specialization constant of integral value type as a template " + "argument. Got %1">; def warn_intel_sycl_alloca_bad_default_value : Warning< - "__builtin_intel_sycl_alloca expects a specialization constant with a " - "default value of at least one as an argument. Got %0">, + "__builtin_intel_sycl_alloca%select{|_with_align}0 expects a specialization " + "constant with a default value of at least one as an argument. Got %1">, InGroup; // C99 variable-length arrays diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ce0f932b2896..f5d0eeb65884 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6128,7 +6128,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_intel_sycl_ptr_annotation: return EmitIntelSYCLPtrAnnotationBuiltin(E); case Builtin::BI__builtin_intel_sycl_alloca: - return EmitIntelSYCLAllocaBuiltin(E, ReturnValue); + case Builtin::BI__builtin_intel_sycl_alloca_with_align: + return EmitIntelSYCLAllocaBuiltin(BuiltinID, E, ReturnValue); case Builtin::BI__builtin_get_device_side_mangled_name: { auto Name = CGM.getCUDARuntime().getDeviceSideName( cast(E->getArg(0)->IgnoreImpCasts())->getDecl()); @@ -23913,17 +23914,30 @@ RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) { return RValue::get(Ann); } -RValue -CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, - ReturnValueSlot ReturnValue) { +RValue CodeGenFunction::EmitIntelSYCLAllocaBuiltin( + unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { + assert((BuiltinID == Builtin::BI__builtin_intel_sycl_alloca || + BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align) && + "Unexpected builtin"); + + bool IsAlignedAlloca = + BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align; + + constexpr unsigned InvalidIndex = -1; + constexpr unsigned ElementTypeIndex = 0; + const unsigned AlignmentIndex = IsAlignedAlloca ? 1 : InvalidIndex; + const unsigned SpecNameIndex = IsAlignedAlloca ? 2 : 1; + const unsigned DecorateAddressIndex = IsAlignedAlloca ? 3 : 2; + const FunctionDecl *FD = E->getDirectCallee(); assert(FD && "Expecting direct call to builtin"); SourceLocation Loc = E->getExprLoc(); // Get specialization constant ID. - ValueDecl *SpecConst = - FD->getTemplateSpecializationArgs()->get(1).getAsDecl(); + const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs(); + assert(TAL && "Expecting template argument list"); + ValueDecl *SpecConst = TAL->get(SpecNameIndex).getAsDecl(); DeclRefExpr *Ref = DeclRefExpr::Create( getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst, /*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(), @@ -23942,10 +23956,7 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, cast(SpecConstPtr->getType())); // Get allocation type. - const TemplateArgumentList &TAL = - cast(E->getType()->getAsCXXRecordDecl()) - ->getTemplateArgs(); - QualType AllocaType = TAL.get(0).getAsType(); + QualType AllocaType = TAL->get(ElementTypeIndex).getAsType(); llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType); unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace(); llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS); @@ -23953,7 +23964,9 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty); llvm::Constant *Align = Builder.getInt64( - getContext().getTypeAlignInChars(AllocaType).getAsAlign().value()); + IsAlignedAlloca + ? TAL->get(AlignmentIndex).getAsIntegral().getZExtValue() + : getContext().getTypeAlignInChars(AllocaType).getAsAlign().value()); llvm::Value *Allocation = [&]() { // To implement automatic storage duration of the underlying memory object, @@ -23985,7 +23998,7 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, // Perform AS cast if needed. constexpr int NoDecorated = 0; - llvm::APInt Decorated = TAL.get(2).getAsIntegral(); + llvm::APInt Decorated = TAL->get(DecorateAddressIndex).getAsIntegral(); // Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private // AS) pointer type. Perform cast if 'sycl::access::decorated::no'. if (Decorated == NoDecorated) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 652cbee65564..b5d173086e62 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4588,7 +4588,7 @@ class CodeGenFunction : public CodeGenTypeCache { RValue EmitIntelFPGAMemBuiltin(const CallExpr *E); RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E); - RValue EmitIntelSYCLAllocaBuiltin(const CallExpr *E, + RValue EmitIntelSYCLAllocaBuiltin(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); llvm::CallInst * diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a42035641274..5acd63e490fb 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3013,9 +3013,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; case Builtin::BI__builtin_intel_sycl_alloca: + case Builtin::BI__builtin_intel_sycl_alloca_with_align: if (!Context.getLangOpts().SYCLIsDevice) { Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language) - << "__builtin_intel_sycl_alloca" + << (BuiltinID == Builtin::BI__builtin_intel_sycl_alloca + ? "__builtin_intel_sycl_alloca" + : "__builtin_intel_sycl_alloca_with_align") << "SYCL device"; return ExprError(); } @@ -7817,10 +7820,23 @@ static llvm::APSInt getSYCLAllocaDefaultSize(const ASTContext &Ctx, return Default.getInt(); } -bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { +bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call) { assert(getLangOpts().SYCLIsDevice && "Builtin can only be used in SYCL device code"); + assert((BuiltinID == Builtin::BI__builtin_intel_sycl_alloca || + BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align) && + "Unexpected builtin"); + + bool IsAlignedAlloca = + BuiltinID == Builtin::BI__builtin_intel_sycl_alloca_with_align; + + constexpr unsigned InvalidIndex = -1; + constexpr unsigned ElementTypeIndex = 0; + const unsigned AlignmentIndex = IsAlignedAlloca ? 1 : InvalidIndex; + const unsigned SpecNameIndex = IsAlignedAlloca ? 2 : 1; + SourceLocation Loc = Call->getBeginLoc(); // This builtin cannot be called directly. As it needs to pass template @@ -7828,7 +7844,7 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { const FunctionDecl *FD = Call->getDirectCallee(); assert(FD && "Builtin cannot be called from a function pointer"); if (!FD->hasAttr()) { - Diag(Loc, diag::err_intel_sycl_alloca_no_alias); + Diag(Loc, diag::err_intel_sycl_alloca_no_alias) << IsAlignedAlloca; return true; } @@ -7837,10 +7853,11 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { return true; // Check three template arguments are passed - if (const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs(); - !TAL || TAL->size() != 3) { + unsigned DesiredTemplateArgumentsCount = IsAlignedAlloca ? 4 : 3; + const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs(); + if (!CST || CST->size() != DesiredTemplateArgumentsCount) { Diag(Loc, diag::err_intel_sycl_alloca_wrong_template_arg_count) - << (TAL ? TAL->size() : 0); + << IsAlignedAlloca << (CST ? CST->size() : 0); return true; } @@ -7854,7 +7871,7 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { }; if (CheckArg(FD->getParamDecl(0)->getType())) { Diag(Loc, diag::err_intel_sycl_alloca_wrong_arg) - << FD->getParamDecl(0)->getType(); + << IsAlignedAlloca << FD->getParamDecl(0)->getType(); return true; } @@ -7876,14 +7893,18 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { return TAL.get(1).getAsIntegral() != PrivateAS; }; if (CheckType(FD->getReturnType(), getASTContext())) { - Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) << FD->getReturnType(); + Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) + << IsAlignedAlloca << FD->getReturnType(); return true; } // Check size is passed as a specialization constant - const auto CheckSize = [this](const ASTContext &Ctx, SourceLocation Loc, - const TemplateArgumentList *CST) { - QualType Ty = CST->get(1).getNonTypeTemplateArgumentType(); + const auto CheckSize = [this, IsAlignedAlloca, ElementTypeIndex, + SpecNameIndex](const ASTContext &Ctx, + SourceLocation Loc, + const TemplateArgumentList *CST) { + TemplateArgument TA = CST->get(SpecNameIndex); + QualType Ty = TA.getNonTypeTemplateArgumentType(); if (Ty.isNull() || !Ty->isReferenceType()) return true; Ty = Ty->getPointeeType(); @@ -7895,23 +7916,41 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { if (!TAL.get(0).getAsType()->isIntegralType(Ctx)) return true; llvm::APSInt DefaultSize = - getSYCLAllocaDefaultSize(Ctx, cast(CST->get(1).getAsDecl())); + getSYCLAllocaDefaultSize(Ctx, cast(TA.getAsDecl())); if (DefaultSize < 1) Diag(Loc, diag::warn_intel_sycl_alloca_bad_default_value) - << DefaultSize.getSExtValue(); + << IsAlignedAlloca << DefaultSize.getSExtValue(); return false; }; - const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs(); if (CheckSize(getASTContext(), Loc, CST)) { - TemplateArgument TA = CST->get(1); + TemplateArgument TA = CST->get(SpecNameIndex); QualType Ty = TA.getNonTypeTemplateArgumentType(); + const SemaDiagnosticBuilder &D = + Diag(Loc, diag::err_intel_sycl_alloca_wrong_size); + D << IsAlignedAlloca; if (Ty.isNull()) - Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA; + D << TA; else - Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty; + D << Ty; return true; } + if (IsAlignedAlloca) { + TemplateArgument AlignmentArg = CST->get(AlignmentIndex); + llvm::APSInt RequestedAlign = AlignmentArg.getAsIntegral(); + if (!RequestedAlign.isPowerOf2()) + return Diag(Loc, diag::err_alignment_not_power_of_two); + constexpr int32_t MaxAllowedAlign = std::numeric_limits::max() / 8; + if (RequestedAlign > MaxAllowedAlign) + return Diag(Loc, diag::err_alignment_too_big) << MaxAllowedAlign; + QualType AllocaType = CST->get(ElementTypeIndex).getAsType(); + int64_t AllocaRequiredAlignment = + Context.getTypeAlignInChars(AllocaType).getQuantity(); + if (RequestedAlign < AllocaRequiredAlignment) + return Diag(Loc, diag::err_alignas_underaligned) + << AllocaType << AllocaRequiredAlignment; + } + return false; } /// Given a FunctionDecl's FormatAttr, attempts to populate the FomatStringInfo diff --git a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp index f1d486304e12..53eec2729aa7 100644 --- a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp +++ b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp @@ -14,6 +14,12 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) multi_ptr private_alloca(kernel_handler &h); +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_private_alloca)]] +multi_ptr aligned_private_alloca(kernel_handler &h); } // namespace experimental } // namesapce oneapi } // namespace ext diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp index f6a58019e1db..eafad68bc0a9 100644 --- a/clang/test/CodeGenSYCL/builtin-alloca.cpp +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s \ // RUN: | FileCheck %s -// Test codegen for __builtin_intel_sycl_alloca. +// Test codegen for __builtin_intel_sycl_alloca and __builtin_intel_sycl_alloca_with_align. #include @@ -53,5 +53,31 @@ SYCL_EXTERNAL void test(sycl::kernel_handler &kh) { // CHECK: declare !sycl_used_aspects ![[#USED_ASPECTS]] ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs +// CHECK-LABEL: define dso_local spir_func void @_Z12test_alignedRN4sycl3_V114kernel_handlerE( +// CHECK-SAME: ptr addrspace(4) noundef align 1 dereferenceable(1) [[KH:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 16) +// CHECK-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 8) +// CHECK-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8 +// CHECK-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 4) +// CHECK-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) +// CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) +// CHECK-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 +// CHECK-NEXT: ret void +SYCL_EXTERNAL void test_aligned(sycl::kernel_handler &kh) { + auto ptr0 = sycl::ext::oneapi::experimental::aligned_private_alloca(kh); + auto ptr1 = sycl::ext::oneapi::experimental::aligned_private_alloca(kh); + auto ptr2 = sycl::ext::oneapi::experimental::aligned_private_alloca(kh); +} + // CHECK-DAG: ![[#USED_ASPECTS]] = !{i32 [[#PRIVATE_ALLOCA_ASPECT:]]} // CHECK-DAG: !{!"ext_oneapi_private_alloca", i32 [[#PRIVATE_ALLOCA_ASPECT]]} diff --git a/clang/test/SemaSYCL/Inputs/private_alloca.hpp b/clang/test/SemaSYCL/Inputs/private_alloca.hpp index 94eb6c007a01..2d675962d4d1 100644 --- a/clang/test/SemaSYCL/Inputs/private_alloca.hpp +++ b/clang/test/SemaSYCL/Inputs/private_alloca.hpp @@ -2,6 +2,8 @@ #include "./sycl.hpp" +#include + namespace sycl { inline namespace _V1 { namespace ext { @@ -13,6 +15,11 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) multi_ptr private_alloca(kernel_handler &h); +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +multi_ptr aligned_private_alloca(kernel_handler &h); } // namespace experimental } // namesapce oneapi } // namespace ext diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp index b4c8c654b2d7..75abec5851d7 100644 --- a/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp +++ b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify %s -// Test errors of __builtin_intel_sycl_alloca when used in SYCL device code. +// Test errors of __builtin_intel_sycl_alloca and +// __builtin_intel_sycl_alloca_with_align when used in SYCL device code. #include @@ -17,6 +18,9 @@ constexpr sycl::specialization_id negative_expr(1 - ten()); constexpr const sycl::specialization_id &negative_expr_ref = negative_expr; +template +constexpr T exp2(unsigned a) { return a == 0 ? 1 : 2 * exp2(a - 1); } + struct wrapped_int { int a; }; struct non_trivial { int a = 1; }; @@ -66,12 +70,63 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) sycl::multi_ptr private_alloca_bad_7(sycl::kernel_handler &); +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_0(); + +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_1(sycl::kernel_handler &h); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_2(sycl::kernel_handler &h); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_3(const wrapped_int &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_4(sycl::kernel_handler); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_5(const sycl::kernel_handler &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_6(sycl::kernel_handler &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +aligned_private_alloca_bad_7(sycl::kernel_handler &); + // expected-error@+4 {{cannot redeclare builtin function 'private_alloca'}} // expected-note@+3 {{'private_alloca' is a builtin with type 'multi_ptr (kernel_handler &)'}} template <> sycl::multi_ptr sycl::ext::oneapi::experimental::private_alloca(sycl::kernel_handler &h); +// expected-error@+4 {{cannot redeclare builtin function 'aligned_private_alloca'}} +// expected-note@+3 {{'aligned_private_alloca' is a builtin with type 'multi_ptr (kernel_handler &)'}} +template <> +sycl::multi_ptr +sycl::ext::oneapi::experimental::aligned_private_alloca(sycl::kernel_handler &h); + void test(sycl::kernel_handler &h) { // expected-error@+1 {{builtin functions must be directly called}} auto funcPtr = sycl::ext::oneapi::experimental::private_alloca; @@ -82,10 +137,10 @@ void test(sycl::kernel_handler &h) { // expected-error@+1 {{too few arguments to function call, expected 1, have 0}} private_alloca_bad_0(); - // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed three template arguments. Got 0}} + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed 3 template arguments. Got 0}} private_alloca_bad_1(h); - // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed three template arguments. Got 1}} + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed 3 template arguments. Got 1}} private_alloca_bad_2(h); // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'const wrapped_int &'}} @@ -132,4 +187,72 @@ void test(sycl::kernel_handler &h) { // expected-warning@+1 {{__builtin_intel_sycl_alloca expects a specialization constant with a default value of at least one as an argument. Got -9}} sycl::ext::oneapi::experimental::private_alloca(h); + + constexpr size_t alignment = 16; + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align cannot be used in source code. Use the aligned_private_alloca alias instead}} + __builtin_intel_sycl_alloca_with_align(h); + + // expected-error@+1 {{too few arguments to function call, expected 1, have 0}} + aligned_private_alloca_bad_0(); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align expects to be passed 4 template arguments. Got 0}} + aligned_private_alloca_bad_1(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align expects to be passed 4 template arguments. Got 1}} + aligned_private_alloca_bad_2(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'const wrapped_int &'}} + aligned_private_alloca_bad_3(wrapped_int{10}); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'sycl::kernel_handler'}} + aligned_private_alloca_bad_4(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'const sycl::kernel_handler &'}} + aligned_private_alloca_bad_5(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'sycl::multi_ptr'}} + aligned_private_alloca_bad_6(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align can only return 'sycl::private_ptr' to a cv-unqualified trivial type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align must be passed a specialization constant of integral value type as a template argument. Got 'int'}} + aligned_private_alloca_bad_7(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca_with_align must be passed a specialization constant of integral value type as a template argument. Got 'const sycl::specialization_id &'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{requested alignment is not a power of 2}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-error@+1 {{requested alignment must be 268435455 or smaller}} + sycl::ext::oneapi::experimental::aligned_private_alloca(60), size, sycl::access::decorated::yes>(h); + + // expected-error@+1 {{requested alignment is less than minimum alignment of 4 for type 'float'}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-warning@+1 {{__builtin_intel_sycl_alloca_with_align expects a specialization constant with a default value of at least one as an argument. Got 0}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-warning@+1 {{__builtin_intel_sycl_alloca_with_align expects a specialization constant with a default value of at least one as an argument. Got -1}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); + + // expected-warning@+1 {{__builtin_intel_sycl_alloca_with_align expects a specialization constant with a default value of at least one as an argument. Got -9}} + sycl::ext::oneapi::experimental::aligned_private_alloca(h); } diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp index 49ec04817c38..4d80b61c076b 100644 --- a/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp +++ b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify %s -// Test errors of __builtin_intel_sycl_alloca when used in targets other than SYCL devices. +// Test errors of __builtin_intel_sycl_alloca and +// __builtin_intel_sycl_alloca_with_align when used in targets other than SYCL devices. #include @@ -15,7 +16,15 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) sycl::multi_ptr private_alloca_bad_0(sycl::kernel_handler &h); +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +sycl::multi_ptr +private_aligned_alloca_bad_0(sycl::kernel_handler &h); + void test(sycl::kernel_handler &h) { // expected-error@+1 {{'__builtin_intel_sycl_alloca' is only available in SYCL device}} private_alloca_bad_0(h); + // expected-error@+1 {{'__builtin_intel_sycl_alloca_with_align' is only available in SYCL device}} + private_aligned_alloca_bad_0(h); } diff --git a/clang/test/SemaSYCL/builtin-alloca.cpp b/clang/test/SemaSYCL/builtin-alloca.cpp index a9653c8bf587..764bec6eae34 100644 --- a/clang/test/SemaSYCL/builtin-alloca.cpp +++ b/clang/test/SemaSYCL/builtin-alloca.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify -Wpedantic %s -// Test verification of __builtin_intel_sycl_alloca when used in different valid ways. +// Test verification of __builtin_intel_sycl_alloca and +// __builtin_intel_sycl_alloca_with_align when used in different valid ways. #include @@ -25,4 +26,11 @@ void basic_test(sycl::kernel_handler &kh) { float, intSize, sycl::access::decorated::no>(kh); sycl::ext::oneapi::experimental::private_alloca< myStruct, shortSize, sycl::access::decorated::legacy>(kh); + + sycl::ext::oneapi::experimental::aligned_private_alloca< + int, alignof(int), size, sycl::access::decorated::yes>(kh); + sycl::ext::oneapi::experimental::aligned_private_alloca< + float, alignof(float) * 2, intSize, sycl::access::decorated::no>(kh); + sycl::ext::oneapi::experimental::aligned_private_alloca< + myStruct, alignof(myStruct) * 2, shortSize, sycl::access::decorated::legacy>(kh); } diff --git a/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp b/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp index f06af4b44112..e6ed5123e4c5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp @@ -38,9 +38,26 @@ __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) [[__sycl_detail__::__uses_aspects__(aspect::ext_oneapi_private_alloca)]] private_ptr< ElementType, DecorateAddress> private_alloca(kernel_handler &kh); +// On the device, this is an alias to __builtin_intel_sycl_alloca_with_align. + +/// Function allocating and returning a pointer to an unitialized region of +/// memory capable of hosting `kh.get_specialization_constant()` +/// elements of type \tp ElementType. The pointer will be a `sycl::private_ptr` +/// and will or will not be decorated depending on \tp DecorateAddres. The +/// pointer will be aligned to `Alignment`. +/// +/// On the host, this function simply throws, as this is not supported there. +/// +/// See sycl_ext_oneapi_private_alloca. +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca_with_align) +[[__sycl_detail__::__uses_aspects__(aspect::ext_oneapi_private_alloca)]] private_ptr< + ElementType, DecorateAddress> aligned_private_alloca(kernel_handler &kh); + #else -// On the host, throw, this is not supported. +// On the host, throw, these are not supported. template private_ptr private_alloca(kernel_handler &kh) { @@ -49,6 +66,15 @@ private_ptr private_alloca(kernel_handler &kh) { PI_ERROR_INVALID_OPERATION); } +template +private_ptr +aligned_private_alloca(kernel_handler &kh) { + throw feature_not_supported("sycl::ext::oneapi::experimental::aligned_" + "private_alloca is not supported on host", + PI_ERROR_INVALID_OPERATION); +} + #endif // __SYCL_DEVICE_ONLY__ } // namespace ext::oneapi::experimental diff --git a/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp index 5f350e2712be..5a5c206f09e2 100644 --- a/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp +++ b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp @@ -11,30 +11,38 @@ #include #include -class Kernel; +template class Kernel; constexpr sycl::specialization_id Size(10); -static std::error_code test() { +template static void test(Func f) { + constexpr size_t N = 10; sycl::queue Queue; - sycl::buffer B(10); + sycl::buffer B(N); try { Queue.submit([&](sycl::handler &Cgh) { sycl::accessor Acc(B, Cgh, sycl::write_only, sycl::no_init); - Cgh.parallel_for(10, [=](sycl::id<1>, sycl::kernel_handler Kh) { - sycl::ext::oneapi::experimental::private_alloca< - int, Size, sycl::access::decorated::no>(Kh); - }); + Cgh.parallel_for>( + N, [=](sycl::id<1>, sycl::kernel_handler Kh) { f(Kh); }); }); } catch (sycl::exception &Exception) { - return Exception.code(); + assert(Exception.code() == sycl::errc::invalid && "Unexpected error code"); + return; } assert(false && "Exception not thrown"); } int main() { - assert(test() == sycl::errc::invalid && "Unexpected error code"); + test<0>([](sycl::kernel_handler &Kh) { + sycl::ext::oneapi::experimental::private_alloca< + int, Size, sycl::access::decorated::no>(Kh); + }); + + test<1>([](sycl::kernel_handler &Kh) { + sycl::ext::oneapi::experimental::aligned_private_alloca< + int, alignof(int64_t), Size, sycl::access::decorated::no>(Kh); + }); return 0; } diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp index ebacb016adff..241eac2d4cd3 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp @@ -8,11 +8,23 @@ #include template + sycl::access::decorated DecorateAddress, std::size_t Alignment> class Kernel; template + sycl::access::decorated DecorateAddress, std::size_t Alignment> +static auto allocate(sycl::kernel_handler &kh) { + if constexpr (Alignment > 0) { + return sycl::ext::oneapi::experimental::aligned_private_alloca< + ElementType, Alignment, Size, DecorateAddress>(kh); + } else { + return sycl::ext::oneapi::experimental::private_alloca(kh); + } +} + +template void test() { std::size_t N; @@ -27,10 +39,11 @@ void test() { cgh.set_specialization_constant(N); using spec_const_type = std::remove_reference_t; using size_type = typename spec_const_type::value_type; - cgh.single_task>( + cgh.single_task< + Kernel>( [=](sycl::kernel_handler h) { - auto ptr = sycl::ext::oneapi::experimental::private_alloca< - ElementType, Size, DecorateAddress>(h); + auto ptr = + allocate(h); const std::size_t M = h.get_specialization_constant(); ptr[0] = static_cast(M); ElementType value{1}; diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp index e427307918f7..d382843467b1 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp @@ -8,4 +8,7 @@ constexpr sycl::specialization_id size(true); -int main() { test(); } +int main() { + test(); + test(); +} diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp index 4420a44626df..a483a7cf975c 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp @@ -11,4 +11,7 @@ constexpr sycl::specialization_id size(10); -int main() { test(); } +int main() { + test(); + test(); +} diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp index e22237ff5802..a4b091dc5e9c 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp @@ -11,4 +11,7 @@ constexpr sycl::specialization_id size(10); -int main() { test(); } +int main() { + test(); + test(); +} diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp index 9663d1f21530..6d4447191d3e 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp @@ -14,4 +14,8 @@ int main() { test(); test(); test(); + + test(); + test(); + test(); } diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp index cc6eb7a57c86..ca52e6f3f67c 100644 --- a/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp @@ -56,4 +56,8 @@ class value_and_sign { bool no_less_than_zero; }; -int main() { test(); } +int main() { + test(); + test(); +} diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp b/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp index 5040bc2c4e3e..04ae3d989877 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp +++ b/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp @@ -11,14 +11,24 @@ constexpr sycl::specialization_id size(10); -int main() { +template static void test(Func f) { try { - std::array h; - sycl::ext::oneapi::experimental::private_alloca< - float, size, sycl::access::decorated::no>( - *reinterpret_cast(h.data())); + f(); } catch (sycl::exception &e) { assert(e.code() == sycl::errc::feature_not_supported && "Unexpected error code"); } } + +int main() { + std::array h; + auto &kh = *reinterpret_cast(h.data()); + test([&kh]() { + sycl::ext::oneapi::experimental::aligned_private_alloca< + float, alignof(double), size, sycl::access::decorated::no>(kh); + }); + test([&kh]() { + sycl::ext::oneapi::experimental::private_alloca< + float, size, sycl::access::decorated::no>(kh); + }); +} diff --git a/sycl/test/extensions/private_alloca.cpp b/sycl/test/extensions/private_alloca.cpp new file mode 100644 index 000000000000..7687915436a8 --- /dev/null +++ b/sycl/test/extensions/private_alloca.cpp @@ -0,0 +1,149 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -c -o %t.bc %s +// RUN: %if asserts %{sycl-post-link -debug-only=SpecConst %t.bc -spec-const=native -o %t.txt 2>&1 | FileCheck %s -check-prefixes=CHECK-LOG %} %else %{sycl-post-link %t.bc -spec-const=native -o %t.txt 2>&1 %} +// RUN: cat %t_0.prop | FileCheck %s -check-prefixes=CHECK,CHECK-RT +// RUN: llvm-spirv -o %t_0.spv -spirv-max-version=1.1 -spirv-ext=+all %t_0.bc +// RUN: llvm-spirv -o - --to-text %t_0.spv | FileCheck %s -check-prefixes=CHECK-SPV + +// Check SPIR-V code generation for 'sycl_ext_oneapi_private_alloca'. Each call +// to the extension API is annotated as follows for future reference: +// +// : storage_class=, element_type=, alignment= +// +// - : Variable name in the test below. These will be the result of +// bitcasting a variable to a different pointer type. We use this instead of the +// variable due to FileCheck limitations. +// - : 'generic' if is casted to generic before being stored in the +// multi_ptr or 'function' otherwise. +// - : element type. 'Bitcast X Y' will originate value , being +// X a pointer to and storage class function. +// - : alignment. will appear in a 'Decorage Aligment +// ' operation. + +#include + +#include +#include + +enum class enumeration { a, b, c }; + +struct composite { + int a; + int b; + composite() = default; +}; + +constexpr sycl::specialization_id int8_id(42); +constexpr sycl::specialization_id int16_id(34); +constexpr sycl::specialization_id uint32_id(46); +constexpr sycl::specialization_id int32_id(52); +constexpr sycl::specialization_id uint64_id(81); + +template SYCL_EXTERNAL void keep(const Ts &...); + +SYCL_EXTERNAL void test(sycl::kernel_handler &kh) { + keep(/*B0: storage_class=function, element_type=f32, alignment=4*/ + sycl::ext::oneapi::experimental::private_alloca< + float, int8_id, sycl::access::decorated::yes>(kh), + /*B1: storage_class=generic, element_type=f64, alignment=8*/ + sycl::ext::oneapi::experimental::private_alloca< + double, uint32_id, sycl::access::decorated::no>(kh), + /*B2: storage_class=function, element_type=i32, alignment=4*/ + sycl::ext::oneapi::experimental::private_alloca< + int, int16_id, sycl::access::decorated::legacy>(kh), + /*B3: storage_class=generic, element_type=i64, alignment=16*/ + sycl::ext::oneapi::experimental::aligned_private_alloca< + int64_t, alignof(int64_t) * 2, uint64_id, + sycl::access::decorated::no>(kh), + /*B4: storage_class=function, element_type=composite, alignment=32*/ + sycl::ext::oneapi::experimental::aligned_private_alloca< + composite, alignof(composite) * 8, int32_id, + sycl::access::decorated::yes>(kh)); +} + +// CHECK: [SYCL/specialization constants] +// CHECK-DAG: [[UNIQUE_PREFIX:[a-z0-9]+]]____ZL7int8_id=2| +// CHECK-DAG: [[UNIQUE_PREFIX]]____ZL8int16_id=2| +// CHECK-DAG: [[UNIQUE_PREFIX]]____ZL8int32_id=2| +// CHECK-DAG: [[UNIQUE_PREFIX]]____ZL9uint32_id=2| +// CHECK-DAG: [[UNIQUE_PREFIX]]____ZL9uint64_id=2| + +// CHECK-RT: [SYCL/specialization constants default values] +// CHECK-DEF: [SYCL/specialization constants default values] +// CHECK-DEF: all=2| + +// CHECK-LOG: sycl.specialization-constants +// CHECK-LOG:[[UNIQUE_PREFIX:[a-z0-9]+]]____ZL7int8_id={0, 0, 1} +// CHECK-NEXT-LOG:[[UNIQUE_PREFIX]]____ZL8int16_id={2, 0, 2} +// CHECK-NEXT-LOG:[[UNIQUE_PREFIX]]____ZL8int32_id={4, 0, 4} +// CHECK-NEXT-LOG:[[UNIQUE_PREFIX]]____ZL9uint32_id={5, 0, 4} +// CHECK-NEXT-LOG:[[UNIQUE_PREFIX]]____ZL8int64_id={6, 0, 8} +// CHECK-NEXT-LOG:{0, 1, 42} +// CHECK-NEXT-LOG:{2, 2, 34} +// CHECK-NEXT-LOG:{6, 4, 52} +// CHECK-NEXT-LOG:{10, 4, 46} +// CHECK-NEXT-LOG:{22, 8, 81} + +// CHECK-SPV-DAG: Name [[#B0:]] "alloca1" +// CHECK-SPV-DAG: Name [[#B1:]] "alloca" +// CHECK-SPV-DAG: Name [[#B2:]] "alloca2" +// CHECK-SPV-DAG: Name [[#B3:]] "alloca3" +// CHECK-SPV-DAG: Name [[#B4:]] "alloca4" + +// CHECK-SPV-DAG: Decorate [[#SPEC0:]] SpecId 0 +// CHECK-SPV-DAG: Decorate [[#SPEC1:]] SpecId 1 +// CHECK-SPV-DAG: Decorate [[#SPEC2:]] SpecId 2 +// CHECK-SPV-DAG: Decorate [[#SPEC3:]] SpecId 3 +// CHECK-SPV-DAG: Decorate [[#SPEC4:]] SpecId 4 +// CHECK-SPV-DAG: Decorate [[#B0]] Alignment 4 +// CHECK-SPV-DAG: Decorate [[#B1]] Alignment 8 +// CHECK-SPV-DAG: Decorate [[#B2]] Alignment 4 +// CHECK-SPV-DAG: Decorate [[#B3]] Alignment 16 +// CHECK-SPV-DAG: Decorate [[#B4]] Alignment 32 + +// CHECK-SPV-DAG: TypeInt [[#I8TY:]] 8 0 +// CHECK-SPV-DAG: TypeInt [[#I16TY:]] 16 0 +// CHECK-SPV-DAG: TypeInt [[#I32TY:]] 32 0 +// CHECK-SPV-DAG: TypeInt [[#I64TY:]] 64 0 +// CHECK-SPV-DAG: TypeFloat [[#F32TY:]] 32 +// CHECK-SPV-DAG: TypeFloat [[#F64TY:]] 64 +// CHECK-SPV-DAG: TypeStruct [[#COMPTY:]] [[#I32TY]] [[#I32TY]] + +// CHECK-SPV-DAG: SpecConstant [[#I8TY]] [[#SPEC0]] 42 +// CHECK-SPV-DAG: SpecConstant [[#I32TY]] [[#SPEC1]] 46 +// CHECK-SPV-DAG: SpecConstant [[#I16TY]] [[#SPEC2]] 34 +// CHECK-SPV-DAG: SpecConstant [[#I64TY]] [[#SPEC3]] 81 +// CHECK-SPV-DAG: SpecConstant [[#I32TY]] [[#SPEC4]] 52 + +// CHECK-SPV-DAG: TypeArray [[#ARRF32TY:]] [[#F32TY]] [[#SPEC0]] +// CHECK-SPV-DAG: TypePointer [[#ARRF32PTRTY:]] [[#FUNCTIONSTORAGE:]] [[#ARRF32TY]] +// CHECK-SPV-DAG: TypePointer [[#F32PTRTY:]] [[#FUNCTIONSTORAGE]] [[#F32TY]] +// CHECK-SPV-DAG: TypeArray [[#ARRF64TY:]] [[#F64TY]] [[#SPEC1]] +// CHECK-SPV-DAG: TypePointer [[#ARRF64PTRTY:]] [[#FUNCTIONSTORAGE]] [[#ARRF64TY]] +// CHECK-SPV-DAG: TypePointer [[#F64PTRTY:]] [[#FUNCTIONSTORAGE]] [[#F64TY]] +// CHECK-SPV-DAG: TypeArray [[#ARRI32TY:]] [[#I32TY]] [[#SPEC2]] +// CHECK-SPV-DAG: TypePointer [[#ARRI32PTRTY:]] [[#FUNCTIONSTORAGE]] [[#ARRI32TY]] +// CHECK-SPV-DAG: TypePointer [[#I32PTRTY:]] [[#FUNCTIONSTORAGE]] [[#I32TY]] +// CHECK-SPV-DAG: TypeArray [[#ARRI64TY:]] [[#I64TY]] [[#SPEC3]] +// CHECK-SPV-DAG: TypePointer [[#ARRI64PTRTY:]] [[#FUNCTIONSTORAGE]] [[#ARRI64TY]] +// CHECK-SPV-DAG: TypePointer [[#I64PTRTY:]] [[#FUNCTIONSTORAGE]] [[#I64TY]] +// CHECK-SPV-DAG: TypeArray [[#ARRCOMPTY:]] [[#COMPTY]] [[#SPEC4]] +// CHECK-SPV-DAG: TypePointer [[#ARRCOMPPTRTY:]] [[#FUNCTIONSTORAGE]] [[#ARRCOMPTY]] +// CHECK-SPV-DAG: TypePointer [[#COMPPTRTY:]] [[#FUNCTIONSTORAGE]] [[#COMPTY]] + +// CHECK-SPV-DAG: Variable [[#ARRF32PTRTY]] [[#V0:]] [[#FUNCTIONSTORAGE]] +// CHECK-SPV-DAG: Bitcast [[#F32PTRTY]] [[#B0]] [[#V0]] +// CHECK-SPV-DAG: Store {{.*}} [[#B0]] +// CHECK-SPV-DAG: Variable [[#ARRF64PTRTY]] [[#V1:]] [[#FUNCTIONSTORAGE]] +// CHECK-SPV-DAG: Bitcast [[#F64PTRTY]] [[#B1]] [[#V1]] +// CHECK-SPV-DAG: PtrCastToGeneric {{.*}} [[#G1:]] [[#B1]] +// CHECK-SPV-DAG: Store {{.*}} [[#G1]] +// CHECK-SPV-DAG: Variable [[#ARRI32PTRTY]] [[#V2:]] [[#FUNCTIONSTORAGE]] +// CHECK-SPV-DAG: Bitcast [[#I32PTRTY]] [[#B2]] [[#V2]] +// CHECK-SPV-DAG: Store {{.*}} [[#B2]] +// CHECK-SPV-DAG: Variable [[#ARRI64PTRTY]] [[#V3:]] [[#FUNCTIONSTORAGE]] +// CHECK-SPV-DAG: Bitcast [[#I64PTRTY]] [[#B3]] [[#V3]] +// CHECK-SPV-DAG: PtrCastToGeneric {{.*}} [[#G3:]] [[#B3]] +// CHECK-SPV-DAG: Store {{.*}} [[#G3]] +// CHECK-SPV-DAG: Variable [[#ARRCOMPPTRTY]] [[#V4:]] [[#FUNCTIONSTORAGE]] +// CHECK-SPV-DAG: Bitcast [[#COMPPTRTY]] [[#B4]] [[#V4]] +// CHECK-SPV-DAG: Store {{.*}} [[#B4]] diff --git a/sycl/test/optional_kernel_features/private_alloca.cpp b/sycl/test/optional_kernel_features/private_alloca.cpp index 041b9e95378c..3652f8c21d11 100644 --- a/sycl/test/optional_kernel_features/private_alloca.cpp +++ b/sycl/test/optional_kernel_features/private_alloca.cpp @@ -7,9 +7,13 @@ #include -class Kernel; +class Kernel0; +class Kernel1; -// CHECK-LABEL: spir_kernel void @_ZTS6Kernel +// CHECK-LABEL: spir_kernel void @_ZTS7Kernel0 +// CHECK-SAME: !sycl_used_aspects ![[#USED_ASPECTS:]] + +// CHECK-LABEL: spir_kernel void @_ZTS7Kernel1 // CHECK-SAME: !sycl_used_aspects ![[#USED_ASPECTS:]] // CHECK: ![[#USED_ASPECTS]] = !{i32 64} @@ -19,13 +23,24 @@ constexpr static sycl::specialization_id size(10); SYCL_EXTERNAL void foo(sycl::id<1> i, int *a, sycl::decorated_private_ptr tmp); -void test(sycl::queue q, sycl::range<1> r, int *a, int s) { +void test0(sycl::queue q, sycl::range<1> r, int *a, int s) { q.submit([&](sycl::handler &cgh) { cgh.set_specialization_constant(s); - cgh.parallel_for(r, [=](sycl::id<1> i, sycl::kernel_handler kh) { + cgh.parallel_for(r, [=](sycl::id<1> i, sycl::kernel_handler kh) { foo(i, a, sycl::ext::oneapi::experimental::private_alloca< int, size, sycl::access::decorated::yes>(kh)); }); }); } + +void test1(sycl::queue q, sycl::range<1> r, int *a, int s) { + q.submit([&](sycl::handler &cgh) { + cgh.set_specialization_constant(s); + cgh.parallel_for(r, [=](sycl::id<1> i, sycl::kernel_handler kh) { + foo(i, a, + sycl::ext::oneapi::experimental::aligned_private_alloca< + int, alignof(int) * 2, size, sycl::access::decorated::yes>(kh)); + }); + }); +}