Skip to content

Commit

Permalink
[core] Fix calling convention differences between libstdc++ and libc++ (
Browse files Browse the repository at this point in the history
NVIDIA#2188)

* [core] Fix calling convention differences between libstdc++ and libc++
for small structs.

In libc++, the compiler uses a "zero-cost" model of passing small struct
values. In libstdc++, this was never fixed in order to maintain
backwards compatibility with older compilers. So small structs are
passed on the stack via references.  See https://tinyurl.com/bdhrkx9x for
more information.

These patches fix nvq++ to adapt the calling convention to the header
files in use at compile time.

* Fix typo in comment.

* Update tests to expect optional width, align info.
  • Loading branch information
schweitzpgi committed Sep 5, 2024
1 parent bdea842 commit 5b030df
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 53 deletions.
2 changes: 1 addition & 1 deletion docker/build/assets.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ RUN cd /cuda-quantum && source scripts/configure_build.sh && \
--param nvqpp_site_config=build/test/lit.site.cfg.py ${filtered} && \
# FIXME: Some tests are still failing when building against libc++
# tracked in https://github.com/NVIDIA/cuda-quantum/issues/1712
filtered=" --filter-out Kernel/(inline-qpu-func|signature-2)" && \
filtered=" --filter-out Kernel/inline-qpu-func" && \
if [ ! -x "$(command -v nvcc)" ]; then \
filtered+="|TargetConfig/check_compile"; \
fi && \
Expand Down
1 change: 1 addition & 0 deletions include/cudaq/Frontend/nvqpp/ASTBridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ class QuakeBridgeVisitor

bool TraverseRecordType(clang::RecordType *t);
bool interceptRecordDecl(clang::RecordDecl *x);
std::pair<std::uint64_t, unsigned> getWidthAndAlignment(clang::RecordDecl *x);
bool VisitRecordDecl(clang::RecordDecl *x);

// Type declarations to be converted to high-level Quake and CC types are
Expand Down
12 changes: 9 additions & 3 deletions lib/Frontend/nvqpp/ConvertDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ bool QuakeBridgeVisitor::interceptRecordDecl(clang::RecordDecl *x) {
return false;
members.push_back(popType());
}
return pushType(cc::StructType::get(ctx, members));
auto [width, align] = getWidthAndAlignment(x);
return pushType(cc::StructType::get(ctx, members, width, align));
}
if (name.equals("tuple")) {
auto *cts = cast<clang::ClassTemplateSpecializationDecl>(x);
Expand All @@ -265,9 +266,14 @@ bool QuakeBridgeVisitor::interceptRecordDecl(clang::RecordDecl *x) {
return false;
members.push_back(popType());
}
if (tuplesAreReversed)
auto [width, align] = getWidthAndAlignment(x);
if (tuplesAreReversed) {
std::reverse(members.begin(), members.end());
return pushType(cc::StructType::get(ctx, members));
// Resets are for libstdc++ calling convention compatibility.
width = 0;
align = 0;
}
return pushType(cc::StructType::get(ctx, members, width, align));
}
if (ignoredClass(x))
return true;
Expand Down
21 changes: 12 additions & 9 deletions lib/Frontend/nvqpp/ConvertType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,17 @@ bool QuakeBridgeVisitor::TraverseRecordType(clang::RecordType *t) {
return true;
}

std::pair<std::uint64_t, unsigned>
QuakeBridgeVisitor::getWidthAndAlignment(clang::RecordDecl *x) {
auto *defn = x->getDefinition();
assert(defn && "struct must be defined here");
auto *ty = defn->getTypeForDecl();
if (ty->isDependentType())
return {0, 0};
auto ti = getContext()->getTypeInfo(ty);
return {ti.Width, llvm::PowerOf2Ceil(ti.Align) / 8};
}

bool QuakeBridgeVisitor::VisitRecordDecl(clang::RecordDecl *x) {
assert(!x->isLambda() && "expected lambda to be handled in traverse");
// Note that we're generating a Type on the type stack.
Expand All @@ -232,15 +243,7 @@ bool QuakeBridgeVisitor::VisitRecordDecl(clang::RecordDecl *x) {
return pushType(cc::StructType::get(ctx, name, /*isOpaque=*/true));
SmallVector<Type> fieldTys =
lastTypes(std::distance(x->field_begin(), x->field_end()));
auto [width, alignInBytes] = [&]() -> std::pair<std::uint64_t, unsigned> {
auto *defn = x->getDefinition();
assert(defn && "struct must be defined here");
auto *ty = defn->getTypeForDecl();
if (ty->isDependentType())
return {0, 0};
auto ti = getContext()->getTypeInfo(ty);
return {ti.Width, llvm::PowerOf2Ceil(ti.Align) / 8};
}();
auto [width, alignInBytes] = getWidthAndAlignment(x);
if (name.empty())
return pushType(cc::StructType::get(ctx, fieldTys, width, alignInBytes));
return pushType(
Expand Down
11 changes: 11 additions & 0 deletions targettests/Kernel/signature-2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ struct T2 {
}
};

struct T3 {
void operator()(std::pair<long, double> tup) __qpu__ {
show5(std::get<0>(tup));
show7(std::get<1>(tup));
}
};

int main() {
VectorOfStruct vsData = {{1, 1.0f, 95.0}, {2, 18.4f, 86.945}};
VS{}(vsData);
Expand All @@ -164,6 +171,9 @@ int main() {

std::tuple<long, double> t2{2098, 99.5};
T2{}(t2);

std::pair<long, double> t3{34061, 1999.2};
T3{}(t3);

return 0;
}
Expand All @@ -175,3 +185,4 @@ int main() {
// CHECK: 1.200 2.400 4.800
// CHECK: 234 89238 3.140 Z
// CHECK: 2098 99.5
// CHECK: 34061 1999.2
36 changes: 18 additions & 18 deletions test/AST-Quake/tuple-0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ struct ArithmeticTupleQernel {

// clang-format off
// CHECK-LABEL: func.func @__nvqpp__mlirgen__ArithmeticTupleQernel(
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP:.*, .*, .*, .*]]}>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP:.*, .*, .*, .*]]}{{.*}}>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<1>
// CHECK: %[[VAL_3:.*]] = quake.mz %[[VAL_2]] : (!quake.veq<1>) -> !cc.stdvec<!quake.measure>
// CHECK: return
Expand All @@ -37,9 +37,9 @@ struct ArithmeticPairQernel {

// clang-format off
// CHECK-LABEL: func.func @__nvqpp__mlirgen__ArithmeticPairQernel(
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{f32, i32}>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{f32, i32}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{f32, i32}>>
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{f32, i32} [64,4]>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{f32, i32} [64,4]>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{f32, i32} [64,4]>>
// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<1>
// CHECK: %[[VAL_3:.*]] = quake.mz %[[VAL_2]] : (!quake.veq<1>) -> !cc.stdvec<!quake.measure>
// CHECK: return
Expand All @@ -56,12 +56,12 @@ struct ArithmeticTupleQernelWithUse {

// clang-format off
// CHECK-LABEL: func.func @__nvqpp__mlirgen__ArithmeticTupleQernelWithUse(
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP]]}>)
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP]]}{{.*}}>)
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i64
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: %[[VAL_4:.*]] = cc.c{{.*}} %[[VAL_3]]{{.*}} : (!cc.ptr<!cc.struct<{[[TUP]]}>>) -> !cc.ptr<i32>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: %[[VAL_4:.*]] = cc.c{{.*}} %[[VAL_3]]{{.*}} : (!cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>) -> !cc.ptr<i32>
// CHECK: %[[VAL_5:.*]] = cc.load %[[VAL_4]] : !cc.ptr<i32>
// CHECK: %[[VAL_6:.*]] = cc.cast signed %[[VAL_5]] : (i32) -> i64
// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<?>[%[[VAL_6]] : i64]
Expand Down Expand Up @@ -94,12 +94,12 @@ struct ArithmeticTupleQernelWithUse0 {

// clang-format off
// CHECK-LABEL: func.func @__nvqpp__mlirgen__ArithmeticTupleQernelWithUse0(
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP]]}>)
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{[[TUP]]}{{.*}}>)
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i64
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: %[[VAL_4:.*]] = cc.c{{.*}} %[[VAL_3]]{{.*}} : (!cc.ptr<!cc.struct<{[[TUP]]}>>) -> !cc.ptr<i32>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: %[[VAL_4:.*]] = cc.c{{.*}} %[[VAL_3]]{{.*}} : (!cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>) -> !cc.ptr<i32>
// CHECK: %[[VAL_5:.*]] = cc.load %[[VAL_4]] : !cc.ptr<i32>
// CHECK: %[[VAL_6:.*]] = cc.cast signed %[[VAL_5]] : (i32) -> i64
// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<?>[%[[VAL_6]] : i64]
Expand Down Expand Up @@ -131,10 +131,10 @@ struct ArithmeticPairQernelWithUse {

// clang-format off
// CHECK-LABEL: func.func @__nvqpp__mlirgen__ArithmeticPairQernelWithUse(
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{f32, i32}>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{f32, i32}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{f32, i32}>>
// CHECK: %[[VAL_2:.*]] = cc.compute_ptr %[[VAL_1]][1] : (!cc.ptr<!cc.struct<{f32, i32}>>) -> !cc.ptr<i32>
// CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<{f32, i32} [64,4]>)
// CHECK: %[[VAL_1:.*]] = cc.alloca !cc.struct<{f32, i32} [64,4]>
// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.struct<{f32, i32} [64,4]>>
// CHECK: %[[VAL_2:.*]] = cc.compute_ptr %[[VAL_1]][1] : (!cc.ptr<!cc.struct<{f32, i32} [64,4]>>) -> !cc.ptr<i32>
// CHECK: %[[VAL_3:.*]] = cc.load %[[VAL_2]] : !cc.ptr<i32>
// CHECK: %[[VAL_4:.*]] = cc.cast signed %[[VAL_3]] : (i32) -> i64
// CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq<?>[%[[VAL_4]] : i64]
Expand Down
44 changes: 22 additions & 22 deletions test/AST-Quake/tuple-1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,29 @@ int main1() {
}

// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_prepQubit
// CHECK-SAME: (%[[VAL_0:.*]]: !cc.struct<{i32, f64}>, %[[VAL_1:.*]]: !quake.ref)
// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.struct<{i32, f64}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<!cc.struct<{i32, f64}>>
// CHECK-SAME: (%[[VAL_0:.*]]: !cc.struct<{i32, f64} [128,8]>, %[[VAL_1:.*]]: !quake.ref)
// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.struct<{i32, f64} [128,8]>
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<!cc.struct<{i32, f64} [128,8]>>
// CHECK: return

// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_RzArcTan2
// CHECK-SAME: (%[[VAL_0:.*]]: i1, %[[VAL_1:.*]]: !cc.struct<{i32, f64}>) attributes
// CHECK-SAME: (%[[VAL_0:.*]]: i1, %[[VAL_1:.*]]: !cc.struct<{i32, f64} [128,8]>) attributes
// CHECK: %[[VAL_2:.*]] = cc.alloca i1
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<i1>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{i32, f64}>
// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr<!cc.struct<{i32, f64}>>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{i32, f64} [128,8]>
// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr<!cc.struct<{i32, f64} [128,8]>>
// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_5:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_6:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_7:.*]] = cc.load %[[VAL_2]] : !cc.ptr<i1>
// CHECK: cc.if(%[[VAL_7]]) {
// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> ()
// CHECK: }
// CHECK: %[[VAL_8:.*]] = cc.alloca !cc.struct<{i32, f64}>
// CHECK: %[[VAL_9:.*]] = cc.load %[[VAL_3]] : !cc.ptr<!cc.struct<{i32, f64}>>
// CHECK: cc.store %[[VAL_9]], %[[VAL_8]] : !cc.ptr<!cc.struct<{i32, f64}>>
// CHECK: %[[VAL_10:.*]] = cc.load %[[VAL_8]] : !cc.ptr<!cc.struct<{i32, f64}>>
// CHECK: call @__nvqpp__mlirgen__function_prepQubit{{.*}}(%[[VAL_10]], %[[VAL_6]]) : (!cc.struct<{i32, f64}>, !quake.ref) -> ()
// CHECK: %[[VAL_8:.*]] = cc.alloca !cc.struct<{i32, f64} [128,8]>
// CHECK: %[[VAL_9:.*]] = cc.load %[[VAL_3]] : !cc.ptr<!cc.struct<{i32, f64} [128,8]>>
// CHECK: cc.store %[[VAL_9]], %[[VAL_8]] : !cc.ptr<!cc.struct<{i32, f64} [128,8]>>
// CHECK: %[[VAL_10:.*]] = cc.load %[[VAL_8]] : !cc.ptr<!cc.struct<{i32, f64} [128,8]>>
// CHECK: call @__nvqpp__mlirgen__function_prepQubit{{.*}}(%[[VAL_10]], %[[VAL_6]]) : (!cc.struct<{i32, f64} [128,8]>, !quake.ref) -> ()
// CHECK: return
// CHECK: }

Expand All @@ -75,29 +75,29 @@ int main2() {
}

// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_prepQubit2
// CHECK-SAME: (%[[VAL_0:.*]]: !cc.struct<{[[TUP:.*, .*, .*]]}>, %[[VAL_1:.*]]: !quake.ref)
// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK-SAME: (%[[VAL_0:.*]]: !cc.struct<{[[TUP:.*, .*, .*]]}{{.*}}>, %[[VAL_1:.*]]: !quake.ref)
// CHECK: %[[VAL_2:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: return
// CHECK: }

// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_RzArcTan22
// CHECK-SAME: (%[[VAL_0:.*]]: i1, %[[VAL_1:.*]]: !cc.struct<{[[TUP]]}>) attributes
// CHECK-SAME: (%[[VAL_0:.*]]: i1, %[[VAL_1:.*]]: !cc.struct<{[[TUP]]}{{.*}}>) attributes
// CHECK: %[[VAL_2:.*]] = cc.alloca i1
// CHECK: cc.store %[[VAL_0]], %[[VAL_2]] : !cc.ptr<i1>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_5:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_6:.*]] = quake.alloca !quake.ref
// CHECK: %[[VAL_7:.*]] = cc.load %[[VAL_2]] : !cc.ptr<i1>
// CHECK: cc.if(%[[VAL_7]]) {
// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> ()
// CHECK: }
// CHECK: %[[VAL_8:.*]] = cc.alloca !cc.struct<{[[TUP]]}>
// CHECK: %[[VAL_9:.*]] = cc.load %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: cc.store %[[VAL_9]], %[[VAL_8]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: %[[VAL_10:.*]] = cc.load %[[VAL_8]] : !cc.ptr<!cc.struct<{[[TUP]]}>>
// CHECK: call @__nvqpp__mlirgen__function_prepQubit2{{.*}}(%[[VAL_10]], %[[VAL_6]]) : (!cc.struct<{[[TUP]]}>, !quake.ref) -> ()
// CHECK: %[[VAL_8:.*]] = cc.alloca !cc.struct<{[[TUP]]}{{.*}}>
// CHECK: %[[VAL_9:.*]] = cc.load %[[VAL_3]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: cc.store %[[VAL_9]], %[[VAL_8]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: %[[VAL_10:.*]] = cc.load %[[VAL_8]] : !cc.ptr<!cc.struct<{[[TUP]]}{{.*}}>>
// CHECK: call @__nvqpp__mlirgen__function_prepQubit2{{.*}}(%[[VAL_10]], %[[VAL_6]]) : (!cc.struct<{[[TUP]]}{{.*}}>, !quake.ref) -> ()
// CHECK: return
// CHECK: }

0 comments on commit 5b030df

Please sign in to comment.