Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tests for matrix size be runtime dimension #15429

Draft
wants to merge 8 commits into
base: sycl
Choose a base branch
from
22 changes: 22 additions & 0 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//==--- joint_matrix_bf16_fill_k_cache_arg_dim.cpp - DPC++ joint_matrix----==//
YixingZhang007 marked this conversation as resolved.
Show resolved Hide resolved
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: aspect-ext_intel_matrix

// https://jira.devtools.intel.com/browse/GSD-9716
YixingZhang007 marked this conversation as resolved.
Show resolved Hide resolved
// XFAIL: arch-intel_gpu_pvc

// RUN: %{build} -o %t_arg_dim_vnni.out -ffp-model=precise -DARG_DIM -DVNNI
YixingZhang007 marked this conversation as resolved.
Show resolved Hide resolved
// RUN: %{run} %t_arg_dim_vnni.out

// RUN: %{build} -o %t_arg_dim.out -ffp-model=precise -DARG_DIM
// RUN: %{run} %t_arg_dim.out

// -ffp-model=precise is added to not depend on compiler defaults.

#include "common.hpp"
#include "joint_matrix_bf16_fill_k_cache_impl.hpp"
19 changes: 19 additions & 0 deletions sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,23 @@ static constexpr void manually_unroll_loop(F &&f) {

// Forward declaration only (no definition is needed here): MatMul<TM, TN, TK>
// is passed as the template argument to get_sg_size in joint_matmul below.
// NOTE(review): presumably this also serves as the SYCL kernel name -- confirm.
template <size_t TM, size_t TN, size_t TK> class MatMul;

#ifdef ARG_DIM
template <size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
size_t MCache2, size_t NCache2, size_t KCache2>
#else // ARG_DIM
template <size_t rowsA, size_t colsA, size_t rowsB, size_t colsB,
size_t vnniFactor, typename TOperand, typename TResult, size_t TM,
size_t TN, size_t TK, size_t MCache1, size_t NCache1, size_t KCache1,
size_t MCache2, size_t NCache2, size_t KCache2>
YixingZhang007 marked this conversation as resolved.
Show resolved Hide resolved
#endif // ARG_DIM

#ifdef ARG_DIM
double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i, size_t rowsA, size_t colsA, size_t rowsB, size_t colsB) {
#else // ARG_DIM
double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i) {
#endif // ARG_DIM

size_t sgSize = get_sg_size<MatMul<TM, TN, TK>>(q);
range<2> global{rowsA / MCache1, (colsB / NCache1) * sgSize};
range<2> cachelocal{MCache2 / MCache1, NCache2 / NCache1 * sgSize};
Expand Down Expand Up @@ -381,10 +393,17 @@ void test() {
// run testIterations time, aggregate and calculate average run time
double totalDuration = 0;
for (unsigned int i = 0; i < testIterations; i++) {
#ifdef ARG_DIM
double duration =
joint_matmul<vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
KCache1, MCache2, NCache2, KCache2>(A, B, C, q, i,
MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE);
#else // ARG_DIM
double duration =
joint_matmul<MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE,
vnniFactor, T, TResult, TM, TN, TK, MCache1, NCache1,
KCache1, MCache2, NCache2, KCache2>(A, B, C, q, i);
#endif // ARG_DIM
if (i >= recordThresh) {
totalDuration += duration;
}
Expand Down
Loading