Skip to content

Commit

Permalink
Fix PVC Example (codeplaysoftware#74)
Browse files: browse the repository at this point in the history
Fixes the PVC example, which gave wrong results for batch sizes greater than 16.

---------

Co-authored-by: Mehdi Goli <[email protected]>
Co-authored-by: aacostadiaz <[email protected]>
  • Loading branch information
3 people committed Aug 5, 2024
1 parent 1596bc7 commit 2690362
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions include/cutlass/gemm/kernel/intel_pvc_gemm.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -222,11 +222,11 @@ class GemmUniversal<
 const int n_coord = (BlockIdxY() * num_sg + thread_idx / SubgroupSize) * get<1>(subgroup_shape);
 const int l_coord = BlockIdxZ();
-Tensor tAi = params.mainloop.gmem_tiled_copy_a.get_pvc_tensor(make_coord(m_coord, 0, l_coord),
+Tensor tAi = params.mainloop.gmem_tiled_copy_a.get_pvc_tensor(make_coord(m_coord, 0, 0),
 make_shape(_1{}, K, L),
 make_stride(Int<FragsM * DpasM>{}, _1{}));
-Tensor tBi = params.mainloop.gmem_tiled_copy_b.get_pvc_tensor(make_coord(0, n_coord, l_coord),
+Tensor tBi = params.mainloop.gmem_tiled_copy_b.get_pvc_tensor(make_coord(0, n_coord, 0),
 make_shape(K, Int<FragsN>{}, L),
 make_stride(_1{}, Int<DpasN>{}));
Expand Down Expand Up @@ -260,7 +260,7 @@ class GemmUniversal<
 );
 auto gmem_tiled_copy_c = make_xe_2d_copy<XE_2D_U32x8x16x1x1_ST_N>(make_tensor(params.epilogue.ptr_D, make_shape(M, N, L), params.epilogue.dD));
-Tensor tCi = gmem_tiled_copy_c.get_pvc_tensor(make_coord(m_coord, n_coord, l_coord),
+Tensor tCi = gmem_tiled_copy_c.get_pvc_tensor(make_coord(m_coord, n_coord, 0),
 make_shape(Int<FragsM>{}, Int<FragsN>{}, L),
 make_stride(Int<DpasM>{}, Int<DpasN>{}));
Expand Down

0 comments on commit 2690362

Please sign in to comment.