Skip to content

Commit

Permalink
[GPU][DT] Add support for materializing tensor.empty and linalg.fill ops
Browse files Browse the repository at this point in the history
This revision moves the materialization patterns of tensor.empty and
linalg.fill to the "populateShapeLikeMaterializeEncodingPatterns" set and
updates the comments. This set of patterns lowers ops with encodings
to the same ops with materialized types.

It adds the tile swizzle shape inference to the tensor.empty pattern and
moves the utility to the "Utility methods" section without changes.

Signed-off-by: hanhanW <[email protected]>
  • Loading branch information
hanhanW committed Sep 20, 2024
1 parent 337d49c commit 709b981
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -466,9 +466,9 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);
populateShapeLikeMaterializeEncodingPatterns(materializeEncodingPattern,
target, typeConverter,
materializeEncodingValueFn);

if (failed(applyPartialConversion(funcOp, target,
std::move(materializeEncodingPattern)))) {
Expand Down
7 changes: 4 additions & 3 deletions compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);

/// Pouplates the set of patterns that lowers IREE dialect (e.g., Flow, Hal,
/// etc) ops with encoding types to pack/unpack ops.
void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
/// Populates the set of patterns that lower shape-like operations (e.g., Flow
/// ops, Hal ops, tensor.empty, linalg.fill, etc.) with encoding types to the
/// same op with materialized shapes.
void populateShapeLikeMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ void GPUMaterializeDeviceEncodingPass::runOnOperation() {
MaterializeEncodingValueFn materializeEncodingValueFn =
[](RankedTensorType, OpBuilder,
Location) -> FailureOr<MaterializeEncodingValueInfo> { return {}; };
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
populateShapeLikeMaterializeEncodingPatterns(
patterns, target, typeConverter, materializeEncodingValueFn);

patterns.insert<GPUSetEncodingOpLoweringConversion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@
// 1. MFMA_F32_16x16x4_F32
//-----------------------------------------------------------------------------

#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<255x513xf32>,
user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
round_dims_to = array<i64: 16, 16, 16>>
#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>
]>
// Fills an encoded tensor.empty with zero and stores it into the write-only
// output binding. The CHECK lines below verify that tensor.empty and
// linalg.fill are materialized with the swizzled packed shape
// (tensor<2x33x8x4x16x4xf32>) rather than the logical 255x513 shape.
func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32() {
  %c0 = arith.constant 0 : index
  %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
  %cst = arith.constant 0.0 : f32
  %1 = tensor.empty() : tensor<255x513xf32, #encoding>
  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<255x513xf32, #encoding>) -> tensor<255x513xf32, #encoding>
  flow.dispatch.tensor.store %2, %0, offsets = [0, 0], sizes = [255, 513], strides = [1, 1] : tensor<255x513xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
  return
}
// CHECK-LABEL: func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x33x8x4x16x4xf32>
// CHECK: %{{.+}} = linalg.fill ins({{.+}}) outs(%[[EMPTY]]

// -----

#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<255x513xf32>,
user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
round_dims_to = array<i64: 16, 16, 16>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ struct MaterializeEncodingIntoNopPass final
MaterializeEncodingConversionTarget target(*context);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
populateIREEMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);
populateShapeLikeMaterializeEncodingPatterns(materializeEncodingPattern,
target, typeConverter,
materializeEncodingValueFn);

if (failed(applyPartialConversion(operation, target,
std::move(materializeEncodingPattern)))) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,33 @@ namespace mlir::iree_compiler {
// Utility methods
//===---------------------------------------------------------------------===//

// Applies the tile swizzle carried by `encodingInfo` to `packedShape`: the
// trailing inner-tile dimensions are replaced by the expanded tile shape
// (derived from swizzle->expandShape), and only those tile dimensions are
// reordered by swizzle->permutation; the leading source dimensions are kept
// in place. Returns `packedShape` unchanged when it is empty or when no
// swizzle is present.
static SmallVector<OpFoldResult>
getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
                 MaterializeEncodingInfo encodingInfo) {
  if (packedShape.empty() || !encodingInfo.swizzle) {
    return SmallVector<OpFoldResult>(packedShape);
  }

  // Rank of the outer (un-tiled) dims: the packed rank minus one dim per
  // inner tile size.
  int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
  // Build the full permutation: identity over the outer dims, then the
  // swizzle permutation shifted by srcRank so it only touches tile dims.
  SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
  for (auto i : encodingInfo.swizzle->permutation) {
    perm.push_back(i + srcRank);
  }

  // Keep the outer dims, then append the expanded tile sizes as static index
  // attributes before applying the permutation.
  SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
  SmallVector<int64_t> expandedTileShape =
      getExpandedTileShape(encodingInfo.swizzle->expandShape);
  MLIRContext *ctx = packedShape[0].getContext();
  Builder b(ctx);
  for (int64_t d : expandedTileShape) {
    newShape.push_back(b.getIndexAttr(d));
  }
  applyPermutationToVector(newShape, perm);

  return newShape;
}

static Operation *dropEncodingAndCloneOp(OpBuilder &builder, Operation *op,
ValueRange convertedInputOperands,
ValueRange convertedOutputOperands) {
Expand Down Expand Up @@ -368,6 +395,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, tensor::EmptyOp emptyOp,
SmallVector<OpFoldResult> newShape = tensor::PackOp::getResultShape(
rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo->innerDimsPos,
encodingInfo->outerDimsPerm);
newShape = getSwizzledShape(newShape, *encodingInfo);
Operation *newEmptyOp = rewriter.create<tensor::EmptyOp>(
loc, newShape, emptyType.getElementType());
return newEmptyOp;
Expand Down Expand Up @@ -507,33 +535,6 @@ lowerOpWithEncoding(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
.Default([](Operation *op) { return failure(); });
}

// Rewrites a packed shape according to `encodingInfo`'s swizzle: drops the
// original inner-tile dims, appends the expanded tile shape from
// swizzle->expandShape, and permutes those appended dims by
// swizzle->permutation while leaving the outer dims untouched. A copy of
// `packedShape` is returned as-is when the shape is empty or there is no
// swizzle.
static SmallVector<OpFoldResult>
getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
                 MaterializeEncodingInfo encodingInfo) {
  if (packedShape.empty() || !encodingInfo.swizzle) {
    return SmallVector<OpFoldResult>(packedShape);
  }

  // Number of outer dims = packed rank minus the count of inner tiles.
  int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
  // Identity permutation on outer dims; tile-dim permutation entries are
  // offset by srcRank so only tile dims get reordered.
  SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
  for (auto i : encodingInfo.swizzle->permutation) {
    perm.push_back(i + srcRank);
  }

  // Outer dims first, then the expanded tile sizes as static index attrs;
  // finally apply the combined permutation.
  SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
  SmallVector<int64_t> expandedTileShape =
      getExpandedTileShape(encodingInfo.swizzle->expandShape);
  MLIRContext *ctx = packedShape[0].getContext();
  Builder b(ctx);
  for (int64_t d : expandedTileShape) {
    newShape.push_back(b.getIndexAttr(d));
  }
  applyPermutationToVector(newShape, perm);

  return newShape;
}

/// For `dispatchTensorType` that bind a `RankedTensorType` with encoding,
/// returns the materialized shape of the `dispatchTensorType`. The
/// dynamic dimensions of the `dispatchTensorType` are provided in
Expand Down Expand Up @@ -818,6 +819,11 @@ struct UnsetEncodingOpToUnPackOpConversion
};

/// Generic pattern to convert operations that are in Destination Passing
/// Style.
/// TODO(hanchung): Implement a different pattern for non-elementwise
/// operations, because they should implement their own patterns based on
/// backends. Elementwise operations are just like shape-like ops in the
/// data-tiling concept: they keep the same computation but with different
/// shapes.
template <typename OpTy>
struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
using OpMaterializeEncodingPattern<OpTy>::OpMaterializeEncodingPattern;
Expand Down Expand Up @@ -914,16 +920,14 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
MLIRContext *context = patterns.getContext();
patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
MaterializeDPSOperation<linalg::GenericOp>,
MaterializeOperation<tensor::EmptyOp>,
patterns.insert<MaterializeDPSOperation<linalg::GenericOp>,
MaterializeContractionOp, SetEncodingOpToPackOpConversion,
UnsetEncodingOpToUnPackOpConversion>(
context, typeConverter, materializeEncodingValueFn);
memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
}

void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
void populateShapeLikeMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
Expand All @@ -949,7 +953,9 @@ void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
return resultType == typeConverter.convertType(resultType);
});

patterns.insert<MaterializeFlowDispatchTensorLoadOp,
patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
MaterializeOperation<tensor::EmptyOp>,
MaterializeFlowDispatchTensorLoadOp,
MaterializeFlowDispatchTensorStoreOp,
MaterializeInterfaceBindingEncoding>(
context, typeConverter, materializeEncodingValueFn);
Expand Down

0 comments on commit 709b981

Please sign in to comment.