Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add interface for differentiating inputs and weights in CG & PCG #1493

Merged
merged 6 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/legion_dim.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define _FLEXFLOW_KERNELS_INCLUDE_KERNELS_LEGION_DIM_H

#include "kernels/legion_dim_t.dtg.h"
#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"

namespace FlexFlow {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "kernels/device.h"
#include "kernels/nccl.h"
#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/required.h"
#include "utils/strong_typedef.h"
#include "utils/type_traits.h"
Expand Down
7 changes: 5 additions & 2 deletions lib/local-execution/src/local_cost_estimator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
for (ParallelTensorShape const &input : inputs) {
TensorShape tensor_shape = get_piece_shape(input);
tensor_guid_t tensor_id =
cg_builder.create_tensor(tensor_shape, CreateGrad::YES);
cg_builder.create_input(tensor_shape, CreateGrad::YES);

Check warning on line 54 in lib/local-execution/src/local_cost_estimator.cc

View check run for this annotation

Codecov / codecov/patch

lib/local-execution/src/local_cost_estimator.cc#L54

Added line #L54 was not covered by tests
GenericTensorAccessorW tensor_backing =
allocator.allocate_tensor(tensor_shape);
tensor_backing_map.insert({tensor_id, tensor_backing});
Expand All @@ -69,7 +69,10 @@
std::vector<tensor_guid_t> output_tensor_ids =
cg_builder.add_layer(layer_attrs,
input_tensor_ids,
get_vector_piece_attrs(weights),
transform(get_vector_piece_attrs(weights),
[&](TensorAttrs const &a) {
return cg_builder.create_weight(a);

Check warning on line 74 in lib/local-execution/src/local_cost_estimator.cc

View check run for this annotation

Codecov / codecov/patch

lib/local-execution/src/local_cost_estimator.cc#L72-L74

Added lines #L72 - L74 were not covered by tests
}),
get_vector_piece_attrs(outputs));

LocalTrainingBacking local_backing(allocator,
Expand Down
6 changes: 3 additions & 3 deletions lib/local-execution/test/src/test_local_slots_backing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ TEST_SUITE(FF_TEST_SUITE) {
// build graph
ComputationGraphBuilder cg_builder;
tensor_guid_t query_guid =
cg_builder.create_tensor(query_shape, CreateGrad::YES);
cg_builder.create_input(query_shape, CreateGrad::YES);
tensor_guid_t key_guid =
cg_builder.create_tensor(key_shape, CreateGrad::YES);
cg_builder.create_input(key_shape, CreateGrad::YES);
tensor_guid_t value_guid =
cg_builder.create_tensor(value_shape, CreateGrad::YES);
cg_builder.create_input(value_shape, CreateGrad::YES);

std::string layer_name = "attn1";
tensor_guid_t output_guid =
Expand Down
11 changes: 7 additions & 4 deletions lib/models/src/models/transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ tensor_guid_t create_transformer_encoder_layer(ComputationGraphBuilder &cgb,
config.num_heads,
kdim,
vdim,
config.dropout);
config.dropout,
/*bias=*/false);
assert(are_tensor_guid_shapes_equivalent(
cgb.computation_graph, input, self_attention));

Expand Down Expand Up @@ -88,7 +89,8 @@ tensor_guid_t
config.num_heads,
kdim,
vdim,
config.dropout);
config.dropout,
/*bias=*/false);
assert(are_tensor_guid_shapes_equivalent(
cgb.computation_graph, input, self_attention));

Expand All @@ -107,7 +109,8 @@ tensor_guid_t
config.num_heads,
kdim,
vdim,
config.dropout);
config.dropout,
/*bias=*/false);
assert(are_tensor_guid_shapes_equivalent(cgb.computation_graph, input, mha));

tensor_guid_t mha_normalized =
Expand Down Expand Up @@ -149,7 +152,7 @@ ComputationGraph
config.batch_size, config.sequence_length, config.num_features}},
DataType::FLOAT,
};
tensor_guid_t input = cgb.create_tensor(input_shape, CreateGrad::YES);
tensor_guid_t input = cgb.create_input(input_shape, CreateGrad::YES);

tensor_guid_t encoder_output = create_transformer_encoder(cgb, config, input);
tensor_guid_t decoder_output =
Expand Down
3 changes: 3 additions & 0 deletions lib/op-attrs/include/op-attrs/computation_graph_op_attrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_COMPUTATION_GRAPH_OP_ATTRS_H

#include "op-attrs/computation_graph_op_attrs.dtg.h"
#include "op-attrs/pcg_operator_attrs.dtg.h"
#include "utils/record_formatter.h"

namespace FlexFlow {

OperatorType get_op_type(ComputationGraphOpAttrs const &);
RecordFormatter as_dot(ComputationGraphOpAttrs const &);
ComputationGraphOpAttrs
compgraph_op_attrs_from_pcg_op_attrs(PCGOperatorAttrs const &);

} // namespace FlexFlow

Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/enumerate.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ENUMERATE_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ENUMERATE_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/bidict/bidict.h"
#include "utils/containers/count.h"

Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/ff_ordered_of.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_FF_ORDERED_OF_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_FF_ORDERED_OF_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"

namespace FlexFlow {

Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/get_idxs.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_GET_IDXS_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_GET_IDXS_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/containers/count.h"
#include "utils/containers/transform.h"

Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/slice.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_SLICE_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_SLICE_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/containers/as_vector.h"
#include "utils/containers/subvec.h"
#include "utils/containers/transform.h"
Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/transform.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_TRANSFORM_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_TRANSFORM_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/containers/as_vector.h"
#include "utils/containers/vector_transform.h"

Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/dim_ordered/zip.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ZIP_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ZIP_H

#include "op-attrs/dim_ordered.h"
#include "op-attrs/dim_ordered/dim_ordered.h"
#include "utils/containers/as_vector.h"
#include "utils/containers/zip.h"

Expand Down
17 changes: 17 additions & 0 deletions lib/op-attrs/include/op-attrs/get_incoming_tensor_roles.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_INCOMING_TENSOR_ROLES_H
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_INCOMING_TENSOR_ROLES_H

#include "op-attrs/computation_graph_op_attrs.dtg.h"
#include "op-attrs/incoming_tensor_role.dtg.h"
#include "op-attrs/pcg_operator_attrs.dtg.h"

namespace FlexFlow {

/**
 * @brief Returns the IncomingTensorRole values (INPUT or WEIGHT) for an
 *        operator described by ComputationGraphOpAttrs.
 *
 * @param num_inputs NOTE(review): presumably the number of incoming tensors,
 *        needed for operators whose input count is not fixed by the attrs
 *        alone -- confirm against the implementation.
 * @return A vector of roles. NOTE(review): presumably one entry per incoming
 *         tensor, in incoming-slot order -- confirm against the implementation.
 */
std::vector<IncomingTensorRole>
get_incoming_tensor_roles(ComputationGraphOpAttrs const &, int num_inputs);
/**
 * @brief Overload of get_incoming_tensor_roles taking PCGOperatorAttrs.
 *
 * Same parameter and return types as the ComputationGraphOpAttrs overload.
 * NOTE(review): how parallel-only operators are handled is not visible from
 * this header -- check the implementation.
 */
std::vector<IncomingTensorRole>
get_incoming_tensor_roles(PCGOperatorAttrs const &, int num_inputs);

} // namespace FlexFlow

#endif
14 changes: 14 additions & 0 deletions lib/op-attrs/include/op-attrs/incoming_tensor_role.enum.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Enum spec for FlexFlow::IncomingTensorRole.
# NOTE(review): presumably the source for the generated header
# "op-attrs/incoming_tensor_role.dtg.h" -- confirm against the build tooling.
namespace = "FlexFlow"
name = "IncomingTensorRole"
# Capabilities requested for the enum.
features = [
"hash",
"fmt",
"rapidcheck",
"json",
]

# Role value for a tensor an operator receives as an input.
[[values]]
name = "INPUT"

# Role value for a tensor an operator receives as a weight.
[[values]]
name = "WEIGHT"
20 changes: 20 additions & 0 deletions lib/op-attrs/include/op-attrs/ops/attention.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef _FLEXFLOW_ATTENTION_ATTRS_H
#define _FLEXFLOW_ATTENTION_ATTRS_H

#include "op-attrs/incoming_tensor_role.dtg.h"
#include "op-attrs/ops/attention/multihead_attention_inputs.dtg.h"
#include "op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h"
#include "op-attrs/ops/attention_attrs.dtg.h"
Expand Down Expand Up @@ -37,6 +38,9 @@ int get_kvSeqLength(MultiHeadAttentionInputs const &);
int get_num_samples(MultiHeadAttentionParallelInputs const &);
int get_num_samples(MultiHeadAttentionInputs const &);

std::vector<IncomingTensorRole>
get_attention_incoming_tensor_roles(MultiHeadAttentionAttrs const &);

tl::expected<TensorShape, std::string>
get_weights_shape(MultiHeadAttentionAttrs const &,
TensorShape const &input_q,
Expand All @@ -58,6 +62,22 @@ tl::expected<TensorShape, std::string>
TensorShape const &input_k,
TensorShape const &input_v);

tl::expected<ParallelTensorDims, std::string>
get_weights_parallel_dims(MultiHeadAttentionAttrs const &,
ParallelTensorShape const &input_q,
ParallelTensorShape const &input_k,
ParallelTensorShape const &input_v);
tl::expected<ParallelTensorDims, std::string>
get_input_bias_parallel_dims(MultiHeadAttentionAttrs const &,
ParallelTensorShape const &input_q,
ParallelTensorShape const &input_k,
ParallelTensorShape const &input_v);
tl::expected<ParallelTensorDims, std::string>
get_output_bias_parallel_dims(MultiHeadAttentionAttrs const &,
ParallelTensorShape const &input_q,
ParallelTensorShape const &input_k,
ParallelTensorShape const &input_v);

tl::expected<ParallelTensorShape, std::string>
get_weights_shape(MultiHeadAttentionAttrs const &,
ParallelTensorShape const &input_q,
Expand Down
3 changes: 3 additions & 0 deletions lib/op-attrs/include/op-attrs/ops/batch_matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_BATCH_MATMUL_H

#include "op-attrs/ops/batch_matmul.dtg.h"
#include "op-attrs/ops/core.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "op-attrs/tensor_shape.dtg.h"
#include <tl/expected.hpp>

namespace FlexFlow {

CHECK_VALID_OP_ATTR(BatchMatmulAttrs);

bool is_valid(BatchMatmulAttrs const &,
ParallelTensorShape const &,
ParallelTensorShape const &);
Expand Down
4 changes: 4 additions & 0 deletions lib/op-attrs/include/op-attrs/ops/conv_2d.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef _FLEXFLOW_CONV_2D_ATTRS_H
#define _FLEXFLOW_CONV_2D_ATTRS_H

#include "op-attrs/incoming_tensor_role.dtg.h"
#include "op-attrs/ops/conv_2d_attrs.dtg.h"
#include "op-attrs/ops/core.h"
#include "op-attrs/parallel_tensor_shape.h"
Expand All @@ -10,6 +11,9 @@ namespace FlexFlow {

CHECK_VALID_OP_ATTR(Conv2DAttrs);

std::vector<IncomingTensorRole>
get_conv2d_incoming_tensor_roles(Conv2DAttrs const &);

TensorShape get_kernel_shape(Conv2DAttrs const &attrs,
TensorShape const &input);
TensorShape get_bias_shape(Conv2DAttrs const &attrs, TensorShape const &input);
Expand Down
4 changes: 4 additions & 0 deletions lib/op-attrs/include/op-attrs/ops/layer_norm.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#ifndef _FLEXFLOW_OP_META_OPS_LAYER_NORM_ATTRS_H
#define _FLEXFLOW_OP_META_OPS_LAYER_NORM_ATTRS_H

#include "op-attrs/incoming_tensor_role.dtg.h"
#include "op-attrs/ops/core.h"
#include "op-attrs/ops/layer_norm_attrs.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "op-attrs/tensor_shape.dtg.h"

namespace FlexFlow {

std::vector<IncomingTensorRole>
get_layer_norm_incoming_tensor_roles(LayerNormAttrs const &);

tl::expected<TensorShape, std::string> get_output_shape(LayerNormAttrs const &,
TensorShape const &);
tl::expected<TensorShape, std::string>
Expand Down
10 changes: 7 additions & 3 deletions lib/op-attrs/include/op-attrs/ops/linear.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef _FLEXFLOW_LINEAR_ATTRS_H
#define _FLEXFLOW_LINEAR_ATTRS_H

#include "op-attrs/incoming_tensor_role.dtg.h"
#include "op-attrs/ops/core.h"
#include "op-attrs/ops/linear_attrs.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
Expand All @@ -10,20 +11,23 @@

namespace FlexFlow {

std::vector<IncomingTensorRole>
get_linear_incoming_tensor_roles(LinearAttrs const &);

CHECK_VALID_OP_ATTR(LinearAttrs);

RecordFormatter as_dot(LinearAttrs const &);

tl::expected<TensorShape, std::string>
get_kernel_shape(LinearAttrs const &attrs, TensorShape const &input);
get_projection_shape(LinearAttrs const &attrs, TensorShape const &input);
tl::expected<TensorShape, std::string> get_bias_shape(LinearAttrs const &attrs,
TensorShape const &input);
tl::expected<TensorShape, std::string>
get_output_shape(LinearAttrs const &attrs, TensorShape const &input);

tl::expected<ParallelTensorShape, std::string>
get_kernel_shape(LinearAttrs const &attrs,
ParallelTensorShape const &input);
get_projection_shape(LinearAttrs const &attrs,
ParallelTensorShape const &input);
tl::expected<ParallelTensorShape, std::string>
get_bias_shape(LinearAttrs const &attrs, ParallelTensorShape const &input);
tl::expected<ParallelTensorShape, std::string>
Expand Down
2 changes: 2 additions & 0 deletions lib/op-attrs/include/op-attrs/ops/topk.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#include "op-attrs/ops/core.h"
#include "op-attrs/ops/topk_attrs.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "op-attrs/tensor_shape.dtg.h"

namespace FlexFlow {

CHECK_VALID_OP_ATTR(TopKAttrs);

TensorShape get_output_shape(TopKAttrs const &, TensorShape const &);
ParallelTensorShape get_output_shape(TopKAttrs const &attrs,
ParallelTensorShape const &input_shape);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ features = [
includes = [
"op-attrs/ff_dim.h",
"op-attrs/ff_dim.dtg.h",
"op-attrs/dim_ordered.h",
"op-attrs/dim_ordered/dim_ordered.h",
]

[[fields]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ features = [
]

includes = [
"op-attrs/dim_ordered.h",
"op-attrs/dim_ordered/dim_ordered.h",
"op-attrs/shard_parallel_dim.dtg.h",
"op-attrs/replica_parallel_dim_set.dtg.h",
"<unordered_map>",
Expand Down
4 changes: 2 additions & 2 deletions lib/op-attrs/include/op-attrs/pcg_operator_attrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ namespace FlexFlow {

bool is_parallel_op(PCGOperatorAttrs const &);
OperatorType get_op_type(PCGOperatorAttrs const &);
ComputationGraphOpAttrs
compgraph_op_attrs_from_pcg_op_attrs(PCGOperatorAttrs const &);
PCGOperatorAttrs
pcg_op_attrs_from_compgraph_op_attrs(ComputationGraphOpAttrs const &);
RecordFormatter as_dot(PCGOperatorAttrs const &);

} // namespace FlexFlow
Expand Down
5 changes: 5 additions & 0 deletions lib/op-attrs/include/op-attrs/pcg_operator_attrs.variant.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ includes = [
"op-attrs/ops/attention_attrs.dtg.h",
"op-attrs/ops/batch_matmul.dtg.h",
"op-attrs/ops/batch_norm_attrs.dtg.h",
"op-attrs/ops/broadcast_attrs.dtg.h",
"op-attrs/ops/cast_attrs.dtg.h",
"op-attrs/ops/combine_attrs.dtg.h",
"op-attrs/ops/concat_attrs.dtg.h",
Expand Down Expand Up @@ -49,6 +50,10 @@ key = "batch_matmul"
type = "::FlexFlow::BatchNormAttrs"
key = "batch_norm"

[[values]]
type = "::FlexFlow::BroadcastAttrs"
key = "broadcast"

[[values]]
type = "::FlexFlow::CastAttrs"
key = "cast"
Expand Down
2 changes: 1 addition & 1 deletion lib/op-attrs/include/op-attrs/tensor_dims.struct.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ features = [
"fmt",
]
includes = [
"op-attrs/dim_ordered.h",
"op-attrs/dim_ordered/dim_ordered.h",
]

[[fields]]
Expand Down
Loading
Loading