Skip to content

Commit

Permalink
Remove all offsets from processOne(_simd)
Browse files Browse the repository at this point in the history
Remove:
- invokeProcessOneWithOrWithoutOffset
- exact_argument_type
- can_processOne_with_offset_invoke_test
- can_processOne_simd_with_offset
- can_processOne_scalar_with_offset
- can_processOne_with_offset

Modify MergedGraph apply_left, apply_right, processOne, and
processOne_simd to not require an offset argument anymore.

Modify all calls to processOne(_simd) to never pass an offset.

Modify TagMonitors processOne to generateTag without offset. The
published tag is now unconditional 0 all the time.

Signed-off-by: Matthias Kretz <[email protected]>
  • Loading branch information
mattkretz committed Sep 24, 2024
1 parent 7f7ad6f commit 7375c35
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ Commonly used for testing scenarios and signal termination where output is unnec
template<gr::meta::t_or_simd<T> V>
void processOne(V) noexcept {
if constexpr (stdx::is_simd_v<V>) {
count += V::size();
count += gr::Size_t(V::size());
} else {
count++;
}
Expand Down
8 changes: 4 additions & 4 deletions blocks/testing/include/gnuradio-4.0/testing/TagMonitors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
}
}

T processOne(std::size_t offset) noexcept
T processOne() noexcept
requires(UseProcessVariant == ProcessFunction::USE_PROCESS_ONE)
{
const auto [tagGenerated, tagRepeatStarted] = generateTag("processOne(...)", offset);
const auto [tagGenerated, tagRepeatStarted] = generateTag("processOne(...)");
_nSamplesProduced++;
if (!isInfinite() && _nSamplesProduced >= n_samples_max) {
this->requestStop();
Expand Down Expand Up @@ -192,7 +192,7 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
}

private:
[[nodiscard]] auto generateTag(std::string_view processFunctionName, std::size_t offset = 0) {
[[nodiscard]] auto generateTag(std::string_view processFunctionName) {
struct {
bool tagGenerated = false;
bool tagRepeatStarted = false;
Expand All @@ -203,7 +203,7 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
if (verbose_console) {
print_tag(_tags[_tagIndex], fmt::format("{}::{}\t publish tag at {:6}", this->name.value, processFunctionName, _nSamplesProduced));
}
out.publishTag(_tags[_tagIndex].map, static_cast<Tag::signed_index_type>(offset)); // indices > 0 write tags in the future ... handle with care
out.publishTag(_tags[_tagIndex].map, static_cast<Tag::signed_index_type>(0)); // indices > 0 write tags in the future ... handle with care
this->_outputTagsChanged = true;
_tagIndex++;
if (repeat_tags && _tagIndex == _tags.size()) {
Expand Down
4 changes: 2 additions & 2 deletions core/benchmarks/bm-nosonar_node_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,12 @@ void loop_over_processOne(auto& node) {
bm::test::n_samples_consumed = 0LU;
#if DISABLE_SIMD
for (std::size_t i = 0; i < N_SAMPLES; i++) {
node.processOne(i);
node.processOne();
}
#else
constexpr int N = 32;
for (std::size_t i = 0; i < N_SAMPLES / N; i++) {
node.template processOne_simd(i, std::integral_constant<std::size_t, N>{});
node.template processOne_simd(std::integral_constant<std::size_t, N>{});
}
#endif
expect(eq(bm::test::n_samples_produced, N_SAMPLES)) << "produced too many/few samples";
Expand Down
35 changes: 13 additions & 22 deletions core/include/gnuradio-4.0/Block.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,6 @@ constexpr auto simdize_tuple_load_and_apply(auto width, const std::tuple<Ts...>&
return [&]<std::size_t... Is>(std::index_sequence<Is...>) { return fun(std::tuple_element_t<Is, Tup>(std::ranges::data(std::get<Is>(rngs)) + offset, f)...); }(std::make_index_sequence<sizeof...(Ts)>());
}

template<typename T, typename... Us>
auto invokeProcessOneWithOrWithoutOffset(T& node, std::size_t offset, const Us&... inputs) {
if constexpr (traits::block::can_processOne_with_offset<T>) {
return node.processOne(offset, inputs...);
} else {
return node.processOne(inputs...);
}
}

template<std::size_t Index, PortType portType, typename Self>
[[nodiscard]] constexpr auto& inputPort(Self* self) noexcept {
using TRequestedPortType = typename traits::block::ports_data<Self>::template for_type<portType>::input_ports::template at<Index>;
Expand Down Expand Up @@ -756,30 +747,30 @@ class Block : public lifecycle::StateMachine<Derived>, public std::tuple<Argumen
}

template<typename... Ts>
constexpr auto invoke_processOne(std::size_t offset, Ts&&... inputs) {
constexpr auto invoke_processOne(Ts&&... inputs) {
if constexpr (traits::block::stream_output_ports<Derived>::size == 0) {
invokeProcessOneWithOrWithoutOffset(self(), offset, std::forward<Ts>(inputs)...);
self().processOne(std::forward<Ts>(inputs)...);
return std::tuple{};
} else if constexpr (traits::block::stream_output_ports<Derived>::size == 1) {
return std::tuple{invokeProcessOneWithOrWithoutOffset(self(), offset, std::forward<Ts>(inputs)...)};
return std::tuple{self().processOne(std::forward<Ts>(inputs)...)};
} else {
return invokeProcessOneWithOrWithoutOffset(self(), offset, std::forward<Ts>(inputs)...);
return self().processOne(std::forward<Ts>(inputs)...);
}
}

template<typename... Ts>
constexpr auto invoke_processOne_simd(std::size_t offset, auto width, Ts&&... input_simds) {
constexpr auto invoke_processOne_simd(auto width, Ts&&... input_simds) {
if constexpr (sizeof...(Ts) == 0) {
if constexpr (traits::block::stream_output_ports<Derived>::size == 0) {
self().processOne_simd(offset, width);
self().processOne_simd(width);
return std::tuple{};
} else if constexpr (traits::block::stream_output_ports<Derived>::size == 1) {
return std::tuple{self().processOne_simd(offset, width)};
return std::tuple{self().processOne_simd(width)};
} else {
return self().processOne_simd(offset, width);
return self().processOne_simd(width);
}
} else {
return invoke_processOne(offset, std::forward<Ts>(input_simds)...);
return invoke_processOne(std::forward<Ts>(input_simds)...);
}
}

Expand Down Expand Up @@ -1342,12 +1333,12 @@ class Block : public lifecycle::StateMachine<Derived>, public std::tuple<Argumen
work::Status invokeProcessOneSimd(auto& inputSpans, auto& outputSpans, auto width, std::size_t nSamplesToProcess) {
std::size_t i = 0;
for (; i + width <= nSamplesToProcess; i += width) {
const auto& results = simdize_tuple_load_and_apply(width, inputSpans, i, [&](const auto&... input_simds) { return invoke_processOne_simd(i, width, input_simds...); });
const auto& results = simdize_tuple_load_and_apply(width, inputSpans, i, [&](const auto&... input_simds) { return invoke_processOne_simd(width, input_simds...); });
meta::tuple_for_each([i](auto& output_range, const auto& result) { result.copy_to(output_range.data() + i, stdx::element_aligned); }, outputSpans, results);
}
simd_epilogue(width, [&](auto w) {
if (i + w <= nSamplesToProcess) {
const auto results = simdize_tuple_load_and_apply(w, inputSpans, i, [&](auto&&... input_simds) { return invoke_processOne_simd(i, w, input_simds...); });
const auto results = simdize_tuple_load_and_apply(w, inputSpans, i, [&](auto&&... input_simds) { return invoke_processOne_simd(w, input_simds...); });
meta::tuple_for_each([i](auto& output_range, auto& result) { result.copy_to(output_range.data() + i, stdx::element_aligned); }, outputSpans, results);
i += w;
}
Expand All @@ -1357,7 +1348,7 @@ class Block : public lifecycle::StateMachine<Derived>, public std::tuple<Argumen

work::Status invokeProcessOnePure(auto& inputSpans, auto& outputSpans, std::size_t nSamplesToProcess) {
for (std::size_t i = 0; i < nSamplesToProcess; ++i) {
auto results = std::apply([this, i](auto&... inputs) { return this->invoke_processOne(i, inputs[i]...); }, inputSpans);
auto results = std::apply([this, i](auto&... inputs) { return this->invoke_processOne(inputs[i]...); }, inputSpans);
meta::tuple_for_each([i]<typename R>(auto& output_range, R&& result) { output_range[i] = std::forward<R>(result); }, outputSpans, results);
}
return work::Status::OK;
Expand All @@ -1374,7 +1365,7 @@ class Block : public lifecycle::StateMachine<Derived>, public std::tuple<Argumen

std::size_t nOutSamplesBeforeRequestedStop = 0;
for (std::size_t i = 0; i < nSamplesToProcess; ++i) {
auto results = std::apply([this, i](auto&... inputs) { return this->invoke_processOne(i, inputs[i]...); }, inputSpans);
auto results = std::apply([this, i](auto&... inputs) { return this->invoke_processOne(inputs[i]...); }, inputSpans);
meta::tuple_for_each(
[i]<typename R>(auto& output_range, R&& result) {
if constexpr (meta::array_or_vector_type<std::remove_cvref<decltype(result)>>) {
Expand Down
39 changes: 9 additions & 30 deletions core/include/gnuradio-4.0/BlockTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,19 +206,17 @@ template<typename TBlock, typename TPortType>
using get_port_member_descriptor = typename meta::to_typelist<refl::descriptor::member_list<TBlock>>::template filter<detail::is_port_or_collection_descriptor>::template filter<detail::member_descriptor_has_type<TPortType>::template matches>::template at<0>;

// TODO: Why is this not done with requires?
// mkretz: I don't understand the question. "this" in the question is unclear.
/* Helper to determine the return type of `block.processOne` for the given inputs.
*
* This helper is necessary because we need a pack of indices to expand the input tuples. In princple we should be able
* to use std::apply to the same effect. Except that `block` would need to be the first element of the tuple. This here
* is simpler and cheaper.
*/
namespace detail {
template<std::size_t... Is>
auto can_processOne_invoke_test(auto& block, const auto& input, std::index_sequence<Is...>) -> decltype(block.processOne(std::get<Is>(input)...));

template<typename T>
struct exact_argument_type {
template<std::same_as<T> U>
constexpr operator U() const noexcept;
};

template<std::size_t... Is>
auto can_processOne_with_offset_invoke_test(auto& block, const auto& input, std::index_sequence<Is...>) -> decltype(block.processOne(exact_argument_type<std::size_t>(), std::get<Is>(input)...));

template<typename TBlock>
using simd_return_type_of_can_processOne = vir::simdize<stream_return_type<TBlock>, vir::simdize<stream_input_port_types_tuple<TBlock>>::size()>;
} // namespace detail
Expand All @@ -230,7 +228,7 @@ using simd_return_type_of_can_processOne = vir::simdize<stream_return_type<TBloc
* The requirement of at least one function argument disallows sources.
*
* There is another (unnamed) concept for source nodes: Source nodes can implement
* `processOne_simd(integral_constant)`, which returns SIMD object(s) of width N.
* `processOne_simd(integral_constant N)`, which returns SIMD object(s) of width N.
*/
template<typename TBlock>
concept can_processOne_simd = //
Expand All @@ -246,17 +244,6 @@ concept can_processOne_simd_const =
{ detail::can_processOne_invoke_test(block, input_simds, std::make_index_sequence<traits::block::stream_input_ports<TBlock>::size()>()) } -> std::same_as<detail::simd_return_type_of_can_processOne<TBlock>>;
};

template<typename TBlock>
concept can_processOne_simd_with_offset =
#if DISABLE_SIMD
false;
#else
traits::block::stream_input_ports<TBlock>::template all_of<port::is_port> and // checks we don't have port collections inside
traits::block::stream_input_port_types<TBlock>::size() > 0 && requires(TBlock& block, const vir::simdize<stream_input_port_types_tuple<TBlock>>& input_simds) {
{ detail::can_processOne_with_offset_invoke_test(block, input_simds, std::make_index_sequence<traits::block::stream_input_ports<TBlock>::size()>()) } -> std::same_as<detail::simd_return_type_of_can_processOne_t<TBlock>>;
};
#endif

template<typename TBlock>
concept can_processOne_scalar = requires(TBlock& block, const stream_input_port_types_tuple<TBlock>& inputs) {
{ detail::can_processOne_invoke_test(block, inputs, std::make_index_sequence<traits::block::stream_input_ports<TBlock>::size()>()) } -> std::same_as<stream_return_type<TBlock>>;
Expand All @@ -268,19 +255,11 @@ concept can_processOne_scalar_const = requires(const TBlock& block, const stream
};

template<typename TBlock>
concept can_processOne_scalar_with_offset = requires(TBlock& block, const stream_input_port_types_tuple<TBlock>& inputs) {
{ detail::can_processOne_with_offset_invoke_test(block, inputs, std::make_index_sequence<traits::block::stream_input_ports<TBlock>::size()>()) } -> std::same_as<stream_return_type<TBlock>>;
};

template<typename TBlock>
concept can_processOne = can_processOne_scalar<TBlock> or can_processOne_simd<TBlock> or can_processOne_scalar_with_offset<TBlock> or can_processOne_simd_with_offset<TBlock>;
concept can_processOne = can_processOne_scalar<TBlock> or can_processOne_simd<TBlock>;

template<typename TBlock>
concept can_processOne_const = can_processOne_scalar_const<TBlock> or can_processOne_simd_const<TBlock>;

template<typename TBlock>
concept can_processOne_with_offset = can_processOne_scalar_with_offset<TBlock> or can_processOne_simd_with_offset<TBlock>;

template<typename TBlock, typename TPort>
concept can_processMessagesForPortConsumableSpan = requires(TBlock& block, TPort& inPort) { block.processMessages(inPort, inPort.streamReader().get(1UZ)); };

Expand Down
34 changes: 15 additions & 19 deletions core/include/gnuradio-4.0/Graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,17 +722,17 @@ class MergedGraph : public Block<MergedGraph<Left, Right, OutId, InId>, meta::co
}

template<std::size_t I>
constexpr auto apply_left(std::size_t offset, auto&& input_tuple) noexcept {
return [&]<std::size_t... Is>(std::index_sequence<Is...>) { return invokeProcessOneWithOrWithoutOffset(left, offset, std::get<Is>(std::forward<decltype(input_tuple)>(input_tuple))...); }(std::make_index_sequence<I>());
constexpr auto apply_left(auto&& input_tuple) noexcept {
return [&]<std::size_t... Is>(std::index_sequence<Is...>) { return left.processOne(std::get<Is>(std::forward<decltype(input_tuple)>(input_tuple))...); }(std::make_index_sequence<I>());
}

template<std::size_t I, std::size_t J>
constexpr auto apply_right(std::size_t offset, auto&& input_tuple, auto&& tmp) noexcept {
constexpr auto apply_right(auto&& input_tuple, auto&& tmp) noexcept {
return [&]<std::size_t... Is, std::size_t... Js>(std::index_sequence<Is...>, std::index_sequence<Js...>) {
constexpr std::size_t first_offset = traits::block::stream_input_port_types<Left>::size;
constexpr std::size_t second_offset = traits::block::stream_input_port_types<Left>::size + sizeof...(Is);
static_assert(second_offset + sizeof...(Js) == std::tuple_size_v<std::remove_cvref_t<decltype(input_tuple)>>);
return invokeProcessOneWithOrWithoutOffset(right, offset, std::get<first_offset + Is>(std::forward<decltype(input_tuple)>(input_tuple))..., std::forward<decltype(tmp)>(tmp), std::get<second_offset + Js>(input_tuple)...);
return right.processOne(std::get<first_offset + Is>(std::forward<decltype(input_tuple)>(input_tuple))..., std::forward<decltype(tmp)>(tmp), std::get<second_offset + Js>(input_tuple)...);
}(std::make_index_sequence<I>(), std::make_index_sequence<J>());
}

Expand All @@ -754,48 +754,44 @@ class MergedGraph : public Block<MergedGraph<Left, Right, OutId, InId>, meta::co

template<meta::any_simd... Ts>
requires traits::block::can_processOne_simd<Left> and traits::block::can_processOne_simd<Right>
constexpr vir::simdize<TReturnType, (Ts::size(), ...)> processOne(std::size_t offset, const Ts&... inputs) {
constexpr vir::simdize<TReturnType, (0, ..., Ts::size())> processOne(const Ts&... inputs) {
static_assert(traits::block::stream_output_port_types<Left>::size == 1, "TODO: SIMD for multiple output ports not implemented yet");
return apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(offset, std::tie(inputs...), apply_left<traits::block::stream_input_port_types<Left>::size()>(offset, std::tie(inputs...)));
return apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(std::tie(inputs...), apply_left<traits::block::stream_input_port_types<Left>::size()>(std::tie(inputs...)));
}

constexpr auto processOne_simd(std::size_t offset, auto N)
constexpr auto processOne_simd(auto N)
requires traits::block::can_processOne_simd<Right>
{
if constexpr (requires(Left& l) {
{ l.processOne_simd(offset, N) };
{ l.processOne_simd(N) };
}) {
return invokeProcessOneWithOrWithoutOffset(right, offset, left.processOne_simd(offset, N));
} else if constexpr (requires(Left& l) {
{ l.processOne_simd(N) };
}) {
return invokeProcessOneWithOrWithoutOffset(right, offset, left.processOne_simd(N));
return right.processOne(left.processOne_simd(N));
} else {
using LeftResult = typename traits::block::stream_return_type<Left>;
using V = vir::simdize<LeftResult, N>;
alignas(stdx::memory_alignment_v<V>) LeftResult tmp[V::size()];
for (std::size_t i = 0UZ; i < V::size(); ++i) {
tmp[i] = invokeProcessOneWithOrWithoutOffset(left, offset + i);
tmp[i] = left.processOne();
}
return invokeProcessOneWithOrWithoutOffset(right, offset, V(tmp, stdx::vector_aligned));
return right.processOne(V(tmp, stdx::vector_aligned));
}
}

template<typename... Ts>
// Nicer error messages for the following would be good, but not at the expense of breaking can_processOne_simd.
requires(TInputPortTypes::template are_equal<std::remove_cvref_t<Ts>...>)
constexpr TReturnType processOne(std::size_t offset, Ts&&... inputs) {
constexpr TReturnType processOne(Ts&&... inputs) {
// if (sizeof...(Ts) == 0) we could call `return processOne_simd(integral_constant<size_t, width>)`. But if
// the caller expects to process *one* sample (no inputs for the caller to explicitly
// request simd), and we process more, we risk inconsistencies.
if constexpr (traits::block::stream_output_port_types<Left>::size == 1) {
// only the result from the right block needs to be returned
return apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(offset, std::forward_as_tuple(std::forward<Ts>(inputs)...), apply_left<traits::block::stream_input_port_types<Left>::size()>(offset, std::forward_as_tuple(std::forward<Ts>(inputs)...)));
return apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(std::forward_as_tuple(std::forward<Ts>(inputs)...), apply_left<traits::block::stream_input_port_types<Left>::size()>(std::forward_as_tuple(std::forward<Ts>(inputs)...)));

} else {
// left produces a tuple
auto left_out = apply_left<traits::block::stream_input_port_types<Left>::size()>(offset, std::forward_as_tuple(std::forward<Ts>(inputs)...));
auto right_out = apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(offset, std::forward_as_tuple(std::forward<Ts>(inputs)...), std::move(std::get<OutId>(left_out)));
auto left_out = apply_left<traits::block::stream_input_port_types<Left>::size()>(std::forward_as_tuple(std::forward<Ts>(inputs)...));
auto right_out = apply_right<InId, traits::block::stream_input_port_types<Right>::size() - InId - 1>(std::forward_as_tuple(std::forward<Ts>(inputs)...), std::move(std::get<OutId>(left_out)));

if constexpr (traits::block::stream_output_port_types<Left>::size == 2 && traits::block::stream_output_port_types<Right>::size == 1) {
return std::make_tuple(std::move(std::get<OutId ^ 1>(left_out)), std::move(right_out));
Expand Down
Loading

0 comments on commit 7375c35

Please sign in to comment.