Skip to content

Commit

Permalink
Fix range of immediate argument in vst2q_lane_f64 (#343)
Browse files Browse the repository at this point in the history
* Rectify range of immediate argument in vst2q_lane_f64

The range of 'lane' for vst2q_lane_f64 is documented as 0 <= lane <= 2. However, the
64-bit variant of ST2 specifies that this immediate has a range of 0 <= lane <= 1.

- ST2: https://developer.arm.com/documentation/dui0801/l/A64-SIMD-Vector-Instructions/ST2--vector--single-structure---A64-
  • Loading branch information
SpencerAbson committed Sep 12, 2024
1 parent fb3e19d commit 264f4cd
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 2 deletions.
3 changes: 2 additions & 1 deletion neon_intrinsics/advsimd.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ for more information about Arm’s trademarks.
### Changes for next release

* Textual improvements (non-functional changes).
* Fixed the range of the ``lane`` immediate argument for ``vst2q_lane_f64``.

<!---
**** Do not remove! ****
Expand Down Expand Up @@ -4279,7 +4280,7 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``.
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64" target="_blank">vst2_lane_p64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly64_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; poly64x1x2_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[1] -> Vt2.1D`<br>`val.val[0] -> Vt.1D`<br>`ptr -> Xn`<br>`0 <= lane <= 0` | `ST2 {Vt.d - Vt2.d}[lane],[Xn]` | | `A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64" target="_blank">vst2q_lane_p64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly64_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; poly64x2x2_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[1] -> Vt2.2D`<br>`val.val[0] -> Vt.2D`<br>`ptr -> Xn`<br>`0 <= lane <= 1` | `ST2 {Vt.d - Vt2.d}[lane],[Xn]` | | `A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64" target="_blank">vst2_lane_f64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float64_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; float64x1x2_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[1] -> Vt2.1D`<br>`val.val[0] -> Vt.1D`<br>`ptr -> Xn`<br>`0 <= lane <= 0` | `ST2 {Vt.d - Vt2.d}[lane],[Xn]` | | `A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64" target="_blank">vst2q_lane_f64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float64_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; float64x2x2_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[1] -> Vt2.2D`<br>`val.val[0] -> Vt.2D`<br>`ptr -> Xn`<br>`0 <= lane <= 2` | `ST2 {Vt.d - Vt2.d}[lane],[Xn]` | | `A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64" target="_blank">vst2q_lane_f64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float64_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; float64x2x2_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[1] -> Vt2.2D`<br>`val.val[0] -> Vt.2D`<br>`ptr -> Xn`<br>`0 <= lane <= 1` | `ST2 {Vt.d - Vt2.d}[lane],[Xn]` | | `A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16" target="_blank">vst3_lane_s16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; int16_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; int16x4x3_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[2] -> Vt3.4H`<br>`val.val[1] -> Vt2.4H`<br>`val.val[0] -> Vt.4H`<br>`ptr -> Xn`<br>`0 <= lane <= 3` | `ST3 {Vt.h - Vt3.h}[lane],[Xn]` | | `v7/A32/A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16" target="_blank">vst3q_lane_s16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; int16_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; int16x8x3_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[2] -> Vt3.8H`<br>`val.val[1] -> Vt2.8H`<br>`val.val[0] -> Vt.8H`<br>`ptr -> Xn`<br>`0 <= lane <= 7` | `ST3 {Vt.h - Vt3.h}[lane],[Xn]` | | `v7/A32/A64` |
| <code>void <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32" target="_blank">vst3_lane_s32</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; int32_t *ptr,<br>&nbsp;&nbsp;&nbsp;&nbsp; int32x2x3_t val,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `val.val[2] -> Vt3.2S`<br>`val.val[1] -> Vt2.2S`<br>`val.val[0] -> Vt.2S`<br>`ptr -> Xn`<br>`0 <= lane <= 1` | `ST3 {Vt.s - Vt3.s}[lane],[Xn]` | | `v7/A32/A64` |
Expand Down
1 change: 1 addition & 0 deletions neon_intrinsics/advsimd.template.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ for more information about Arm’s trademarks.
### Changes for next release

* Textual improvements (non-functional changes).
* Fixed the range of the ``lane`` immediate argument for ``vst2q_lane_f64``.

<!---
**** Do not remove! ****
Expand Down
2 changes: 1 addition & 1 deletion tools/intrinsic_db/advsimd.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2583,7 +2583,7 @@ void vst2q_lane_u64(uint64_t *ptr, uint64x2x2_t val, __builtin_constant_p(lane))
void vst2_lane_p64(poly64_t *ptr, poly64x1x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.1D;val.val[0] -> Vt.1D;ptr -> Xn;0 <= lane <= 0 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64
void vst2q_lane_p64(poly64_t *ptr, poly64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 1 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64
void vst2_lane_f64(float64_t *ptr, float64x1x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.1D;val.val[0] -> Vt.1D;ptr -> Xn;0 <= lane <= 0 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64
void vst2q_lane_f64(float64_t *ptr, float64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 2 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64
void vst2q_lane_f64(float64_t *ptr, float64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 1 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64
void vst3_lane_s16(int16_t *ptr, int16x4x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.4H;val.val[1] -> Vt2.4H;val.val[0] -> Vt.4H;ptr -> Xn;0 <= lane <= 3 ST3 {Vt.h - Vt3.h}[lane],[Xn] v7/A32/A64
void vst3q_lane_s16(int16_t *ptr, int16x8x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.8H;val.val[1] -> Vt2.8H;val.val[0] -> Vt.8H;ptr -> Xn;0 <= lane <= 7 ST3 {Vt.h - Vt3.h}[lane],[Xn] v7/A32/A64
void vst3_lane_s32(int32_t *ptr, int32x2x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.2S;val.val[1] -> Vt2.2S;val.val[0] -> Vt.2S;ptr -> Xn;0 <= lane <= 1 ST3 {Vt.s - Vt3.s}[lane],[Xn] v7/A32/A64
Expand Down

0 comments on commit 264f4cd

Please sign in to comment.