-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
863 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
tmp | ||
pdfs | ||
tex2pdf* | ||
tex2pdf* | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ toc: true | |
--- | ||
|
||
<!-- | ||
SPDX-FileCopyrightText: Copyright 2014-2023 Arm Limited and/or its affiliates <[email protected]> | ||
SPDX-FileCopyrightText: Copyright 2014-2024 Arm Limited and/or its affiliates <[email protected]> | ||
SPDX-FileCopyrightText: Copyright 2021 Matt P. Dziubinski <[email protected]> | ||
CC-BY-SA-4.0 AND Apache-Patent-License | ||
See LICENSE.md file for details | ||
|
@@ -107,7 +107,7 @@ for more information about Arm’s trademarks. | |
|
||
## Copyright | ||
|
||
* Copyright 2014-2023 Arm Limited and/or its affiliates <[email protected]> | ||
* Copyright 2014-2024 Arm Limited and/or its affiliates <[email protected]> | ||
* Copyright 2021 Matt P. Dziubinski <[email protected]> | ||
|
||
## Document history | ||
|
@@ -149,6 +149,7 @@ for more information about Arm’s trademarks. | |
### Changes for next release | ||
|
||
* Textual improvements (non-functional changes). | ||
* Fixed the range of the ``lane`` immediate argument for ``vst2q_lane_f64``. | ||
|
||
<!--- | ||
**** Do not remove! **** | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
<COMMENT> SPDX-FileCopyrightText: Copyright 2014-2023 Arm Limited <[email protected]> | ||
<COMMENT> SPDX-FileCopyrightText: Copyright 2014-2024 Arm Limited <[email protected]> | ||
<COMMENT> SPDX-FileCopyrightText: Copyright 2021 Matt P. Dziubinski <[email protected]> | ||
<COMMENT> SPDX-License-Identifier: Apache-2.0 | ||
<COMMENT> | ||
|
@@ -2583,7 +2583,7 @@ void vst2q_lane_u64(uint64_t *ptr, uint64x2x2_t val, __builtin_constant_p(lane)) | |
void vst2_lane_p64(poly64_t *ptr, poly64x1x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.1D;val.val[0] -> Vt.1D;ptr -> Xn;0 <= lane <= 0 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64 | ||
void vst2q_lane_p64(poly64_t *ptr, poly64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 1 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64 | ||
void vst2_lane_f64(float64_t *ptr, float64x1x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.1D;val.val[0] -> Vt.1D;ptr -> Xn;0 <= lane <= 0 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64 | ||
void vst2q_lane_f64(float64_t *ptr, float64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 2 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64 | ||
void vst2q_lane_f64(float64_t *ptr, float64x2x2_t val, __builtin_constant_p(lane)) val.val[1] -> Vt2.2D;val.val[0] -> Vt.2D;ptr -> Xn;0 <= lane <= 1 ST2 {Vt.d - Vt2.d}[lane],[Xn] A64 | ||
void vst3_lane_s16(int16_t *ptr, int16x4x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.4H;val.val[1] -> Vt2.4H;val.val[0] -> Vt.4H;ptr -> Xn;0 <= lane <= 3 ST3 {Vt.h - Vt3.h}[lane],[Xn] v7/A32/A64 | ||
void vst3q_lane_s16(int16_t *ptr, int16x8x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.8H;val.val[1] -> Vt2.8H;val.val[0] -> Vt.8H;ptr -> Xn;0 <= lane <= 7 ST3 {Vt.h - Vt3.h}[lane],[Xn] v7/A32/A64 | ||
void vst3_lane_s32(int32_t *ptr, int32x2x3_t val, __builtin_constant_p(lane)) val.val[2] -> Vt3.2S;val.val[1] -> Vt2.2S;val.val[0] -> Vt.2S;ptr -> Xn;0 <= lane <= 1 ST3 {Vt.s - Vt3.s}[lane],[Xn] v7/A32/A64 | ||
|
@@ -3730,6 +3730,83 @@ float64x2_t vreinterpretq_f64_p128(poly128_t a) a -> Vd.1Q NOP Vd.2D -> result A | |
float16x8_t vreinterpretq_f16_p128(poly128_t a) a -> Vd.1Q NOP Vd.8H -> result A32/A64 | ||
poly128_t vldrq_p128(poly128_t const *ptr) ptr -> Xn LDR Qd,[Xn] Qd -> result A32/A64 | ||
void vstrq_p128(poly128_t *ptr, poly128_t val) val -> Qt;ptr -> Xn STR Qt,[Xn] A32/A64 | ||
|
||
uint8x16_t vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
uint8x16_t vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
uint8x16_t vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
uint8x16_t vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
int8x16_t vluti2_lane_s8(int8x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
int8x16_t vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
int8x16_t vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
int8x16_t vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
poly8x16_t vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
poly8x16_t vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
poly8x16_t vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
poly8x16_t vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 3 LUTI2 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
uint16x8_t vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
uint16x8_t vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
uint16x8_t vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
uint16x8_t vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
int16x8_t vluti2_lane_s16(int16x4_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
int16x8_t vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
int16x8_t vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
int16x8_t vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
float16x8_t vluti2_lane_f16(float16x4_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
float16x8_t vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
float16x8_t vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
float16x8_t vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
bfloat16x8_t vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
bfloat16x8_t vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
bfloat16x8_t vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
bfloat16x8_t vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
poly16x8_t vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
poly16x8_t vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
poly16x8_t vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 3 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
poly16x8_t vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.8H;vm -> Vm;0 <= index <= 7 LUTI2 Vd.8H, {Vn.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
uint8x16_t vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 0 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
uint8x16_t vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
int8x16_t vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 0 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
int8x16_t vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
poly8x16_t vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 0 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
poly8x16_t vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn -> Vn.16B;vm -> Vm;0 <= index <= 1 LUTI4 Vd.16B, {Vn.16B}, Vm[index] Vd.16B -> result A64 | ||
|
||
uint16x8_t vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 1 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
uint16x8_t vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 3 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
int16x8_t vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 1 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
int16x8_t vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 3 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
float16x8_t vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 1 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
float16x8_t vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 3 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
bfloat16x8_t vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 1 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
bfloat16x8_t vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 3 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
poly16x8_t vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 1 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
poly16x8_t vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm, __builtin_constant_p(index)) vn.val[0] -> Vn1.8H;vn.val[1] -> Vn2.8H;vm -> Vm;0 <= index <= 3 LUTI4 Vd.8H, {Vn1.8H, Vn2.8H}, Vm[index] Vd.8H -> result A64 | ||
|
||
float16x4_t vamax_f16(float16x4_t vn, float16x4_t vm) vn -> Vn.4H;vm -> Vm.4H FAMAX Vd.4H, Vn.4H, Vm.4H Vd.4H -> result A64 | ||
float16x8_t vamaxq_f16(float16x8_t vn, float16x8_t vm) vn -> Vn.8H;vm -> Vm.8H FAMAX Vd.8H, Vn.8H, Vm.8H Vd.8H -> result A64 | ||
float32x2_t vamax_f32(float32x2_t vn, float32x2_t vm) vn -> Vn.2S;vm -> Vm.2S FAMAX Vd.2S, Vn.2S, Vm.2S Vd.2S -> result A64 | ||
float32x4_t vamaxq_f32(float32x4_t vn, float32x4_t vm) vn -> Vn.4S;vm -> Vm.4S FAMAX Vd.4S, Vn.4S, Vm.4S Vd.4S -> result A64 | ||
float64x2_t vamaxq_f64(float64x2_t vn, float64x2_t vm) vn -> Vn.2D;vm -> Vm.2D FAMAX Vd.2D, Vn.2D, Vm.2D Vd.2D -> result A64 | ||
|
||
float16x4_t vamin_f16(float16x4_t vn, float16x4_t vm) vn -> Vn.4H;vm -> Vm.4H FAMIN Vd.4H, Vn.4H, Vm.4H Vd.4H -> result A64 | ||
float16x8_t vaminq_f16(float16x8_t vn, float16x8_t vm) vn -> Vn.8H;vm -> Vm.8H FAMIN Vd.8H, Vn.8H, Vm.8H Vd.8H -> result A64 | ||
float32x2_t vamin_f32(float32x2_t vn, float32x2_t vm) vn -> Vn.2S;vm -> Vm.2S FAMIN Vd.2S, Vn.2S, Vm.2S Vd.2S -> result A64 | ||
float32x4_t vaminq_f32(float32x4_t vn, float32x4_t vm) vn -> Vn.4S;vm -> Vm.4S FAMIN Vd.4S, Vn.4S, Vm.4S Vd.4S -> result A64 | ||
float64x2_t vaminq_f64(float64x2_t vn, float64x2_t vm) vn -> Vn.2D;vm -> Vm.2D FAMIN Vd.2D, Vn.2D, Vm.2D Vd.2D -> result A64 | ||
|
||
<SECTION> Crypto | ||
uint8x16_t vaeseq_u8(uint8x16_t data, uint8x16_t key) data -> Vd.16B;key -> Vn.16B AESE Vd.16B,Vn.16B Vd.16B -> result A32/A64 | ||
uint8x16_t vaesdq_u8(uint8x16_t data, uint8x16_t key) data -> Vd.16B;key -> Vn.16B AESD Vd.16B,Vn.16B Vd.16B -> result A32/A64 | ||
|
@@ -4470,4 +4547,4 @@ float32x4_t vbfmlaltq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) r -> Vd | |
float32x4_t vbfmlalbq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b, __builtin_constant_p(lane)) r -> Vd.4S;a -> Vn.8H;b -> Vm.4H;0 <= lane <= 3 BFMLALB Vd.4S,Vn.8H,Vm.H[lane] Vd.4S -> result A32/A64 | ||
float32x4_t vbfmlalbq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b, __builtin_constant_p(lane)) r -> Vd.4S;a -> Vn.8H;b -> Vm.8H;0 <= lane <= 7 BFMLALB Vd.4S,Vn.8H,Vm.H[lane] Vd.4S -> result A32/A64 | ||
float32x4_t vbfmlaltq_lane_f32(float32x4_t r, bfloat16x8_t a, bfloat16x4_t b, __builtin_constant_p(lane)) r -> Vd.4S;a -> Vn.8H;b -> Vm.4H;0 <= lane <= 3 BFMLALT Vd.4S,Vn.8H,Vm.H[lane] Vd.4S -> result A32/A64 | ||
float32x4_t vbfmlaltq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b, __builtin_constant_p(lane)) r -> Vd.4S;a -> Vn.8H;b -> Vm.8H;0 <= lane <= 7 BFMLALT Vd.4S,Vn.8H,Vm.H[lane] Vd.4S -> result A32/A64 | ||
float32x4_t vbfmlaltq_laneq_f32(float32x4_t r, bfloat16x8_t a, bfloat16x8_t b, __builtin_constant_p(lane)) r -> Vd.4S;a -> Vn.8H;b -> Vm.8H;0 <= lane <= 7 BFMLALT Vd.4S,Vn.8H,Vm.H[lane] Vd.4S -> result A32/A64 |
Oops, something went wrong.