Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
-Fix LD and ST naming for the quadword
-Remove bf16 support for the QV instructions
  • Loading branch information
CarolineConcatto committed Jun 23, 2023
1 parent 530c532 commit 5508e93
Showing 1 changed file with 50 additions and 58 deletions.
108 changes: 50 additions & 58 deletions main/acle.md
Original file line number Diff line number Diff line change
Expand Up @@ -9544,11 +9544,17 @@ Extract vector segment from each pair of quadword segments.
Contiguous zero-extending load to quadword (single vector).

``` c
svuint32_t svld1uw_u128[_u32](svbool_t, const uint32_t *ptr);
svuint32_t svld1uw_u128_vnum[_u32](svbool_t, const uint32_t *ptr, int64_t vnum);
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32
// _bf16, _f16, _f32
svuint8_t svld1quw[_u8](svbool_t, const uint8_t *ptr);
svuint8_t svld1quw_vnum[_u8](svbool_t, const uint8_t *ptr, int64_t vnum);

svuint64_t svld1ud_u128[_u64](svbool_t, const uint64_t *ptr);
svuint64_t svld1ud_u128_vnum[_u64](svbool_t, const uint64_t *ptr, int64_t vnum);
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svld1qud[_u8](svbool_t, const uint8_t *ptr);
svuint8_t svld1qud_vnum[_u8](svbool_t, const uint8_t *ptr, int64_t vnum);
```

#### LD1B, LD1D, LD1H, LD1W
Expand Down Expand Up @@ -9599,10 +9605,17 @@ Gather Load Quadword.

``` c
// Variants are also available for:
// _u64base_u8, _u64base_u16, _u64base_s16, _u64base_u32, _u64base_s32,
// _u64base_u64, _u64base_s64
// _u64base_bf16, _u64base_f16, _u64base_f32, _u64base_f64
svint8_t svld1q_gather[_u64base_s8](svbool_t pg, svint64_t zn, const void *rm);
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svint8_t svld1q_gather[_u64base]_s8(svbool_t pg, svuint64_t zn);
svint8_t svld1q_gather[_u64base]_offset_s8(svbool_t pg, svuint64_t zn, int64_t offset);
svint8_t svld1q_gather[_u64base]_index_s8(svbool_t pg, svuint64_t zn, int64_t index);
svint8_t svld1q_gather_[u64]offset[_s8](svbool_t pg, const int8_t *base, svuint64_t offset);

// Variants are also available for:
// _u16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svint16_t svld1q_gather_[u64]index[_s16](svbool_t pg, const int16_t *base, svuint64_t index);
```

#### LD2Q, LD3Q, LD4Q
Expand Down Expand Up @@ -9670,14 +9683,9 @@ Max/Min reduction of quadword vector segments.
``` c
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
uint8x16_t svmaxqv[_u8]_m(svbool_t pg, svuint8_t zn);
uint8x16_t svmaxqv[_u8]_z(svbool_t pg, svuint8_t zn);
uint8x16_t svmaxqv[_u8]_x(svbool_t pg, svuint8_t zn);

uint8x16_t svminqv[_u8]_m(svbool_t pg, svuint8_t zn);
uint8x16_t svminqv[_u8]_z(svbool_t pg, svuint8_t zn);
uint8x16_t svminqv[_u8]_x(svbool_t pg, svuint8_t zn);
// _f16, _f32, _f64
uint8x16_t svmaxqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t svminqv[_u8](svbool_t pg, svuint8_t zn);
```

#### FMAXNMQV, FMINNMQV
Expand All @@ -9686,13 +9694,8 @@ Max/Min recursive reduction of quadword vector segments.

``` c
// Variants are also available for _f32, _f64
float16x8_t svmaxnmqv[_f16]_m(svbool_t pg, svfloat16_t zn);
float16x8_t svmaxnmqv[_f16]_z(svbool_t pg, svfloat16_t zn);
float16x8_t svmaxnmqv[_f16]_x(svbool_t pg, svfloat16_t zn);

float16x8_t svminnmqv[_f16]_m(svbool_t pg, svfloat16_t zn);
float16x8_t svminnmqv[_f16]_z(svbool_t pg, svfloat16_t zn);
float16x8_t svminnmqv[_f16]_x(svbool_t pg, svfloat16_t zn);
float16x8_t svmaxnmqv[_f16](svbool_t pg, svfloat16_t zn);
float16x8_t svminnmqv[_f16](svbool_t pg, svfloat16_t zn);
```

#### BFMLSLB, BFMLSLT
Expand Down Expand Up @@ -9772,6 +9775,7 @@ Reverse doublewords in elements.
// All the intrinsics below are [SVE2.1 or SME]
// Variants are available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64
// _bf16, _f16, _f32, _f64
svuint8_t svrevd[_u8]_m(svuint8_t zd, svbool_t pg, svuint8_t zn);
svuint8_t svrevd[_u8]_z(svbool_t pg, svuint8_t zn);
svuint8_t svrevd[_u8]_x(svbool_t pg, svuint8_t zn);
Expand Down Expand Up @@ -9803,23 +9807,21 @@ Contiguous store of single vector operand. It is a truncating store from quadword.
``` c

// Variants are also available for:
// s128_vnum[_s32]
void svst1w_u128_vnum[_u32](svbool_t pg, uint32_t *ptr, int64_t vnum,
svuint32_t zt);
// _s8, _u16, _s16, _u32, _s32
// _bf16, _f16, _f32
void svst1quw[_u8](svbool_t pg, uint8_t *rn, svuint8_t zt);
void svst1quw_vnum[_u8](svbool_t pg, uint8_t *ptr, int64_t vnum,
svuint8_t zt);

// Variants are also available for:
// s128[_s32]
void svst1w_u128[_u32](svbool_t pg, uint32_t *rn, svuint32_t zt);

// Variants are also available for:
// s128_vnum[_s64]
void svst1d_u128_vnum[_u64](svbool_t pg, uint64_t *ptr, int64_t vnum,
svuint64_t zt);

// Variants are also available for:
// s128[_s64]
void svst1d_u128[_u64](svbool_t pg, uint64_t *rn, svuint64_t zt);
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
void svst1qud[_u8](svbool_t pg, uint8_t *rn, svuint8_t zt);
void svst1qud_vnum[_u8](svbool_t pg, uint8_t *ptr, int64_t vnum,
svuint8_t zt);
```

#### ST1B, ST1D, ST1H, ST1W

Contiguous store of multi-vector operand.
Expand Down Expand Up @@ -9926,27 +9928,17 @@ Scatter store quadwords.

``` c
// Variants are also available for:
// _u64base_u8, _u64base_u16, _u64base_s16, _u64base_u32, _u64base_s32,
// _u64base_u64, _u64base_s64
// _u64base_bf16, _u64base_f16, _u64base_f32, _u64base_f64
void svst1q_scatter[_u64base_s8](svbool_t pg, svint64_t zn, const void *rm,
svint8_t zt);
```

#### ST2Q, ST3Q, ST4Q

Contiguous store.
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
void svst1q_scatter[_u64base]_s8(svbool_t pg, svuint64_t zn, svint8_t data);
void svst1q_scatter[_u64base]_offset_s8(svbool_t pg, svuint64_t zn, int64_t offset, svint8_t data);
void svst1q_scatter[_u64base]_index_s8(svbool_t pg, svuint64_t zn, int64_t index, svint8_t data);
void svst1q_scatter_[u64]offset[_s8](svbool_t pg, int8_t *base, svuint64_t offset, svint8_t data);

``` c
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _u16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
void svst2q[_u8](svbool_t pg, uint8_t *rn, svuint8x2_t zt);
void svst2q_vnum[_u8](svbool_t pg, uint8_t *rn, int64_t vnum, svuint8x2_t zt);
void svst3q[_u8](svbool_t pg, uint8_t *rn, svuint8x3_t zt);
void svst3q_vnum[_u8](svbool_t pg, uint8_t *rn, int64_t vnum, svuint8x3_t zt);
void svst4q[_u8](svbool_t pg, uint8_t *rn, svuint8x4_t zt);
void svst4q_vnum[_u8](svbool_t pg, uint8_t *rn, int64_t vnum, svuint8x4_t zt);
void svst1q_scatter_[u64]index[_s16](svbool_t pg, int16_t *base, svuint64_t index, svint16_t data);
```

#### ST2Q, ST3Q, ST4Q
Expand All @@ -9971,9 +9963,9 @@ Programmable table lookup within each quadword vector segment (zeroing).

``` c
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svtblq[_u8](svuint8_t zn, svuint8_t zm);
svint8_t svtblq[_s8](svint8_t zn, svuint8_t zm);
```

#### TBXQ
Expand All @@ -9982,9 +9974,9 @@ Programmable table lookup within each quadword vector segment (merging).

``` c
// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
// _u8, _u16, _s16, _u32, _s32, _u64, _s64
// _bf16, _f16, _f32, _f64
svuint8_t svtbxq[_u8](svuint8_t zn, svuint8_t zm);
svint8_t svtbxq[_s8](svint8_t fallback, svint8_t zn, svuint8_t zm);
```

#### UZPQ1, UZPQ2
Expand Down

0 comments on commit 5508e93

Please sign in to comment.