Merge remote-tracking branch 'upstream/master' into jdk21-fix-8240567
actions-user committed Jun 29, 2023
2 parents 074e7de + e67393f commit e99f7d0
Showing 90 changed files with 2,969 additions and 2,894 deletions.
40 changes: 35 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -315,10 +315,6 @@ source %{
}
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}

// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -6162,6 +6158,41 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
ins_pipe(pipe_slow);
%}

+// ------------------------------ Vector shuffle -------------------------------
+
+instruct loadshuffle(vReg dst, vReg src) %{
+  match(Set dst (VectorLoadShuffle src));
+  format %{ "loadshuffle $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (bt == T_BYTE) {
+      if ($dst$$FloatRegister != $src$$FloatRegister) {
+        if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+          __ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
+                 $src$$FloatRegister, $src$$FloatRegister);
+        } else {
+          assert(UseSVE > 0, "must be sve");
+          __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+        }
+      }
+    } else {
+      if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+        // 4S/8S, 4I, 4F
+        __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+        if (type2aelembytes(bt) == 4) {
+          __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
+        }
+      } else {
+        assert(UseSVE > 0, "must be sve");
+        __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                             $src$$FloatRegister, __ B);
+      }
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
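The loadshuffle rule added above implements the VectorLoadShuffle node on AArch64: it zero-extends a vector of byte-sized shuffle indices to the element width of the vector being rearranged (uxtl on NEON, sve_vector_extend on SVE), so T_BYTE needs nothing beyond a register move. At the Java level this node backs the VectorShuffle used by rearrange. A minimal sketch of the user-visible operation, assuming the incubating jdk.incubator.vector API (compile and run with --add-modules jdk.incubator.vector; this example is not part of the patch):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorShuffle;
    import jdk.incubator.vector.VectorSpecies;

    public class RearrangeDemo {
        // 128-bit species: four int lanes, matching the NEON 4I case above.
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128;

        public static void main(String[] args) {
            int[] src = {10, 20, 30, 40};
            // Lane i of the result takes lane shuffle[i] of the source.
            VectorShuffle<Integer> shuffle = VectorShuffle.fromValues(SPECIES, 2, 0, 3, 1);
            IntVector v = IntVector.fromArray(SPECIES, src, 0);
            int[] dst = v.rearrange(shuffle).toArray(); // {30, 10, 40, 20}
            System.out.println(java.util.Arrays.toString(dst));
        }
    }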

// ------------------------------ Vector rearrange -----------------------------

// Here is an example that rearranges a NEON vector with 4 ints:
@@ -6184,7 +6215,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
// to implement rearrange.

-// Maybe move the shuffle preparation to VectorLoadShuffle
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0 &&
(Matcher::vector_element_basic_type(n) == T_SHORT ||
40 changes: 35 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -305,10 +305,6 @@ source %{
}
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}

// Assert that the given node is not a variable shift.
bool assert_not_var_shift(const Node* n) {
assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift");
@@ -4428,6 +4424,41 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
ins_pipe(pipe_slow);
%}

+// ------------------------------ Vector shuffle -------------------------------
+
+instruct loadshuffle(vReg dst, vReg src) %{
+  match(Set dst (VectorLoadShuffle src));
+  format %{ "loadshuffle $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (bt == T_BYTE) {
+      if ($dst$$FloatRegister != $src$$FloatRegister) {
+        if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+          __ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
+                 $src$$FloatRegister, $src$$FloatRegister);
+        } else {
+          assert(UseSVE > 0, "must be sve");
+          __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+        }
+      }
+    } else {
+      if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+        // 4S/8S, 4I, 4F
+        __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+        if (type2aelembytes(bt) == 4) {
+          __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
+        }
+      } else {
+        assert(UseSVE > 0, "must be sve");
+        __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                             $src$$FloatRegister, __ B);
+      }
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}

// ------------------------------ Vector rearrange -----------------------------

// Here is an example that rearranges a NEON vector with 4 ints:
@@ -4450,7 +4481,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{
// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
// to implement rearrange.

-// Maybe move the shuffle preparation to VectorLoadShuffle
instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0 &&
(Matcher::vector_element_basic_type(n) == T_SHORT ||
4 changes: 0 additions & 4 deletions src/hotspot/cpu/arm/arm.ad
@@ -1025,10 +1025,6 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
4 changes: 0 additions & 4 deletions src/hotspot/cpu/ppc/ppc.ad
@@ -2189,10 +2189,6 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
5 changes: 1 addition & 4 deletions src/hotspot/cpu/riscv/riscv_v.ad
@@ -90,9 +90,6 @@ source %{
return false;
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}
%}

definitions %{
@@ -4035,4 +4032,4 @@ instruct vtest_alltrue_branch(cmpOpEqNe cop, vRegMask op1, vRegMask op2, label l
__ enc_cmpEqNe_imm0_branch($cop$$cmpcode, t0, *($lbl$$label), /* is_far */ true);
%}
ins_pipe(pipe_slow);
-%}
+%}
4 changes: 0 additions & 4 deletions src/hotspot/cpu/s390/s390.ad
@@ -1529,10 +1529,6 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  return false;
-}

const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
77 changes: 59 additions & 18 deletions src/hotspot/cpu/x86/x86.ad
@@ -2151,19 +2151,6 @@ const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect*
return false;
}

-// Return true if Vector::rearrange needs preparation of the shuffle argument
-const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) {
-  switch (elem_bt) {
-    case T_BYTE:  return false;
-    case T_SHORT: return !VM_Version::supports_avx512bw();
-    case T_INT:   return !VM_Version::supports_avx();
-    case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
-    default:
-      ShouldNotReachHere();
-      return false;
-  }
-}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
bool legacy = (generic_opnd->opcode() == LEGVEC);
@@ -8340,6 +8327,17 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

+instruct loadShuffleB(vec dst) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
+  match(Set dst (VectorLoadShuffle dst));
+  format %{ "vector_load_shuffle $dst, $dst" %}
+  ins_encode %{
+    // empty
+  %}
+  ins_pipe( pipe_slow );
+%}

instruct rearrangeB(vec dst, vec shuffle) %{
predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
Matcher::vector_length(n) < 32);
@@ -8406,7 +8404,7 @@ instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
-            !VM_Version::supports_avx512bw());
+            Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
match(Set dst (VectorLoadShuffle src));
effect(TEMP dst, TEMP vtmp);
format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
@@ -8417,7 +8415,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
if (UseAVX == 0) {
assert(vlen_in_bytes <= 16, "required");
// Multiply each shuffle by two to get byte index
-      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
+      __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
__ psllw($vtmp$$XMMRegister, 1);

// Duplicate to create 2 copies of byte index
@@ -8432,7 +8430,8 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
int vlen_enc = vector_length_encoding(this);
// Multiply each shuffle by two to get byte index
-      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
+      __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
+      __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

// Duplicate to create 2 copies of byte index
__ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
@@ -8479,6 +8478,21 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe
ins_pipe( pipe_slow );
%}

+instruct loadShuffleS_evex(vec dst, vec src) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
+            VM_Version::supports_avx512bw());
+  match(Set dst (VectorLoadShuffle src));
+  format %{ "vector_load_shuffle $dst, $src" %}
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this);
+    if (!VM_Version::supports_avx512vl()) {
+      vlen_enc = Assembler::AVX_512bit;
+    }
+    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
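Without AVX-512BW the only in-register table lookup for shorts is the byte-granular pshufb, so loadShuffleS above first zero-extends each byte index to a word (the pmovzxbw/vpmovzxbw fix earlier in this hunk) and then expands it into the byte pair (2*i, 2*i+1); with AVX-512BW, loadShuffleS_evex needs only the zero-extension because vpermw indexes whole words. A scalar sketch of the index expansion the non-BW path computes (illustrative only; the class and method names are hypothetical):

    public class ShuffleWidenDemo {
        // Each short-lane index i becomes the little-endian byte-index pair
        // (2*i, 2*i+1), which is what the multiply-by-two and duplicate
        // steps above compute across the whole vector.
        static byte[] expand(byte[] shortIndices) {
            byte[] byteIndices = new byte[shortIndices.length * 2];
            for (int i = 0; i < shortIndices.length; i++) {
                int base = (shortIndices[i] & 0xFF) * 2;
                byteIndices[2 * i]     = (byte) base;       // low byte of the short lane
                byteIndices[2 * i + 1] = (byte) (base + 1); // high byte of the short lane
            }
            return byteIndices;
        }

        public static void main(String[] args) {
            // Indices {3, 0} select short lanes 3 and 0 -> bytes {6, 7, 0, 1}.
            System.out.println(java.util.Arrays.toString(expand(new byte[]{3, 0})));
        }
    }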

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
VM_Version::supports_avx512bw());
@@ -8509,7 +8523,7 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
// only byte shuffle instruction available on these platforms

// Duplicate and multiply each shuffle by 4
-    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
+    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
__ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
__ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
__ psllw($vtmp$$XMMRegister, 2);
@@ -8538,6 +8552,18 @@ instruct rearrangeI(vec dst, vec shuffle) %{
ins_pipe( pipe_slow );
%}

+instruct loadShuffleI_avx(vec dst, vec src) %{
+  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
+            UseAVX > 0);
+  match(Set dst (VectorLoadShuffle src));
+  format %{ "vector_load_shuffle $dst, $src" %}
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this);
+    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
UseAVX > 0);
@@ -8567,7 +8593,8 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
// only double word shuffle instruction available on these platforms

// Multiply each shuffle by two to get double word index
-    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
+    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
+    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

// Duplicate each double word shuffle
__ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
@@ -8593,6 +8620,20 @@ instruct rearrangeL(vec dst, vec src, vec shuffle) %{
ins_pipe( pipe_slow );
%}

+instruct loadShuffleL_evex(vec dst, vec src) %{
+  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
+  match(Set dst (VectorLoadShuffle src));
+  format %{ "vector_load_shuffle $dst, $src" %}
+  ins_encode %{
+    assert(UseAVX > 2, "required");
+
+    int vlen_enc = vector_length_encoding(this);
+    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
(Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
18 changes: 18 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -965,6 +965,24 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
do_name(vector_frombits_coerced_name, "fromBitsCoerced") \
\
+  do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
+   do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;"                                        \
+                                               "Ljava/lang/Class;"                                        \
+                                               "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;"     \
+                                               "IIII"                                                     \
+                                               "Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \
+                                               "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;")    \
+   do_name(vector_shuffle_step_iota_name, "shuffleIota")                                                  \
+                                                                                                          \
+  do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
+   do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;"                                        \
+                                               "Ljava/lang/Class;"                                        \
+                                               "Ljava/lang/Class;"                                        \
+                                               "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;"     \
+                                               "ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \
+                                               "Ljdk/internal/vm/vector/VectorSupport$Vector;")           \
+   do_name(vector_shuffle_to_vector_name, "shuffleToVector")                                              \
+                                                                                                          \
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
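The two entries added above register compiler intrinsics for the shuffle helpers in jdk.internal.vm.vector.VectorSupport: shuffleIota builds a shuffle from a (start, step) index sequence, and shuffleToVector exposes a shuffle's indices as a vector. Their public counterparts are VectorShuffle.iota and VectorShuffle.toVector; a brief usage sketch against the incubating API (the output noted in comments follows the API specification, not this patch):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.Vector;
    import jdk.incubator.vector.VectorShuffle;
    import jdk.incubator.vector.VectorSpecies;

    public class ShuffleIotaDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_128; // four int lanes

        public static void main(String[] args) {
            // iota(species, start, step, wrap): indices 1, 2, 3, 4 wrap to {1, 2, 3, 0},
            // i.e. a rotation by one lane.
            VectorShuffle<Integer> rotate = VectorShuffle.iota(SPECIES, 1, 1, true);
            Vector<Integer> asVector = rotate.toVector(); // the indices as an IntVector
            System.out.println(rotate);   // lane order {1, 2, 3, 0}
            System.out.println(asVector); // [1, 2, 3, 0]
        }
    }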
8 changes: 4 additions & 4 deletions src/hotspot/share/gc/z/zHeap.cpp
@@ -293,19 +293,19 @@ bool ZHeap::is_allocating(zaddress addr) const {

void ZHeap::object_iterate(ObjectClosure* object_cl, bool visit_weaks) {
assert(SafepointSynchronize::is_at_safepoint(), "Should be at safepoint");
-  ZHeapIterator iter(1 /* nworkers */, visit_weaks);
+  ZHeapIterator iter(1 /* nworkers */, visit_weaks, false /* for_verify */);
iter.object_iterate(object_cl, 0 /* worker_id */);
}

-void ZHeap::object_and_field_iterate(ObjectClosure* object_cl, OopFieldClosure* field_cl, bool visit_weaks) {
+void ZHeap::object_and_field_iterate_for_verify(ObjectClosure* object_cl, OopFieldClosure* field_cl, bool visit_weaks) {
   assert(SafepointSynchronize::is_at_safepoint(), "Should be at safepoint");
-  ZHeapIterator iter(1 /* nworkers */, visit_weaks);
+  ZHeapIterator iter(1 /* nworkers */, visit_weaks, true /* for_verify */);
iter.object_and_field_iterate(object_cl, field_cl, 0 /* worker_id */);
}

ParallelObjectIteratorImpl* ZHeap::parallel_object_iterator(uint nworkers, bool visit_weaks) {
assert(SafepointSynchronize::is_at_safepoint(), "Should be at safepoint");
-  return new ZHeapIterator(nworkers, visit_weaks);
+  return new ZHeapIterator(nworkers, visit_weaks, false /* for_verify */);
}

void ZHeap::serviceability_initialize() {
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/z/zHeap.hpp
@@ -118,7 +118,7 @@ class ZHeap {

// Iteration
void object_iterate(ObjectClosure* object_cl, bool visit_weaks);
-  void object_and_field_iterate(ObjectClosure* object_cl, OopFieldClosure* field_cl, bool visit_weaks);
+  void object_and_field_iterate_for_verify(ObjectClosure* object_cl, OopFieldClosure* field_cl, bool visit_weaks);
ParallelObjectIteratorImpl* parallel_object_iterator(uint nworkers, bool visit_weaks);

void threads_do(ThreadClosure* tc) const;