Corrects vector slide (split into vector_slideup/vector_slidedown); changes all loop index types from unsigned to size_t
This commit is contained in:
parent
42bf6ee380
commit
fe9f2a5455
@ -629,13 +629,13 @@ if(vector != null) {%>
|
|||||||
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
||||||
switch(sew_val){
|
switch(sew_val){
|
||||||
case 0b000:
|
case 0b000:
|
||||||
return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
return softvector::vector_slideup<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b001:
|
case 0b001:
|
||||||
return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
return softvector::vector_slideup<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b010:
|
case 0b010:
|
||||||
return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
return softvector::vector_slideup<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b011:
|
case 0b011:
|
||||||
return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
return softvector::vector_slideup<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
default:
|
default:
|
||||||
throw new std::runtime_error("Unsupported sew bit value");
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
}
|
}
|
||||||
@ -643,13 +643,13 @@ if(vector != null) {%>
|
|||||||
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
||||||
switch(sew_val){
|
switch(sew_val){
|
||||||
case 0b000:
|
case 0b000:
|
||||||
return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
|
return softvector::vector_slidedown<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b001:
|
case 0b001:
|
||||||
return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
|
return softvector::vector_slidedown<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b010:
|
case 0b010:
|
||||||
return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
|
return softvector::vector_slidedown<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
case 0b011:
|
case 0b011:
|
||||||
return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
|
return softvector::vector_slidedown<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||||
default:
|
default:
|
||||||
throw new std::runtime_error("Unsupported sew bit value");
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
}
|
}
|
||||||
|
@ -124,11 +124,13 @@ void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uns
|
|||||||
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
|
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
|
||||||
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
|
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
|
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
|
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
|
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
|
||||||
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
|
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
|
||||||
template <unsigned VLEN, typename dest_elem_t, typename scr_elem_t = dest_elem_t>
|
template <unsigned VLEN, typename dest_elem_t, typename scr_elem_t = dest_elem_t>
|
||||||
void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
|
void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
|
||||||
template <unsigned VLEN, typename scr_elem_t>
|
template <unsigned VLEN, typename scr_elem_t>
|
||||||
|
@ -349,7 +349,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
if(carry == carry_t::NO_CARRY) {
|
if(carry == carry_t::NO_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
||||||
@ -358,18 +358,18 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(carry == carry_t::SUB_CARRY) {
|
} else if(carry == carry_t::SUB_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) - mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) - mask_reg[idx];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + mask_reg[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -385,7 +385,7 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
if(carry == carry_t::NO_CARRY) {
|
if(carry == carry_t::NO_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm);
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm);
|
||||||
@ -394,18 +394,18 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(carry == carry_t::SUB_CARRY) {
|
} else if(carry == carry_t::SUB_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) - mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) - mask_reg[idx];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + mask_reg[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -417,7 +417,7 @@ void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype
|
|||||||
auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, elem_count);
|
auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, elem_count);
|
||||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
||||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active)
|
if(mask_active)
|
||||||
vd_view[idx] = vs1_view[idx];
|
vd_view[idx] = vs1_view[idx];
|
||||||
@ -431,7 +431,7 @@ void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, b
|
|||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
||||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active)
|
if(mask_active)
|
||||||
vd_view[idx] = imm;
|
vd_view[idx] = imm;
|
||||||
@ -503,7 +503,7 @@ void mask_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_
|
|||||||
auto fn = get_mask_funct<elem_t>(funct6, funct3);
|
auto fn = get_mask_funct<elem_t>(funct6, funct3);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
|
vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
|
||||||
@ -513,7 +513,7 @@ void mask_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_
|
|||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
||||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -528,7 +528,7 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v
|
|||||||
auto fn = get_mask_funct<elem_t>(funct6, funct3);
|
auto fn = get_mask_funct<elem_t>(funct6, funct3);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_mask_view[idx] = fn(vs2_view[idx], imm);
|
vd_mask_view[idx] = fn(vs2_view[idx], imm);
|
||||||
@ -538,7 +538,7 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v
|
|||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
||||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -567,7 +567,7 @@ void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart
|
|||||||
auto fn = get_unary_fn<dest_elem_t, src2_elem_t>(unary_op);
|
auto fn = get_unary_fn<dest_elem_t, src2_elem_t>(unary_op);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(vs2_view[idx]);
|
vd_view[idx] = fn(vs2_view[idx]);
|
||||||
@ -578,7 +578,7 @@ void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart
|
|||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -608,13 +608,13 @@ void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vs
|
|||||||
auto fn = get_carry_funct<elem_t>(funct);
|
auto fn = get_carry_funct<elem_t>(funct);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
elem_t carry = vm ? 0 : mask_reg[idx];
|
elem_t carry = vm ? 0 : mask_reg[idx];
|
||||||
vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx], carry);
|
vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx], carry);
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
||||||
// always tail agnostic
|
// always tail agnostic
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -629,13 +629,13 @@ void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstar
|
|||||||
auto fn = get_carry_funct<elem_t>(funct);
|
auto fn = get_carry_funct<elem_t>(funct);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
elem_t carry = vm ? 0 : mask_reg[idx];
|
elem_t carry = vm ? 0 : mask_reg[idx];
|
||||||
vd_mask_view[idx] = fn(vs2_view[idx], imm, carry);
|
vd_mask_view[idx] = fn(vs2_view[idx], imm, carry);
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) {
|
||||||
// always tail agnostic
|
// always tail agnostic
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -814,7 +814,7 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
|||||||
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
||||||
@ -825,7 +825,7 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
|||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return saturated;
|
return saturated;
|
||||||
@ -841,7 +841,7 @@ bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl
|
|||||||
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], imm);
|
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], imm);
|
||||||
@ -852,7 +852,7 @@ bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl
|
|||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return saturated;
|
return saturated;
|
||||||
@ -916,7 +916,7 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
||||||
auto fn = get_red_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
auto fn = get_red_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
||||||
dest_elem_t& running_total = {vs1_elem};
|
dest_elem_t& running_total = {vs1_elem};
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
fn(running_total, vs2_view[idx]);
|
fn(running_total, vs2_view[idx]);
|
||||||
@ -924,7 +924,7 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
}
|
}
|
||||||
vd_view[0] = running_total;
|
vd_view[0] = running_total;
|
||||||
// the tail is all elements of the destination register beyond the first one
|
// the tail is all elements of the destination register beyond the first one
|
||||||
for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
|
for(size_t idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1228,7 +1228,7 @@ void fp_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
|||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
||||||
@ -1240,7 +1240,7 @@ void fp_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
|||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1256,7 +1256,7 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||||
// body is from vstart to min(elem_count, vl)
|
// body is from vstart to min(elem_count, vl)
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], imm);
|
vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], imm);
|
||||||
@ -1268,7 +1268,7 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
// elements w/ index larger than vl are in the tail
|
// elements w/ index larger than vl are in the tail
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1324,7 +1324,7 @@ void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
auto fn = get_fp_red_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
auto fn = get_fp_red_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
||||||
dest_elem_t& running_total = {vs1_elem};
|
dest_elem_t& running_total = {vs1_elem};
|
||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
fn(rm, accrued_flags, running_total, vs2_view[idx]);
|
fn(rm, accrued_flags, running_total, vs2_view[idx]);
|
||||||
@ -1333,7 +1333,7 @@ void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
vd_view[0] = running_total;
|
vd_view[0] = running_total;
|
||||||
softfloat_exceptionFlags = accrued_flags;
|
softfloat_exceptionFlags = accrued_flags;
|
||||||
// the tail is all elements of the destination register beyond the first one
|
// the tail is all elements of the destination register beyond the first one
|
||||||
for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
|
for(size_t idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1517,7 +1517,7 @@ void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op,
|
|||||||
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
||||||
auto fn = get_fp_unary_fn<dest_elem_t, src_elem_t>(encoding_space, unary_op);
|
auto fn = get_fp_unary_fn<dest_elem_t, src_elem_t>(encoding_space, unary_op);
|
||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(rm, accrued_flags, vs2_view[idx]);
|
vd_view[idx] = fn(rm, accrued_flags, vs2_view[idx]);
|
||||||
@ -1527,7 +1527,7 @@ void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op,
|
|||||||
}
|
}
|
||||||
softfloat_exceptionFlags = accrued_flags;
|
softfloat_exceptionFlags = accrued_flags;
|
||||||
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8);
|
||||||
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1593,7 +1593,7 @@ void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t
|
|||||||
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
||||||
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], vs1_view[idx]);
|
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], vs1_view[idx]);
|
||||||
@ -1602,7 +1602,7 @@ void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
softfloat_exceptionFlags = accrued_flags;
|
softfloat_exceptionFlags = accrued_flags;
|
||||||
for(unsigned idx = vl; idx < VLEN; idx++) {
|
for(size_t idx = vl; idx < VLEN; idx++) {
|
||||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1616,7 +1616,7 @@ void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vs
|
|||||||
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
||||||
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
||||||
uint8_t accrued_flags = 0;
|
uint8_t accrued_flags = 0;
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], imm);
|
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], imm);
|
||||||
@ -1625,7 +1625,7 @@ void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vs
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
softfloat_exceptionFlags = accrued_flags;
|
softfloat_exceptionFlags = accrued_flags;
|
||||||
for(unsigned idx = vl; idx < VLEN; idx++) {
|
for(size_t idx = vl; idx < VLEN; idx++) {
|
||||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1637,11 +1637,11 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
|
|||||||
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
||||||
auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
|
auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
|
||||||
auto fn = get_mask_funct<unsigned>(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct
|
auto fn = get_mask_funct<unsigned>(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
|
vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
|
||||||
}
|
}
|
||||||
// the tail is all elements of the destination register beyond the first one
|
// the tail is all elements of the destination register beyond the first one
|
||||||
for(unsigned idx = 1; idx < VLEN; idx++) {
|
for(size_t idx = 1; idx < VLEN; idx++) {
|
||||||
// always tail agnostic
|
// always tail agnostic
|
||||||
// this is a nop, placeholder for vta behavior
|
// this is a nop, placeholder for vta behavior
|
||||||
vd_view[idx] = vd_view[idx];
|
vd_view[idx] = vd_view[idx];
|
||||||
@ -1653,7 +1653,7 @@ template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart
|
|||||||
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
unsigned running_total = 0;
|
unsigned running_total = 0;
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active && vs2_view[idx])
|
if(mask_active && vs2_view[idx])
|
||||||
running_total += 1;
|
running_total += 1;
|
||||||
@ -1664,7 +1664,7 @@ template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstar
|
|||||||
uint64_t elem_count = VLEN;
|
uint64_t elem_count = VLEN;
|
||||||
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active && vs2_view[idx])
|
if(mask_active && vs2_view[idx])
|
||||||
return idx;
|
return idx;
|
||||||
@ -1714,14 +1714,14 @@ template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl,
|
|||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
auto fn = get_mask_set_funct(enc);
|
auto fn = get_mask_set_funct(enc);
|
||||||
bool marker = false;
|
bool marker = false;
|
||||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(marker, vs2_view[idx]);
|
vd_view[idx] = fn(marker, vs2_view[idx]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the tail is all elements of the destination register beyond the first one
|
// the tail is all elements of the destination register beyond the first one
|
||||||
for(unsigned idx = vl; idx < VLEN; idx++) {
|
for(size_t idx = vl; idx < VLEN; idx++) {
|
||||||
// always tail agnostic
|
// always tail agnostic
|
||||||
// this is a nop, placeholder for vta behavior
|
// this is a nop, placeholder for vta behavior
|
||||||
vd_view[idx] = vd_view[idx];
|
vd_view[idx] = vd_view[idx];
|
||||||
@ -1734,7 +1734,7 @@ void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uns
|
|||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
unsigned current = 0;
|
unsigned current = 0;
|
||||||
for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
|
for(size_t idx = vstart; idx < std::min(vl, elem_count); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = current;
|
vd_view[idx] = current;
|
||||||
@ -1748,7 +1748,7 @@ template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl,
|
|||||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
|
for(size_t idx = vstart; idx < std::min(vl, elem_count); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = idx;
|
vd_view[idx] = idx;
|
||||||
@ -1761,37 +1761,53 @@ template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, v
|
|||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vlmax);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vlmax);
|
||||||
if(to_vector) {
|
if(to_vector) {
|
||||||
vd_view[0] = val;
|
vd_view[0] = val;
|
||||||
for(unsigned idx = 1; idx < vlmax; idx++) {
|
for(size_t idx = 1; idx < vlmax; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return static_cast<int64_t>(static_cast<std::make_signed_t<src_elem_t>>(vd_view[0]));
|
return static_cast<int64_t>(static_cast<std::make_signed_t<src_elem_t>>(vd_view[0]));
|
||||||
}
|
}
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
|
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
|
||||||
uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
|
uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
||||||
for(unsigned idx = std::max<int64_t>(vstart, imm); idx < vl; idx++) {
|
for(size_t idx = std::max(vstart, imm); idx < vl; idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
src_elem_t src_elem = 0;
|
|
||||||
if(imm >= 0 || (idx - imm < elem_count))
|
|
||||||
src_elem = vs2_view[idx - imm];
|
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = src_elem;
|
vd_view[idx] = idx - imm < elem_count ? vs2_view[idx - imm] : 0;
|
||||||
} else {
|
} else {
|
||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(unsigned idx = vl; idx < elem_count; idx++) {
|
for(size_t idx = vl; idx < elem_count; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
|
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
|
||||||
vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
|
uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
|
||||||
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
|
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
||||||
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
|
||||||
|
for(size_t idx = vstart; idx < vl; idx++) {
|
||||||
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
|
if(mask_active) {
|
||||||
|
vd_view[idx] = std::numeric_limits<uint64_t>::max() - idx > imm && idx + imm < elem_count ? vs2_view[idx + imm] : 0;
|
||||||
|
} else {
|
||||||
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(size_t idx = vl; idx < elem_count; idx++) {
|
||||||
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
|
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
|
||||||
|
vector_slideup<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, 1);
|
vmask_view mask_reg = read_vmask<VLEN>(V, 1);
|
||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
|
||||||
if(vm || mask_reg[0])
|
if(vm || mask_reg[0])
|
||||||
@ -1800,8 +1816,8 @@ void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bo
|
|||||||
vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0];
|
vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0];
|
||||||
}
|
}
|
||||||
template <unsigned VLEN, typename src_elem_t>
|
template <unsigned VLEN, typename src_elem_t>
|
||||||
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
|
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
|
||||||
vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, -1);
|
vector_slidedown<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
|
||||||
if(vl > 0) {
|
if(vl > 0) {
|
||||||
vmask_view mask_reg = read_vmask<VLEN>(V, vl);
|
vmask_view mask_reg = read_vmask<VLEN>(V, vl);
|
||||||
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vl);
|
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vl);
|
||||||
@ -1818,7 +1834,7 @@ void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtyp
|
|||||||
auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, vlmax);
|
auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, vlmax);
|
||||||
auto vs2_view = get_vreg<VLEN, dest_elem_t>(V, vs2, vlmax);
|
auto vs2_view = get_vreg<VLEN, dest_elem_t>(V, vs2, vlmax);
|
||||||
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, vlmax);
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, vlmax);
|
||||||
for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = (vs1_view[idx] >= vlmax) ? 0 : vs2_view[vs1_view[idx]];
|
vd_view[idx] = (vs1_view[idx] >= vlmax) ? 0 : vs2_view[vs1_view[idx]];
|
||||||
@ -1826,7 +1842,7 @@ void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtyp
|
|||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(unsigned idx = vl; idx < vlmax; idx++) {
|
for(size_t idx = vl; idx < vlmax; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1837,7 +1853,7 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype,
|
|||||||
vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
|
vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
|
||||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, vlmax);
|
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, vlmax);
|
||||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, vlmax);
|
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, vlmax);
|
||||||
for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = (imm >= vlmax) ? 0 : vs2_view[imm];
|
vd_view[idx] = (imm >= vlmax) ? 0 : vs2_view[imm];
|
||||||
@ -1845,7 +1861,7 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype,
|
|||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(unsigned idx = vl; idx < vlmax; idx++) {
|
for(size_t idx = vl; idx < vlmax; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@ -1857,13 +1873,13 @@ void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, un
|
|||||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, vlmax);
|
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, vlmax);
|
||||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, vlmax);
|
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, vlmax);
|
||||||
unsigned current_pos = 0;
|
unsigned current_pos = 0;
|
||||||
for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) {
|
||||||
if(mask_reg[idx]) {
|
if(mask_reg[idx]) {
|
||||||
vd_view[current_pos] = vs2_view[idx];
|
vd_view[current_pos] = vs2_view[idx];
|
||||||
current_pos += 1;
|
current_pos += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(unsigned idx = current_pos; idx < vlmax; idx++) {
|
for(size_t idx = current_pos; idx < vlmax; idx++) {
|
||||||
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user