adds funct3 to vector functions

This commit is contained in:
Eyck-Alexander Jentzsch 2025-02-17 09:29:24 +01:00
parent dd4416ab15
commit b3f189145f
3 changed files with 285 additions and 192 deletions

View File

@ -188,81 +188,81 @@ if(vector != null) {%>
// Forwards to softvector::vector_load_store_index in write mode (softvector::softvec_write),
// passing this core's architecture handle together with the VLEN and XLEN trait constants.
// NOTE(review): by its name this looks like the indexed segment store helper - confirm against the instruction description.
// The value produced by the softvector helper is returned unchanged.
uint64_t vsxseg(uint8_t* V, uint8_t vs3, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
    const uint64_t result = softvector::vector_load_store_index(this->get_arch(), softvector::softvec_write, V, traits::VLEN, traits::XLEN,
                                                                vs3, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count,
                                                                segment_size, ordered);
    return result;
}
void vector_vector_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry = 0){
void vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry = 0){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
return softvector::vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
case 0b001:
return softvector::vector_vector_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
return softvector::vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
case 0b010:
return softvector::vector_vector_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
return softvector::vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
case 0b011:
return softvector::vector_vector_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
return softvector::vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast<softvector::carry_t>(carry));
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_imm_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry = 0){
void vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry = 0){
switch(sew_val){
case 0b000:
return softvector::vector_imm_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
return softvector::vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
case 0b001:
return softvector::vector_imm_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
return softvector::vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
case 0b010:
return softvector::vector_imm_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
return softvector::vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
case 0b011:
return softvector::vector_imm_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
return softvector::vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast<softvector::carry_t>(carry));
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_vector_wv(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
void vector_vector_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b001:
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b010:
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b011: // would widen to 128 bits
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_imm_wv(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
void vector_imm_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_imm_op<${vlen}, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::vector_imm_op<${vlen}, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::vector_imm_op<${vlen}, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011: // would widen to 128 bits
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_vector_ww(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
void vector_vector_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::vector_vector_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::vector_vector_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::vector_vector_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::vector_vector_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::vector_vector_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011: // would widen to 128 bits
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_imm_ww(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
void vector_imm_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_imm_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::vector_imm_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::vector_imm_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011: // would widen to 128 bits
default:
throw new std::runtime_error("Unsupported sew bit value");
@ -297,89 +297,89 @@ if(vector != null) {%>
throw new std::runtime_error("Unsupported target_sew_pow");
}
}
void vector_vector_m(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry){
vector_vector_op(V, funct, vl, vstart, vtype, 0, vd, vs2, vs1, sew_val, carry);
void vector_vector_m(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry){
vector_vector_op(V, funct6, funct3, vl, vstart, vtype, 0, vd, vs2, vs1, sew_val, carry);
}
void vector_imm_m(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry){
vector_imm_op(V, funct, vl, vstart, vtype, 0, vd, vs2, imm, sew_val, carry);
void vector_imm_m(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry){
vector_imm_op(V, funct6, funct3, vl, vstart, vtype, 0, vd, vs2, imm, sew_val, carry);
}
void carry_mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
void carry_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::carry_mask_vector_vector_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::carry_vector_vector_op<${vlen}, uint8_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::carry_mask_vector_vector_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::carry_vector_vector_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::carry_mask_vector_vector_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::carry_vector_vector_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011:
return softvector::carry_mask_vector_vector_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::carry_vector_vector_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void carry_mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
void carry_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::carry_mask_vector_imm_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::carry_vector_imm_op<${vlen}, uint8_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::carry_mask_vector_imm_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::carry_vector_imm_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::carry_mask_vector_imm_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::carry_vector_imm_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011:
return softvector::carry_mask_vector_imm_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::carry_vector_imm_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
void mask_vector_vector_op(uint8_t* V, unsigned funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::mask_vector_vector_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::mask_vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::mask_vector_vector_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::mask_vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::mask_vector_vector_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::mask_vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011:
return softvector::mask_vector_vector_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1);
return softvector::mask_vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
void mask_vector_imm_op(uint8_t* V, unsigned funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::mask_vector_imm_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::mask_vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::mask_vector_imm_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::mask_vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::mask_vector_imm_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::mask_vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011:
return softvector::mask_vector_imm_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::mask_vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_vector_vw(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
void vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b001:
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b010:
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
case 0b011: // would require 128 bits vs2 value
default:
throw new std::runtime_error("Unsupported sew bit value");
}
}
void vector_imm_vw(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
void vector_imm_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_imm_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::vector_imm_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::vector_imm_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vd, vs2, imm);
return softvector::vector_imm_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011: // would require 128 bits vs2 value
default:
throw new std::runtime_error("Unsupported sew bit value");

View File

@ -73,24 +73,24 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_size_byte, uint64_t elem_count,
uint8_t segment_size, bool ordered);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1, carry_t carry = carry_t::NO_CARRY);
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY);
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t>
void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
template <unsigned VLEN, typename elem_t>
void carry_mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1);
void mask_vector_vector_op(uint8_t* V, unsigned funct, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1);
template <unsigned VLEN, typename elem_t>
void carry_mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm);
void mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1);
void mask_vector_imm_op(uint8_t* V, unsigned funct, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<elem_t>::type imm);
void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1);
template <unsigned VLEN, typename elem_t>
void mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm);
void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm);
} // namespace softvector
#include "vm/vector_functions.hpp"
#endif /* _VM_VECTOR_FUNCTIONS_H_ */

View File

@ -70,77 +70,171 @@ template <typename elem_t> constexpr elem_t shift_mask() {
static_assert(std::numeric_limits<elem_t>::is_integer, "shift_mask only supports integer types");
return std::numeric_limits<elem_t>::digits - 1;
}
// Values of the funct3 selector that get_funct uses to choose an operation table.
// The enumerator names follow the operand-category mnemonics of the RISC-V vector extension.
// Kept as an unscoped enum because get_funct compares these directly against an unsigned funct3.
enum FUNCT3 {
    // accepted together by get_funct: table containing VADD, VSUB, the shifts, ...
    OPIVV = 0x0, // 0b000
    OPIVI = 0x3, // 0b011
    OPIVX = 0x4, // 0b100
    // accepted together by get_funct: table containing VWADDU, VWADD, VWSUBU, ...
    OPMVV = 0x2, // 0b010
    OPMVX = 0x6, // 0b110
    // not accepted by get_funct (it throws for these values)
    OPFVV = 0x1, // 0b001
    OPFVF = 0x5, // 0b101
};
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct) {
switch(funct) {
case 0b000000: // VADD
case 0b010000: // VADC
case 0b110000: // VWADDU
case 0b110100: // VWADDU.W
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b000010: // VSUB
case 0b110010: // VWSUBU
case 0b110110: // VWSUBU.W
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
case 0b000011: // VRSUB
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 - vs2; };
case 0b000100: // VMINU
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::min(vs2, static_cast<src2_elem_t>(vs1)); };
case 0b000101: // VMIN
return [](src2_elem_t vs2, src1_elem_t vs1) {
return std::min(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2),
static_cast<typename std::make_signed_t<src2_elem_t>>(vs1));
};
case 0b000110: // VMAXU
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::max(vs2, static_cast<src2_elem_t>(vs1)); };
case 0b000111: // VMAX
return [](src2_elem_t vs2, src1_elem_t vs1) {
return std::max(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2),
static_cast<typename std::make_signed_t<src2_elem_t>>(vs1));
};
case 0b001001: // VAND
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 & vs2; };
case 0b001010: // VOR
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
case 0b001011: // VXOR
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
case 0b100101: // VSLL
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
case 0b101000: // VSRL
case 0b101100: // VNSRL
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
case 0b101001: // VSRA
case 0b101101: // VNSRA
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
};
case 0b110001: // VWADD
case 0b110101: // VWADD.W
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) +
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
case 0b010010: // VSBC
case 0b110011: // VWSUB
case 0b110111: // VWSUB.W
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) -
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
default:
throw new std::runtime_error("Uknown funct in get_funct");
}
std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6, unsigned funct3) {
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
switch(funct6) {
case 0b000000: // VADD
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b000010: // VSUB
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
case 0b000011: // VRSUB
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 - vs2; };
case 0b000100: // VMINU
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::min(vs2, static_cast<src2_elem_t>(vs1)); };
case 0b000101: // VMIN
return [](src2_elem_t vs2, src1_elem_t vs1) {
return std::min(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2),
static_cast<typename std::make_signed_t<src2_elem_t>>(vs1));
};
case 0b000110: // VMAXU
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::max(vs2, static_cast<src2_elem_t>(vs1)); };
case 0b000111: // VMAX
return [](src2_elem_t vs2, src1_elem_t vs1) {
return std::max(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2),
static_cast<typename std::make_signed_t<src2_elem_t>>(vs1));
};
case 0b001001: // VAND
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 & vs2; };
case 0b001010: // VOR
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
case 0b001011: // VXOR
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
// case 0b001100: // VRGATHER
// case 0b001110: // VRGATHEREI16
// case 0b001111: // VLSLIDEDOWN
case 0b010000: // VADC
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b010010: // VSBC
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) -
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
// case 0b010111: // VMERGE / VMV
// case 0b100000: // VSADDU
// case 0b100001: // VSADD
// case 0b100010: // VSSUBU
// case 0b100011: // VSSUB
case 0b100101: // VSLL
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
// case 0b100111: // VSMUL
// case 0b100111: // VMV<NR>R
case 0b101000: // VSRL
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
case 0b101001: // VSRA
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
};
case 0b101100: // VNSRL
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
case 0b101101: // VNSRA
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
};
// case 0b101110: // VNCLIPU
// case 0b101111: // VNCLIP
// case 0b110000: // VWREDSUMU
// case 0b110001: // VWREDSUM
default:
throw new std::runtime_error("Uknown funct6 in get_funct");
}
else if(funct3 == OPMVV || funct3 == OPMVX)
switch(funct6) {
// case 0b000000: // VREDSUM
// case 0b000001: // VREDAND
// case 0b000010: // VREDOR
// case 0b000011: // VREDXOR
// case 0b000100: // VREDMINU
// case 0b000101: // VREDMIN
// case 0b000110: // VREDMAXU
// case 0b000111: // VREDMAX
// case 0b001000: // VAADDU
// case 0b001001: // VAADD
// case 0b001010: // VASUBU
// case 0b001011: // VASUB
// case 0b001110: // VSLID1EUP
// case 0b001111: // VSLIDE1DOWN
// case 0b010111: // VCOMPRESS
// case 0b011000: // VMANDN
// case 0b011001: // VMAND
// case 0b011010: // VMOR
// case 0b011011: // VMXOR
// case 0b011100: // VMORN
// case 0b011101: // VMNAND
// case 0b011110: // VMNOR
// case 0b011111: // VMXNOR
// case 0b100000: // VDIVU
// case 0b100001: // VDIV
// case 0b100010: // VREMU
// case 0b100011: // VREM
// case 0b100100: // VMULHU
// case 0b100101: // VMUL
// case 0b100110: // VMULHSU
// case 0b100111: // VMULH
// case 0b101001: // VMADD
// case 0b101011: // VNMSUB
// case 0b101101: // VMACC
// case 0b101111: // VNMSAC
case 0b110000: // VWADDU
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b110001: // VWADD
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) +
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
case 0b110010: // VWSUBU
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
case 0b110011: // VWSUB
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) -
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
case 0b110100: // VWADDU.W
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b110101: // VWADD.W
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) +
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
case 0b110110: // VWSUBU.W
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
case 0b110111: // VWSUB.W
return [](src2_elem_t vs2, src1_elem_t vs1) {
return static_cast<typename std::make_signed_t<dest_elem_t>>(static_cast<typename std::make_signed_t<src2_elem_t>>(vs2) -
static_cast<typename std::make_signed_t<src1_elem_t>>(vs1));
};
// case 0b111000: // VWMULU
// case 0b111010: // VWMULSU
// case 0b111011: // VWMUL
// case 0b111100: // VWMACCU
// case 0b111101: // VWMACC
// case 0b111110: // VWMACCUS
// case 0b111111: // VWMACCSU
default:
throw new std::runtime_error("Uknown funct6 in get_funct");
}
else
throw new std::runtime_error("Unknown funct3 in get_funct");
}
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1, carry_t carry) {
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1, carry_t carry) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs1_view = get_vreg<VLEN, src1_elem_t>(V, vs1, elem_count);
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6);
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
if(carry == carry_t::NO_CARRY) {
@ -170,13 +264,13 @@ void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart,
return;
}
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<src1_elem_t>::type imm, carry_t carry) {
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6);
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
if(carry == carry_t::NO_CARRY) {
@ -222,55 +316,6 @@ template <typename elem_t> std::function<bool(elem_t, elem_t, elem_t)> get_carry
throw new std::runtime_error("Uknown funct in get_carry_mask_funct");
}
}
template <unsigned VLEN, typename elem_t>
void carry_mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_carry_mask_funct<elem_t>(funct);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
elem_t carry = vm ? 0 : mask_reg[idx];
bool new_bit_value = fn(vs2_view[idx], vs1_view[idx], carry);
uint8_t* cur_mask_byte_addr = vd_mask_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
// always tail agnostic
}
return;
}
template <unsigned VLEN, typename elem_t>
void carry_mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_carry_mask_funct<elem_t>(funct);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
elem_t carry = vm ? 0 : mask_reg[idx];
bool new_bit_value = fn(vs2_view[idx], imm, carry);
uint8_t* cur_mask_byte_addr = vd_mask_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
// always tail agnostic
}
return;
}
template <typename elem_t> std::function<bool(elem_t, elem_t)> get_mask_funct(unsigned funct) {
switch(funct) {
case 0b011000: // VMSEQ
@ -301,14 +346,14 @@ template <typename elem_t> std::function<bool(elem_t, elem_t)> get_mask_funct(un
}
}
template <unsigned VLEN, typename elem_t>
void mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1) {
void mask_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_mask_funct<elem_t>(funct);
auto fn = get_mask_funct<elem_t>(funct6);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
@ -334,13 +379,13 @@ void mask_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vst
return;
}
template <unsigned VLEN, typename elem_t>
void mask_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm) {
void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<elem_t>::type imm) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_mask_funct<elem_t>(funct);
auto fn = get_mask_funct<elem_t>(funct6);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
@ -380,7 +425,6 @@ std::function<dest_elem_t(src2_elem_t)> get_unary_fn(unsigned unary_op) {
throw new std::runtime_error("Uknown funct in get_unary_fn");
}
}
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t>
void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
@ -406,4 +450,53 @@ void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart
}
return;
}
template <unsigned VLEN, typename elem_t>
void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_carry_mask_funct<elem_t>(funct);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
elem_t carry = vm ? 0 : mask_reg[idx];
bool new_bit_value = fn(vs2_view[idx], vs1_view[idx], carry);
uint8_t* cur_mask_byte_addr = vd_mask_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
// always tail agnostic
}
return;
}
template <unsigned VLEN, typename elem_t>
void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
typename std::make_signed<elem_t>::type imm) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
vmask_view vd_mask_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_carry_mask_funct<elem_t>(funct);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
elem_t carry = vm ? 0 : mask_reg[idx];
bool new_bit_value = fn(vs2_view[idx], imm, carry);
uint8_t* cur_mask_byte_addr = vd_mask_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail
for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) {
// always tail agnostic
}
return;
}
} // namespace softvector