adds initial working version of llvm backend

This commit is contained in:
Eyck-Alexander Jentzsch 2023-09-19 16:26:07 +02:00
parent b360fc2c75
commit de45d06878
4 changed files with 4083 additions and 3649 deletions

View File

@ -95,6 +95,7 @@ if(TARGET RapidJSON)
endif()
if(WITH_LLVM)
find_package(LLVM)
target_compile_definitions(${PROJECT_NAME} PUBLIC ${LLVM_DEFINITIONS})
target_include_directories(${PROJECT_NAME} PUBLIC ${LLVM_INCLUDE_DIRS})
if(BUILD_SHARED_LIBS)

View File

@ -58,6 +58,7 @@ using namespace iss::debugger;
template <typename ARCH> class vm_impl : public iss::llvm::vm_base<ARCH> {
public:
using traits = arch::traits<ARCH>;
using super = typename iss::llvm::vm_base<ARCH>;
using virt_addr_t = typename super::virt_addr_t;
using phys_addr_t = typename super::phys_addr_t;
@ -80,7 +81,7 @@ public:
protected:
using vm_base<ARCH>::get_reg_ptr;
inline const char *name(size_t index){return traits<ARCH>::reg_aliases.at(index);}
inline const char *name(size_t index){return traits::reg_aliases.at(index);}
template <typename T> inline ConstantInt *size(T type) {
return ConstantInt::get(getContext(), APInt(32, type->getType()->getScalarSizeInBits()));
@ -88,7 +89,7 @@ protected:
void setup_module(Module* m) override {
super::setup_module(m);
iss::llvm::fp_impl::add_fp_functions_2_module(m, traits<ARCH>::FP_REGS_SIZE, traits<ARCH>::XLEN);
iss::llvm::fp_impl::add_fp_functions_2_module(m, traits::FP_REGS_SIZE, traits::XLEN);
}
inline Value *gen_choose(Value *cond, Value *trueVal, Value *falseVal, unsigned size) {
@ -114,88 +115,70 @@ protected:
}
inline void gen_set_pc(virt_addr_t pc, unsigned reg_num) {
Value *next_pc_v = this->builder.CreateSExtOrTrunc(this->gen_const(traits<ARCH>::XLEN, pc.val),
this->get_type(traits<ARCH>::XLEN));
Value *next_pc_v = this->builder.CreateSExtOrTrunc(this->gen_const(traits::XLEN, pc.val),
this->get_type(traits::XLEN));
this->builder.CreateStore(next_pc_v, get_reg_ptr(reg_num), true);
}
// some compile time constants
// enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 };
enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 };
enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 };
enum { LUT_SIZE = 1 << util::bit_count(static_cast<uint64_t>(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast<uint64_t>(EXTR_MASK16)) };
using this_class = vm_impl<ARCH>;
using compile_func = std::tuple<continuation_e, BasicBlock *> (this_class::*)(virt_addr_t &pc,
code_word_t instr,
BasicBlock *bb);
std::array<compile_func, LUT_SIZE> lut;
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
std::array<compile_func, LUT_SIZE> lut_11;
std::array<compile_func *, 4> qlut;
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
compile_func f) {
if (pos < 0) {
lut[idx] = f;
} else {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
} else {
if ((valid & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
} else {
auto new_val = idx << 1;
if ((value & bitmask) != 0) new_val++;
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
}
}
}
}
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
if (pos >= 0) {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
lut_val = extract_fields(pos - 1, val, mask, lut_val);
} else {
auto new_val = lut_val << 1;
if ((val & bitmask) != 0) new_val++;
lut_val = extract_fields(pos - 1, val, mask, new_val);
}
}
return lut_val;
}
template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
inline S sext(U from) {
auto mask = (1ULL<<W) - 1;
auto sign_mask = 1ULL<<(W-1);
return (from & mask) | ((from & sign_mask) ? ~mask : 0);
}
private:
/****************************************************************************
* start opcode definitions
****************************************************************************/
struct InstructionDesriptor {
struct instruction_descriptor {
size_t length;
uint32_t value;
uint32_t mask;
compile_func op;
};
struct decoding_tree_node{
std::vector<instruction_descriptor> instrs;
std::vector<decoding_tree_node*> children;
uint32_t submask = std::numeric_limits<uint32_t>::max();
uint32_t value;
decoding_tree_node(uint32_t value) : value(value){}
};
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{
decoding_tree_node* root {nullptr};
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
/* instruction ${instr.instruction.name} */
{${instr.length}, ${instr.value}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
/* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
{${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
}};
/* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
/* instruction ${idx}: ${instr.name} */
std::tuple<continuation_e, BasicBlock*> __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, BasicBlock* bb){<%instr.code.eachLine{%>
${it}<%}%>
std::tuple<continuation_e, BasicBlock*> __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, BasicBlock* bb){
bb->setName(fmt::format("${instr.name}_0x{:X}",pc.val));
this->gen_sync(PRE_SYNC,${idx});
uint64_t PC = pc.val;
<%instr.fields.eachLine{%>${it}
<%}%>if(this->disass_enabled){
/* generate console output when executing the command */<%instr.disass.eachLine{%>
${it}<%}%>
}
auto cur_pc_val = this->gen_const(32,pc.val);
pc=pc+ ${instr.length/8};
this->gen_set_pc(pc, traits::NEXT_PC);
<%instr.behavior.eachLine{%>${it}
<%}%>
this->gen_trap_check(bb);
this->gen_sync(POST_SYNC, ${idx});
this->builder.CreateBr(bb);
return returnValue;
}
<%}%>
/****************************************************************************
@ -203,23 +186,75 @@ private:
****************************************************************************/
std::tuple<continuation_e, BasicBlock *> illegal_intruction(virt_addr_t &pc, code_word_t instr, BasicBlock *bb) {
this->gen_sync(iss::PRE_SYNC, instr_descr.size());
this->builder.CreateStore(this->builder.CreateLoad(this->get_typeptr(traits<ARCH>::NEXT_PC), get_reg_ptr(traits<ARCH>::NEXT_PC), true),
get_reg_ptr(traits<ARCH>::PC), true);
this->builder.CreateStore(this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), true),
get_reg_ptr(traits::PC), true);
this->builder.CreateStore(
this->builder.CreateAdd(this->builder.CreateLoad(this->get_typeptr(traits<ARCH>::ICOUNT), get_reg_ptr(traits<ARCH>::ICOUNT), true),
this->builder.CreateAdd(this->builder.CreateLoad(this->get_typeptr(traits::ICOUNT), get_reg_ptr(traits::ICOUNT), true),
this->gen_const(64U, 1)),
get_reg_ptr(traits<ARCH>::ICOUNT), true);
get_reg_ptr(traits::ICOUNT), true);
pc = pc + ((instr & 3) == 3 ? 4 : 2);
this->gen_raise_trap(0, 2); // illegal instruction trap
this->gen_sync(iss::POST_SYNC, instr_descr.size());
this->gen_trap_check(this->leave_blk);
return std::make_tuple(BRANCH, nullptr);
}
//decoding functionality
void populate_decoding_tree(decoding_tree_node* root){
//create submask
for(auto instr: root->instrs){
root->submask &= instr.mask;
}
//put each instr according to submask&encoding into children
for(auto instr: root->instrs){
bool foundMatch = false;
for(auto child: root->children){
//use value as identifying trait
if(child->value == (instr.value&root->submask)){
child->instrs.push_back(instr);
foundMatch = true;
}
}
if(!foundMatch){
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
child->instrs.push_back(instr);
root->children.push_back(child);
}
}
root->instrs.clear();
//call populate_decoding_tree for all children
if(root->children.size() >1)
for(auto child: root->children){
populate_decoding_tree(child);
}
else{
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
return instr1.mask > instr2.mask;
});
}
}
compile_func decode_instr(decoding_tree_node* node, code_word_t word){
if(!node->children.size()){
if(node->instrs.size() == 1) return node->instrs[0].op;
for(auto instr : node->instrs){
if((instr.mask&word) == instr.value) return instr.op;
}
}
else{
for(auto child : node->children){
if (child->value == (node->submask&word)){
return decode_instr(child, word);
}
}
}
return nullptr;
}
};
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
volatile CODE_WORD x = insn;
insn = 2 * x;
template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
volatile CODE_WORD x = instr;
instr = 2 * x;
}
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
@ -227,14 +262,11 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id) {
qlut[0] = lut_00.data();
qlut[1] = lut_01.data();
qlut[2] = lut_10.data();
qlut[3] = lut_11.data();
for (auto instr : instr_descr) {
auto quantrant = instr.value & 0x3;
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
for(auto instr:instr_descr){
root->instrs.push_back(instr);
}
populate_decoding_tree(root);
}
template <typename ARCH>
@ -242,50 +274,50 @@ std::tuple<continuation_e, BasicBlock *>
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
// we fetch at max 4 byte, alignment is 2
enum {TRAP_ID=1<<16};
code_word_t insn = 0;
// const typename traits<ARCH>::addr_t upper_bits = ~traits<ARCH>::PGMASK;
code_word_t instr = 0;
// const typename traits::addr_t upper_bits = ~traits::PGMASK;
phys_addr_t paddr(pc);
auto *const data = (uint8_t *)&insn;
paddr = this->core.v2p(pc);
auto *const data = (uint8_t *)&instr;
if(this->core.has_mmu())
paddr = this->core.virt2phys(pc);
//TODO: re-add page handling
// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
// auto res = this->core.read(paddr, 2, data);
// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
// if ((instr & 0x3) == 0x3) { // this is a 32bit instruction
// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
// }
// } else {
auto res = this->core.read(paddr, 4, data);
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// }
if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
// curr pc on stack
++inst_cnt;
auto lut_val = extract_fields(insn);
auto f = qlut[insn & 0x3][lut_val];
auto f = decode_instr(root, instr);
if (f == nullptr) {
f = &this_class::illegal_intruction;
}
return (this->*f)(pc, insn, this_block);
return (this->*f)(pc, instr, this_block);
}
template <typename ARCH> void vm_impl<ARCH>::gen_leave_behavior(BasicBlock *leave_blk) {
this->builder.SetInsertPoint(leave_blk);
this->builder.CreateRet(this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::NEXT_PC), get_reg_ptr(arch::traits<ARCH>::NEXT_PC), false));
this->builder.CreateRet(this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC),get_reg_ptr(traits::NEXT_PC), false));
}
template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(uint16_t trap_id, uint16_t cause) {
auto *TRAP_val = this->gen_const(32, 0x80 << 24 | (cause << 16) | trap_id);
this->builder.CreateStore(TRAP_val, get_reg_ptr(traits<ARCH>::TRAP_STATE), true);
this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits<ARCH>::LAST_BRANCH), false);
this->builder.CreateStore(TRAP_val, get_reg_ptr(traits::TRAP_STATE), true);
this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
}
template <typename ARCH> void vm_impl<ARCH>::gen_leave_trap(unsigned lvl) {
std::vector<Value *> args{ this->core_ptr, ConstantInt::get(getContext(), APInt(64, lvl)) };
this->builder.CreateCall(this->mod->getFunction("leave_trap"), args);
auto *PC_val = this->gen_read_mem(traits<ARCH>::CSR, (lvl << 8) + 0x41, traits<ARCH>::XLEN / 8);
this->builder.CreateStore(PC_val, get_reg_ptr(traits<ARCH>::NEXT_PC), false);
this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits<ARCH>::LAST_BRANCH), false);
auto *PC_val = this->gen_read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN / 8);
this->builder.CreateStore(PC_val, get_reg_ptr(traits::NEXT_PC), false);
this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
}
template <typename ARCH> void vm_impl<ARCH>::gen_wait(unsigned type) {
@ -295,22 +327,25 @@ template <typename ARCH> void vm_impl<ARCH>::gen_wait(unsigned type) {
template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(BasicBlock *trap_blk) {
this->builder.SetInsertPoint(trap_blk);
auto *trap_state_val = this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::TRAP_STATE), get_reg_ptr(traits<ARCH>::TRAP_STATE), true);
this->gen_sync(POST_SYNC, -1); //TODO get right InstrId
auto *trap_state_val = this->builder.CreateLoad(this->get_typeptr(traits::TRAP_STATE), get_reg_ptr(traits::TRAP_STATE), true);
this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()),
get_reg_ptr(traits<ARCH>::LAST_BRANCH), false);
get_reg_ptr(traits::LAST_BRANCH), false);
std::vector<Value *> args{this->core_ptr, this->adj_to64(trap_state_val),
this->adj_to64(this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::PC), get_reg_ptr(traits<ARCH>::PC), false))};
this->adj_to64(this->builder.CreateLoad(this->get_typeptr(traits::PC), get_reg_ptr(traits::PC), false))};
this->builder.CreateCall(this->mod->getFunction("enter_trap"), args);
auto *trap_addr_val = this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::NEXT_PC), get_reg_ptr(traits<ARCH>::NEXT_PC), false);
auto *trap_addr_val = this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), false);
this->builder.CreateRet(trap_addr_val);
}
template <typename ARCH> inline void vm_impl<ARCH>::gen_trap_check(BasicBlock *bb) {
auto *v = this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::TRAP_STATE), get_reg_ptr(arch::traits<ARCH>::TRAP_STATE), true);
auto* target_bb = BasicBlock::Create(this->mod->getContext(), "", this->func, bb);
auto *v = this->builder.CreateLoad(this->get_typeptr(traits::TRAP_STATE), get_reg_ptr(traits::TRAP_STATE), true);
this->gen_cond_branch(this->builder.CreateICmp(
ICmpInst::ICMP_EQ, v,
ConstantInt::get(getContext(), APInt(v->getType()->getIntegerBitWidth(), 0))),
bb, this->trap_blk, 1);
target_bb, this->trap_blk, 1);
this->builder.SetInsertPoint(target_bb);
}
} // namespace ${coreDef.name.toLowerCase()}

View File

@ -1113,8 +1113,10 @@ iss::status riscv_hart_m_p<BASE, FEAT>::write_mem(phys_addr_t paddr, unsigned le
}
this->reg.trap_state=std::numeric_limits<uint32_t>::max();
this->interrupt_sim=hostvar;
#ifndef WITH_TCC
throw(iss::simulation_stopped(hostvar));
#endif
break;
//throw(iss::simulation_stopped(hostvar));
case 0x0101: {
char c = static_cast<char>(hostvar & 0xff);
if (c == '\n' || c == 0) {

File diff suppressed because it is too large Load Diff