/*******************************************************************************
 * Copyright (C) 2020-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************/
// clang-format off
#include <iss/arch/${coreDef.name.toLowerCase()}.h>
#include <iss/debugger/gdb_session.h>
#include <iss/debugger/server.h>
#include <iss/iss.h>
#include <iss/tcc/vm_base.h>
#include <util/logging.h>
#include <sstream>
#include <iss/instruction_decoder.h>
<%def fcsr = registers.find {it.name=='FCSR'}
if(fcsr != null) {%>
#include <vm/fp_functions.h><%}%>
#ifndef FMT_HEADER_ONLY
#define FMT_HEADER_ONLY
#endif
#include <fmt/format.h>

#include <array>
#include <iss/debugger/riscv_target_adapter.h>

namespace iss {
namespace tcc {
namespace ${coreDef.name.toLowerCase()} {
using namespace iss::arch;
using namespace iss::debugger;

template <typename ARCH> class vm_impl : public iss::tcc::vm_base<ARCH> {
public:
    using traits = arch::traits<ARCH>;
    using super       = typename iss::tcc::vm_base<ARCH>;
    using virt_addr_t = typename super::virt_addr_t;
    using phys_addr_t = typename super::phys_addr_t;
    using code_word_t = typename super::code_word_t;
    using mem_type_e  = typename traits::mem_type_e;    
    using addr_t      = typename super::addr_t;
    using tu_builder  = typename super::tu_builder;

    vm_impl();

    vm_impl(ARCH &core, unsigned core_id = 0, unsigned cluster_id = 0);

    void enableDebug(bool enable) { super::sync_exec = super::ALL_SYNC; }

    target_adapter_if *accquire_target_adapter(server_if *srv) override {
        debugger_if::dbg_enabled = true;
        if (vm_base<ARCH>::tgt_adapter == nullptr)
            vm_base<ARCH>::tgt_adapter = new riscv_target_adapter<ARCH>(srv, this->get_arch());
        return vm_base<ARCH>::tgt_adapter;
    }

protected:
    using vm_base<ARCH>::get_reg_ptr;

    using this_class = vm_impl<ARCH>;
    using compile_ret_t = continuation_e;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr, tu_builder&);

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
<%
if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
<%}%>
    void add_prologue(tu_builder& tu) override;

    void setup_module(std::string m) override {
        super::setup_module(m);
    }

    compile_ret_t gen_single_inst_behavior(virt_addr_t &, tu_builder&) override;

    void gen_trap_behavior(tu_builder& tu) override;

    void gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause);

    void gen_leave_trap(tu_builder& tu, unsigned lvl);

    inline void gen_set_tval(tu_builder& tu, uint64_t new_tval);

    inline void gen_set_tval(tu_builder& tu, value new_tval);

    inline void gen_trap_check(tu_builder& tu) {
        tu("if(*trap_state!=0) goto trap_entry;");
    }

    inline void gen_set_pc(tu_builder& tu, virt_addr_t pc, unsigned reg_num) {
        switch(reg_num){
        case traits::NEXT_PC:
            tu("*next_pc = {:#x};", pc.val);
            break;
        case traits::PC:
            tu("*pc = {:#x};", pc.val);
            break;
        default:
            if(!tu.defined_regs[reg_num]){
                tu("reg_t* reg{:02d} = (reg_t*){:#x};", reg_num, reinterpret_cast<uintptr_t>(get_reg_ptr(reg_num)));
            tu.defined_regs[reg_num]=true;
            }
            tu("*reg{:02d} = {:#x};", reg_num, pc.val);
        }
    }

    
    template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
    inline S sext(U from) {
        auto mask = (1ULL<<W) - 1;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }

<%functions.each{ it.eachLine { %>
    ${it}<%}%>
<%}%>
private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };

    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};

    //needs to be declared after instr_descr
    decoder instr_decoder;

    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    compile_ret_t __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, tu_builder& tu){
        tu("${instr.name}_{:#010x}:", pc.val);
        vm_base<ARCH>::gen_sync(tu, PRE_SYNC,${idx});
        uint64_t PC = pc.val;
        <%instr.fields.eachLine{%>${it}
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, mnemonic);
        }
        auto cur_pc_val = tu.constant(pc.val, traits::reg_bit_widths[traits::PC]);
        pc=pc+ ${instr.length/8};
        gen_set_pc(tu, pc, traits::NEXT_PC);
        tu("(*cycle)++;");
        tu.open_scope();
        this->gen_set_tval(tu, instr);
        <%instr.behavior.eachLine{%>${it}
        <%}%>
        tu.close_scope();
        vm_base<ARCH>::gen_sync(tu, POST_SYNC,${idx});
        gen_trap_check(tu);        
        return returnValue;
    }
    <%}%>
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
    compile_ret_t illegal_instruction(virt_addr_t &pc, code_word_t instr, tu_builder& tu) {
        vm_impl::gen_sync(tu, iss::PRE_SYNC, instr_descr.size());
        if(this->disass_enabled){
            /* generate console output when executing the command */
            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, std::string("illegal_instruction"));
        }
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
        gen_raise_trap(tu, 0, static_cast<int32_t>(traits:: RV_CAUSE_ILLEGAL_INSTRUCTION));
        this->gen_set_tval(tu, instr);
        vm_impl::gen_sync(tu, iss::POST_SYNC, instr_descr.size());
        vm_impl::gen_trap_check(tu);
        return ILLEGAL_INSTR;
    }
};

template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
    volatile CODE_WORD x = instr;
    instr = 2 * x;
}

template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }

template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id)
, instr_decoder([this]() {
        std::vector<generic_instruction_descriptor> g_instr_descr;
        g_instr_descr.reserve(instr_descr.size());
        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
            g_instr_descr.push_back(new_instr_descr);
        }
        return std::move(g_instr_descr);
    }()) {}

template <typename ARCH>
continuation_e
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, tu_builder& tu) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
    if (res != iss::Ok)
        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001) 
        return JUMP_TO_SELF;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
        f = instr_descr[inst_index].op;
    if (f == nullptr) {
        f = &this_class::illegal_instruction;
    }
    return (this->*f)(pc, instr, tu);
}

template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {
    tu("  *trap_state = {:#x};", 0x80 << 24 | (cause << 16) | trap_id);
}

template <typename ARCH> void vm_impl<ARCH>::gen_leave_trap(tu_builder& tu, unsigned lvl) {
    tu("leave_trap(core_ptr, {});", lvl);
    tu.store(traits::NEXT_PC, tu.read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN));
    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP), 32));
}

template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, uint64_t new_tval) {
    tu(fmt::format("tval = {};", new_tval));
}
template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, value new_tval) {
    tu(fmt::format("tval = {};", new_tval.str));
}

template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(tu_builder& tu) {
    tu("trap_entry:");
    this->gen_sync(tu, POST_SYNC, -1);    
    tu("enter_trap(core_ptr, *trap_state, *pc, tval);");
    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP),32));
    tu("return *next_pc;");
}
template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){
    std::ostringstream os;
    os << tu.add_reg_ptr("trap_state", arch::traits<ARCH>::TRAP_STATE, this->regs_base_ptr);
    os << tu.add_reg_ptr("pending_trap", arch::traits<ARCH>::PENDING_TRAP, this->regs_base_ptr);
    os << tu.add_reg_ptr("cycle", arch::traits<ARCH>::CYCLE, this->regs_base_ptr);
<%if(fcsr != null) {%>
    os << "uint32_t (*fget_flags)()=" << (uintptr_t)&fget_flags << ";\\n";
    os << "uint32_t (*fadd_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fadd_s << ";\\n";
    os << "uint32_t (*fsub_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fsub_s << ";\\n";
    os << "uint32_t (*fmul_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fmul_s << ";\\n";
    os << "uint32_t (*fdiv_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_s << ";\\n";
    os << "uint32_t (*fsqrt_s)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_s << ";\\n";
    os << "uint32_t (*fcmp_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fcmp_s << ";\\n";
    os << "uint32_t (*fcvt_s)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_s << ";\\n";
    os << "uint32_t (*fmadd_s)(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_s << ";\\n";
    os << "uint32_t (*fsel_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fsel_s << ";\\n";
    os << "uint32_t (*fclass_s)( uint32_t v1 )=" << (uintptr_t)&fclass_s << ";\\n";
    os << "uint32_t (*fconv_d2f)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fconv_d2f << ";\\n";
    os << "uint64_t (*fconv_f2d)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fconv_f2d << ";\\n";
    os << "uint64_t (*fadd_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fadd_d << ";\\n";
    os << "uint64_t (*fsub_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fsub_d << ";\\n";
    os << "uint64_t (*fmul_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fmul_d << ";\\n";
    os << "uint64_t (*fdiv_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_d << ";\\n";
    os << "uint64_t (*fsqrt_d)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_d << ";\\n";
    os << "uint64_t (*fcmp_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fcmp_d << ";\\n";
    os << "uint64_t (*fcvt_d)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_d << ";\\n";
    os << "uint64_t (*fmadd_d)(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_d << ";\\n";
    os << "uint64_t (*fsel_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fsel_d << ";\\n";
    os << "uint64_t (*fclass_d)(uint64_t v1  )=" << (uintptr_t)&fclass_d << ";\\n";
    os << "uint64_t (*fcvt_32_64)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_32_64 << ";\\n";
    os << "uint32_t (*fcvt_64_32)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_64_32 << ";\\n";
    os << "uint32_t (*unbox_s)(uint64_t v)=" << (uintptr_t)&unbox_s << ";\\n";
    <%}%>
    tu.add_prologue(os.str());
}

} // namespace ${coreDef.name.toLowerCase()}

template <>
std::unique_ptr<vm_if> create<arch::${coreDef.name.toLowerCase()}>(arch::${coreDef.name.toLowerCase()} *core, unsigned short port, bool dump) {
    auto ret = new ${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*core, dump);
    if (port != 0) debugger::server<debugger::gdb_session>::run_server(ret, port);
    return std::unique_ptr<vm_if>(ret);
}
} // namesapce tcc
} // namespace iss

#include <iss/arch/riscv_hart_m_p.h>
#include <iss/arch/riscv_hart_mu_p.h>
#include <iss/factory.h>
namespace iss {
namespace {
volatile std::array<bool, 2> dummy = {
        core_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|tcc", [](unsigned port, void* init_data) -> std::tuple<cpu_ptr, vm_ptr>{
            auto* cpu = new iss::arch::riscv_hart_m_p<iss::arch::${coreDef.name.toLowerCase()}>();
		    auto vm = new tcc::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
        }),
        core_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|tcc", [](unsigned port, void* init_data) -> std::tuple<cpu_ptr, vm_ptr>{
            auto* cpu = new iss::arch::riscv_hart_mu_p<iss::arch::${coreDef.name.toLowerCase()}>();
		    auto vm = new tcc::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
        })
};
}
}
// clang-format on