forked from Firmware/Firmwares
Compare commits
8 Commits
feature/fp
...
master
Author | SHA1 | Date | |
---|---|---|---|
0de438dc52 | |||
5f44f8df98 | |||
02ce96eed8 | |||
46f197c287 | |||
43e2a299db | |||
8450f85c93 | |||
a14ff554b0 | |||
588ca3c7ba |
Binary file not shown.
@ -68,6 +68,20 @@ int main() {
|
||||
|
||||
|
||||
|
||||
uint32_t batch_size = spn_checker::batch_size_reg();
|
||||
uint32_t iterations = spn_checker::num_iterations_reg();
|
||||
|
||||
printf("BATCH SIZE: %d\n", batch_size);
|
||||
printf("ITERATIONS: %d\n", iterations);
|
||||
|
||||
|
||||
|
||||
int in_addr = 0x30000000; // place input samples in the SPI memory
|
||||
int out_addr = 0x3C000000;
|
||||
int out_addr2 = 0x3E000000;
|
||||
|
||||
|
||||
|
||||
spn::mode_reg() = 1;
|
||||
spn::start_reg() = 1;
|
||||
wait_for_spn_interrupt();
|
||||
@ -91,39 +105,49 @@ int main() {
|
||||
|
||||
printf("Result Bytes: %d\n", result_bytes);
|
||||
|
||||
uint32_t step = 50000;
|
||||
uint32_t iterations = 2;
|
||||
uint32_t in_bytes = batch_size * sample_bytes;
|
||||
uint32_t out_bytes = batch_size * result_bytes;
|
||||
|
||||
uint32_t total_in = in_bytes * iterations;
|
||||
|
||||
if (total_in > (out_addr - in_addr)) {
|
||||
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr - in_addr);
|
||||
return 1;
|
||||
}
|
||||
if (out_bytes > (out_addr2 - out_addr)) {
|
||||
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
|
||||
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = (step * result_bytes) / axi_bytes;
|
||||
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
|
||||
uint32_t in_beats = in_bytes / axi_bytes;
|
||||
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = out_bytes / axi_bytes;
|
||||
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
|
||||
|
||||
int in_addr = 0x20010000; // place input samples in the SPI memory
|
||||
int out_addr = 0x20210000;
|
||||
int fpga_address_in = fpga_alloc(step * sample_bytes + 64);
|
||||
int fpga_address_out = fpga_alloc(step * result_bytes + 64);
|
||||
uint32_t current_in_addr = in_addr;
|
||||
int fpga_address_in = fpga_alloc(batch_size * sample_bytes + 64);
|
||||
int fpga_address_out = fpga_alloc(batch_size * result_bytes + 64);
|
||||
|
||||
// inject SPN input data
|
||||
spn_checker::input_addr_reg() = in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
|
||||
spn_checker::input_addr_reg() = current_in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
|
||||
spn_checker::start_data_trans_reg() = 1;
|
||||
spn_checker::output_addr_reg() = out_addr;
|
||||
|
||||
//run_xspn(in_addr, out_addr);
|
||||
for (int k = 0; k < iterations*step; k+=step) {
|
||||
fpga_dma(1, fpga_address_in, in_addr, step * sample_bytes);
|
||||
run_xspn(fpga_address_in, fpga_address_out, step, in_beats, out_beats);
|
||||
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
|
||||
fpga_dma(1, fpga_address_in, current_in_addr, batch_size * sample_bytes);
|
||||
run_xspn(fpga_address_in, fpga_address_out, batch_size, in_beats, out_beats);
|
||||
wait_for_spn_interrupt();
|
||||
spn::interrupt_reg() = 1;
|
||||
printf("XSPN finished\n");
|
||||
fpga_dma(0, fpga_address_out, out_addr, step * result_bytes);
|
||||
fpga_dma(0, fpga_address_out, out_addr, batch_size * result_bytes);
|
||||
spn_checker::offset_reg() = k;
|
||||
spn_checker::length_reg() = step;
|
||||
spn_checker::length_reg() = batch_size;
|
||||
spn_checker::start_result_check_reg() = 1;
|
||||
|
||||
in_addr += step * sample_bytes; // 5 bytes in each sample
|
||||
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample
|
||||
}
|
||||
|
||||
fpga_free(fpga_address_in);
|
||||
|
@ -45,6 +45,9 @@
|
||||
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
|
||||
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
|
||||
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
|
||||
#define SPN_CNTL_REG_XSPN_COUNT 0x80
|
||||
#define SPN_CNTL_REG_BATCH_SIZE 0x90
|
||||
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
|
||||
|
||||
template<uint32_t BASE_ADDR>
|
||||
class spn_checker_regs {
|
||||
@ -69,6 +72,12 @@ public:
|
||||
|
||||
uint32_t r_start_data_trans;
|
||||
|
||||
uint32_t r_xspn_count;
|
||||
|
||||
uint32_t r_batch_size;
|
||||
|
||||
uint32_t r_num_iterations;
|
||||
|
||||
static inline uint32_t& start_result_check_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK);
|
||||
}
|
||||
@ -101,4 +110,16 @@ public:
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
|
||||
}
|
||||
|
||||
static inline uint32_t& xspn_count_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT);
|
||||
}
|
||||
|
||||
static inline uint32_t& batch_size_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE);
|
||||
}
|
||||
|
||||
static inline uint32_t& num_iterations_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS);
|
||||
}
|
||||
|
||||
};
|
||||
|
Binary file not shown.
@ -12,8 +12,8 @@ typedef void (*function_ptr_t) (void);
|
||||
//! Instance data for the PLIC.
|
||||
plic_instance_t g_plic;
|
||||
std::array<function_ptr_t,PLIC_NUM_INTERRUPTS> g_ext_interrupt_handlers;
|
||||
bool spn1_hw_interrupt{true};
|
||||
bool spn2_hw_interrupt{true};
|
||||
volatile bool spn1_hw_interrupt{true};
|
||||
volatile bool spn2_hw_interrupt{true};
|
||||
|
||||
|
||||
/*! \brief external interrupt handler
|
||||
@ -45,36 +45,38 @@ void configure_irq(size_t irq_num, function_ptr_t handler, unsigned char prio=1)
|
||||
}
|
||||
|
||||
void wait_for_spn1_interrupt() {
|
||||
// wait until HW is done
|
||||
if(spn1_hw_interrupt) {
|
||||
do{
|
||||
asm("wfi");
|
||||
asm("nop");
|
||||
}while(spn1_hw_interrupt);
|
||||
// This is a time critical part. It must be ensured that no interrupt is processed between flag checking and wfi.
|
||||
// Disable interrupts and wait a few more clocks for the instruction to take effect before checking the flag.
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
while(spn1_hw_interrupt) {
|
||||
// Enable interrupts and immediately enter wfi.
|
||||
asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
|
||||
// Disable interrupts again before examine the flag value.
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
}
|
||||
spn1_hw_interrupt=true;
|
||||
set_csr(mie, MIP_MEIP);
|
||||
}
|
||||
|
||||
void wait_for_spn2_interrupt() {
|
||||
// wait until HW is done
|
||||
if(spn2_hw_interrupt) {
|
||||
do{
|
||||
asm("wfi");
|
||||
asm("nop");
|
||||
}while(spn2_hw_interrupt);
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
while(spn2_hw_interrupt) {
|
||||
asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
}
|
||||
spn2_hw_interrupt=true;
|
||||
set_csr(mie, MIP_MEIP);
|
||||
}
|
||||
|
||||
void wait_for_spn_interrupts() {
|
||||
if(spn1_hw_interrupt || spn2_hw_interrupt) {
|
||||
do{
|
||||
asm("wfi");
|
||||
asm("nop");
|
||||
}while(spn1_hw_interrupt || spn2_hw_interrupt);
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
while(spn1_hw_interrupt || spn2_hw_interrupt) {
|
||||
asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
|
||||
}
|
||||
spn1_hw_interrupt=true;
|
||||
spn2_hw_interrupt=true;
|
||||
set_csr(mie, MIP_MEIP);
|
||||
}
|
||||
|
||||
/*!\brief initializes platform
|
||||
|
@ -14,8 +14,10 @@ void run_xspn1(int in_addr, int out_addr, int num_samples, int in_beats, int out
|
||||
spn_1::output_addr_reg() = out_addr;
|
||||
spn_1::num_of_in_beats_reg() = in_beats; // Number of AXI4 burst beats needed to load all input data
|
||||
spn_1::num_of_out_beats_reg() = out_beats; // Number of AXI4 burst beats needed to store all result data
|
||||
spn_1::start_reg() = 1;
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP)); // Disable interrupts
|
||||
printf("Starting first XSPN instance\n");
|
||||
asm volatile ("csrrs x0, mie, %0; nop" : : "r"(MIP_MEIP)); // Enable interrupts
|
||||
spn_1::start_reg() = 1;
|
||||
}
|
||||
|
||||
void run_xspn2(int in_addr, int out_addr, int num_samples, int in_beats, int out_beats) {
|
||||
@ -25,7 +27,9 @@ void run_xspn2(int in_addr, int out_addr, int num_samples, int in_beats, int out
|
||||
spn_2::output_addr_reg() = out_addr;
|
||||
spn_2::num_of_in_beats_reg() = in_beats; // Number of AXI4 burst beats needed to load all input data
|
||||
spn_2::num_of_out_beats_reg() = out_beats; // Number of AXI4 burst beats needed to store all result data
|
||||
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP)); // Disable interrupts
|
||||
printf("Starting second XSPN instance\n");
|
||||
asm volatile ("csrrs x0, mie, %0; nop" : : "r"(MIP_MEIP)); // Enable interrupts
|
||||
spn_2::start_reg() = 1;
|
||||
}
|
||||
|
||||
@ -48,17 +52,38 @@ int main() {
|
||||
configure_irq(2, spn1_interrupt_handler);
|
||||
configure_irq(22, spn2_interrupt_handler);
|
||||
|
||||
|
||||
uint32_t xspn_count = spn_checker::xspn_count_reg();
|
||||
uint32_t batch_size = spn_checker::batch_size_reg();
|
||||
uint32_t iterations = spn_checker::num_iterations_reg();
|
||||
|
||||
printf("XSPN COUNT: %d\n", xspn_count);
|
||||
if (xspn_count < 1 || xspn_count > 2) {
|
||||
printf("ERROR: invalid XSPN COUNT");
|
||||
return 1;
|
||||
}
|
||||
printf("BATCH SIZE: %d\n", batch_size);
|
||||
printf("ITERATIONS: %d\n", iterations);
|
||||
|
||||
|
||||
|
||||
int in_addr = 0x30000000; // place input samples in the SPI memory
|
||||
int out_addr1 = 0x3C000000;
|
||||
int out_addr2 = 0x3E000000;
|
||||
|
||||
spn_1::mode_reg() = 1;
|
||||
spn_1::start_reg() = 1;
|
||||
wait_for_spn1_interrupt();
|
||||
uint32_t readout = spn_1::readout_reg();
|
||||
printf("READOUT first HW instance:0x%x\n", readout);
|
||||
|
||||
spn_2::mode_reg() = 1;
|
||||
spn_2::start_reg() = 1;
|
||||
wait_for_spn2_interrupt();
|
||||
uint32_t readout2 = spn_2::readout_reg();
|
||||
printf("READOUT second HW instance:0x%x\n", readout2);
|
||||
if (xspn_count == 2) {
|
||||
spn_2::mode_reg() = 1;
|
||||
spn_2::start_reg() = 1;
|
||||
wait_for_spn2_interrupt();
|
||||
uint32_t readout2 = spn_2::readout_reg();
|
||||
printf("READOUT second HW instance:0x%x\n", readout2);
|
||||
}
|
||||
|
||||
uint32_t axi_bytes = readout;
|
||||
axi_bytes = axi_bytes & 0xff;
|
||||
@ -74,39 +99,50 @@ int main() {
|
||||
uint32_t result_bytes = 8;
|
||||
printf("Result Bytes: %d\n", result_bytes);
|
||||
|
||||
const uint32_t amount_of_input_samples = 50000;
|
||||
uint32_t step = 50000;
|
||||
uint32_t iterations = 5;
|
||||
uint32_t in_bytes = batch_size * sample_bytes;
|
||||
uint32_t out_bytes = batch_size * result_bytes;
|
||||
|
||||
uint32_t total_in = in_bytes * iterations;
|
||||
|
||||
if (total_in > (out_addr1 - in_addr)) {
|
||||
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr1 - in_addr);
|
||||
return 1;
|
||||
}
|
||||
if (out_bytes > (out_addr2 - out_addr1)) {
|
||||
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
|
||||
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = (step * result_bytes) / axi_bytes;
|
||||
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
|
||||
|
||||
int in_addr = 0x20010000; // place input samples in the SPI memory
|
||||
int out_addr1 = 0x20510000;
|
||||
int out_addr2 = 0x205F0000;
|
||||
uint32_t in_beats = in_bytes / axi_bytes;
|
||||
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = out_bytes / axi_bytes;
|
||||
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
|
||||
|
||||
uint32_t current_in_addr = in_addr;
|
||||
|
||||
// inject SPN input data
|
||||
spn_checker::input_addr_reg() = in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
|
||||
spn_checker::input_addr_reg() = current_in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
|
||||
spn_checker::start_data_trans_reg() = 1;
|
||||
|
||||
spn_checker::output_addr_reg() = out_addr1;
|
||||
spn_checker::output_addr2_reg() = out_addr2;
|
||||
for (int k = 0; k < iterations*step; k+=step) {
|
||||
run_xspn1(in_addr, out_addr1, step, in_beats, out_beats);
|
||||
run_xspn2(in_addr, out_addr2, step, in_beats, out_beats);
|
||||
wait_for_spn_interrupts();
|
||||
if (xspn_count == 2) {
|
||||
spn_checker::output_addr2_reg() = out_addr2;
|
||||
}
|
||||
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
|
||||
run_xspn1(current_in_addr, out_addr1, batch_size, in_beats, out_beats);
|
||||
if (xspn_count == 2) {
|
||||
run_xspn2(current_in_addr, out_addr2, batch_size, in_beats, out_beats);
|
||||
wait_for_spn_interrupts();
|
||||
} else {
|
||||
wait_for_spn1_interrupt();
|
||||
}
|
||||
printf("XSPN finished\n");
|
||||
spn_checker::offset_reg() = k;
|
||||
spn_checker::length_reg() = step;
|
||||
spn_checker::length_reg() = batch_size;
|
||||
spn_checker::start_result_check_reg() = 1;
|
||||
|
||||
in_addr += step * sample_bytes; // 5 bytes in each sample
|
||||
if (k == amount_of_input_samples) {
|
||||
in_addr = 0x20010000;
|
||||
}
|
||||
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample (NIPS5)
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -45,6 +45,9 @@
|
||||
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
|
||||
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
|
||||
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
|
||||
#define SPN_CNTL_REG_XSPN_COUNT 0x80
|
||||
#define SPN_CNTL_REG_BATCH_SIZE 0x90
|
||||
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
|
||||
|
||||
template<uint32_t BASE_ADDR>
|
||||
class spn_checker_regs {
|
||||
@ -69,6 +72,12 @@ public:
|
||||
|
||||
uint32_t r_start_data_trans;
|
||||
|
||||
uint32_t r_xspn_count;
|
||||
|
||||
uint32_t r_batch_size;
|
||||
|
||||
uint32_t r_num_iterations;
|
||||
|
||||
static inline uint32_t& start_result_check_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK);
|
||||
}
|
||||
@ -101,4 +110,16 @@ public:
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
|
||||
}
|
||||
|
||||
static inline uint32_t& xspn_count_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT);
|
||||
}
|
||||
|
||||
static inline uint32_t& batch_size_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE);
|
||||
}
|
||||
|
||||
static inline uint32_t& num_iterations_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import re
|
||||
|
||||
import sys
|
||||
|
||||
if (len(sys.argv) < 2):
|
||||
print('No argument given')
|
||||
exit()
|
||||
|
||||
|
||||
nips = sys.argv[1]
|
||||
|
||||
input_file = open(nips + '_inputdata.txt', 'r')
|
||||
in_data = input_file.read();
|
||||
|
||||
# convert double to uint8_t
|
||||
in_data = re.sub(r'(\d)\.0+e\+00', r'\1,', in_data)
|
||||
in_data = re.sub(r'(\d)\.(\d)0+e\+01', r'\1\2,', in_data)
|
||||
in_data = re.sub(r'(\d)\.(\d)(\d)0+e\+02', r'\1\2\3,', in_data)
|
||||
in_data = in_data.replace(";", "")
|
||||
|
||||
# remove last comma
|
||||
in_data = in_data[:-2]
|
||||
# count samples
|
||||
input_sample_cnt = len(in_data.split(","))
|
||||
#####################################################################
|
||||
|
||||
ref_file = open(nips + '_outputdata.txt', 'r')
|
||||
ref_data = ref_file.read()
|
||||
ref_data = re.sub(r'\n', r' / ln2,\n', ref_data)
|
||||
ref_data = ref_data[:-2]
|
||||
ref_sample_cnt = len(ref_data.split(","))
|
||||
|
||||
# create cpp file
|
||||
f = open("../xspn_data.cpp", "w")
|
||||
cpp_file = "#include <array>\n"
|
||||
cpp_file += "#include <cmath>\n"
|
||||
cpp_file += "#include <bits/stdint-uintn.h>\n\n"
|
||||
# The results in the outputdata.txt file are not directly what comes out of the PE but the natural logarithm of it.
|
||||
cpp_file += "constexpr auto ln2 = std::log(2);\n"
|
||||
cpp_file += "std::array<uint8_t, " + str(input_sample_cnt) + "> input_data = {\n" + str(in_data) + "}; \n\n"
|
||||
cpp_file += "std::array<double, " + str(ref_sample_cnt) + "> ref_data = {\n" + str(ref_data)+ "}; \n"
|
||||
|
||||
f.write(cpp_file)
|
||||
f.close()
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user