8 Commits

12 changed files with 171 additions and 24113 deletions

Binary file not shown.

View File

@ -68,6 +68,20 @@ int main() {
uint32_t batch_size = spn_checker::batch_size_reg();
uint32_t iterations = spn_checker::num_iterations_reg();
printf("BATCH SIZE: %d\n", batch_size);
printf("ITERATIONS: %d\n", iterations);
int in_addr = 0x30000000; // place input samples in the SPI memory
int out_addr = 0x3C000000;
int out_addr2 = 0x3E000000;
spn::mode_reg() = 1;
spn::start_reg() = 1;
wait_for_spn_interrupt();
@ -91,39 +105,49 @@ int main() {
printf("Result Bytes: %d\n", result_bytes);
uint32_t step = 50000;
uint32_t iterations = 2;
uint32_t in_bytes = batch_size * sample_bytes;
uint32_t out_bytes = batch_size * result_bytes;
uint32_t total_in = in_bytes * iterations;
if (total_in > (out_addr - in_addr)) {
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr - in_addr);
return 1;
}
if (out_bytes > (out_addr2 - out_addr)) {
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr);
return 1;
}
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
uint32_t out_beats = (step * result_bytes) / axi_bytes;
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
uint32_t in_beats = in_bytes / axi_bytes;
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
uint32_t out_beats = out_bytes / axi_bytes;
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
int in_addr = 0x20010000; // place input samples in the SPI memory
int out_addr = 0x20210000;
int fpga_address_in = fpga_alloc(step * sample_bytes + 64);
int fpga_address_out = fpga_alloc(step * result_bytes + 64);
uint32_t current_in_addr = in_addr;
int fpga_address_in = fpga_alloc(batch_size * sample_bytes + 64);
int fpga_address_out = fpga_alloc(batch_size * result_bytes + 64);
// inject SPN input data
spn_checker::input_addr_reg() = in_addr;
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
spn_checker::input_addr_reg() = current_in_addr;
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
spn_checker::start_data_trans_reg() = 1;
spn_checker::output_addr_reg() = out_addr;
//run_xspn(in_addr, out_addr);
for (int k = 0; k < iterations*step; k+=step) {
fpga_dma(1, fpga_address_in, in_addr, step * sample_bytes);
run_xspn(fpga_address_in, fpga_address_out, step, in_beats, out_beats);
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
fpga_dma(1, fpga_address_in, current_in_addr, batch_size * sample_bytes);
run_xspn(fpga_address_in, fpga_address_out, batch_size, in_beats, out_beats);
wait_for_spn_interrupt();
spn::interrupt_reg() = 1;
printf("XSPN finished\n");
fpga_dma(0, fpga_address_out, out_addr, step * result_bytes);
fpga_dma(0, fpga_address_out, out_addr, batch_size * result_bytes);
spn_checker::offset_reg() = k;
spn_checker::length_reg() = step;
spn_checker::length_reg() = batch_size;
spn_checker::start_result_check_reg() = 1;
in_addr += step * sample_bytes; // 5 bytes in each sample
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample
}
fpga_free(fpga_address_in);

View File

@ -45,6 +45,9 @@
// Register offsets of the SPN checker control block. The accessors visible in
// spn_checker_regs add an offset to BASE_ADDR to form the register address.
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
// Offsets read by main() at start-up via xspn_count_reg(), batch_size_reg()
// and num_iterations_reg().
#define SPN_CNTL_REG_XSPN_COUNT 0x80
#define SPN_CNTL_REG_BATCH_SIZE 0x90
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
template<uint32_t BASE_ADDR>
class spn_checker_regs {
@ -69,6 +72,12 @@ public:
uint32_t r_start_data_trans;
uint32_t r_xspn_count;
uint32_t r_batch_size;
uint32_t r_num_iterations;
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_START_RESULT_CHECK.
static inline uint32_t& start_result_check_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
@ -101,4 +110,16 @@ public:
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_XSPN_COUNT.
static inline uint32_t& xspn_count_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_BATCH_SIZE.
static inline uint32_t& batch_size_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_NUM_ITERATIONS.
static inline uint32_t& num_iterations_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
};

Binary file not shown.

View File

@ -12,8 +12,8 @@ typedef void (*function_ptr_t) (void);
//! Instance data for the PLIC.
plic_instance_t g_plic;
//! External interrupt handler table, sized by PLIC_NUM_INTERRUPTS.
std::array<function_ptr_t,PLIC_NUM_INTERRUPTS> g_ext_interrupt_handlers;
//! Completion flags polled by the wait_for_spn*_interrupt() loops, which spin
//! while a flag is true and set it back to true afterwards.
//! They are volatile so that every loop iteration re-reads the flag from memory;
//! presumably the flags are cleared by the SPN interrupt handlers (not shown here).
//! The earlier non-volatile definitions were dropped: keeping both is a redefinition.
volatile bool spn1_hw_interrupt{true};
volatile bool spn2_hw_interrupt{true};
/*! \brief external interrupt handler
@ -45,36 +45,38 @@ void configure_irq(size_t irq_num, function_ptr_t handler, unsigned char prio=1)
}
/*! \brief blocks until the first SPN instance has signalled completion
 *
 * Sleeps in wfi for as long as spn1_hw_interrupt is still set, then sets the
 * flag back to true for the next run and leaves MIP_MEIP enabled in mie.
 * The former unmasked "if / do { wfi } while" loop was removed: it checked the
 * flag with the interrupt enabled, so the interrupt could be taken between the
 * check and the wfi.
 */
void wait_for_spn1_interrupt() {
    // wait until HW is done
    // This is a time critical part. It must be ensured that no interrupt is processed between flag checking and wfi.
    // Disable interrupts and wait a few more clocks for the instruction to take effect before checking the flag.
    asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    while(spn1_hw_interrupt) {
        // Enable interrupts and immediately enter wfi.
        // NOTE(review): a short window remains between csrrs and wfi — confirm it is acceptable on this core.
        asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
        // Disable interrupts again before examining the flag value.
        asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    }
    // Re-arm the flag for the next run and re-enable the interrupt.
    spn1_hw_interrupt=true;
    set_csr(mie, MIP_MEIP);
}
/*! \brief blocks until the second SPN instance has signalled completion
 *
 * Sleeps in wfi for as long as spn2_hw_interrupt is still set, then sets the
 * flag back to true for the next run and leaves MIP_MEIP enabled in mie.
 * The former unmasked "if / do { wfi } while" loop was removed: it checked the
 * flag with the interrupt enabled, so the interrupt could be taken between the
 * check and the wfi.
 */
void wait_for_spn2_interrupt() {
    // wait until HW is done
    // Mask the interrupt before looking at the flag so it cannot change between the check and the wfi.
    asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    while(spn2_hw_interrupt) {
        // Unmask and sleep right away; mask again before the flag is re-read.
        asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
        asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    }
    // Re-arm the flag for the next run and re-enable the interrupt.
    spn2_hw_interrupt=true;
    set_csr(mie, MIP_MEIP);
}
/*! \brief blocks until both SPN instances have signalled completion
 *
 * Sleeps in wfi for as long as either spn1_hw_interrupt or spn2_hw_interrupt
 * is still set, then sets both flags back to true for the next run and leaves
 * MIP_MEIP enabled in mie.
 * The former unmasked "if / do { wfi } while" loop was removed: it checked the
 * flags with the interrupt enabled, so an interrupt could be taken between the
 * check and the wfi.
 */
void wait_for_spn_interrupts() {
    // Mask the interrupt before looking at the flags so they cannot change between the check and the wfi.
    asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    while(spn1_hw_interrupt || spn2_hw_interrupt) {
        // Unmask and sleep right away; mask again before the flags are re-read.
        asm volatile ("csrrs x0, mie, %0; wfi; nop" : : "r"(MIP_MEIP));
        asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP));
    }
    // Re-arm both flags for the next run and re-enable the interrupt.
    spn1_hw_interrupt=true;
    spn2_hw_interrupt=true;
    set_csr(mie, MIP_MEIP);
}
/*!\brief initializes platform

View File

@ -14,8 +14,10 @@ void run_xspn1(int in_addr, int out_addr, int num_samples, int in_beats, int out
spn_1::output_addr_reg() = out_addr;
spn_1::num_of_in_beats_reg() = in_beats; // Number of AXI4 burst beats needed to load all input data
spn_1::num_of_out_beats_reg() = out_beats; // Number of AXI4 burst beats needed to store all result data
spn_1::start_reg() = 1;
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP)); // Disable interrupts
printf("Starting first XSPN instance\n");
asm volatile ("csrrs x0, mie, %0; nop" : : "r"(MIP_MEIP)); // Enable interrupts
spn_1::start_reg() = 1;
}
void run_xspn2(int in_addr, int out_addr, int num_samples, int in_beats, int out_beats) {
@ -25,7 +27,9 @@ void run_xspn2(int in_addr, int out_addr, int num_samples, int in_beats, int out
spn_2::output_addr_reg() = out_addr;
spn_2::num_of_in_beats_reg() = in_beats; // Number of AXI4 burst beats needed to load all input data
spn_2::num_of_out_beats_reg() = out_beats; // Number of AXI4 burst beats needed to store all result data
asm volatile ("csrrc x0, mie, %0; nop; nop" : : "r"(MIP_MEIP)); // Disable interrupts
printf("Starting second XSPN instance\n");
asm volatile ("csrrs x0, mie, %0; nop" : : "r"(MIP_MEIP)); // Enable interrupts
spn_2::start_reg() = 1;
}
@ -48,17 +52,38 @@ int main() {
configure_irq(2, spn1_interrupt_handler);
configure_irq(22, spn2_interrupt_handler);
uint32_t xspn_count = spn_checker::xspn_count_reg();
uint32_t batch_size = spn_checker::batch_size_reg();
uint32_t iterations = spn_checker::num_iterations_reg();
printf("XSPN COUNT: %d\n", xspn_count);
if (xspn_count < 1 || xspn_count > 2) {
printf("ERROR: invalid XSPN COUNT");
return 1;
}
printf("BATCH SIZE: %d\n", batch_size);
printf("ITERATIONS: %d\n", iterations);
int in_addr = 0x30000000; // place input samples in the SPI memory
int out_addr1 = 0x3C000000;
int out_addr2 = 0x3E000000;
spn_1::mode_reg() = 1;
spn_1::start_reg() = 1;
wait_for_spn1_interrupt();
uint32_t readout = spn_1::readout_reg();
printf("READOUT first HW instance:0x%x\n", readout);
spn_2::mode_reg() = 1;
spn_2::start_reg() = 1;
wait_for_spn2_interrupt();
uint32_t readout2 = spn_2::readout_reg();
printf("READOUT second HW instance:0x%x\n", readout2);
if (xspn_count == 2) {
spn_2::mode_reg() = 1;
spn_2::start_reg() = 1;
wait_for_spn2_interrupt();
uint32_t readout2 = spn_2::readout_reg();
printf("READOUT second HW instance:0x%x\n", readout2);
}
uint32_t axi_bytes = readout;
axi_bytes = axi_bytes & 0xff;
@ -74,39 +99,50 @@ int main() {
uint32_t result_bytes = 8;
printf("Result Bytes: %d\n", result_bytes);
const uint32_t amount_of_input_samples = 50000;
uint32_t step = 50000;
uint32_t iterations = 5;
uint32_t in_bytes = batch_size * sample_bytes;
uint32_t out_bytes = batch_size * result_bytes;
uint32_t total_in = in_bytes * iterations;
if (total_in > (out_addr1 - in_addr)) {
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr1 - in_addr);
return 1;
}
if (out_bytes > (out_addr2 - out_addr1)) {
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr1);
return 1;
}
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
uint32_t out_beats = (step * result_bytes) / axi_bytes;
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
int in_addr = 0x20010000; // place input samples in the SPI memory
int out_addr1 = 0x20510000;
int out_addr2 = 0x205F0000;
uint32_t in_beats = in_bytes / axi_bytes;
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
uint32_t out_beats = out_bytes / axi_bytes;
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
uint32_t current_in_addr = in_addr;
// inject SPN input data
spn_checker::input_addr_reg() = in_addr;
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
spn_checker::input_addr_reg() = current_in_addr;
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
spn_checker::start_data_trans_reg() = 1;
spn_checker::output_addr_reg() = out_addr1;
spn_checker::output_addr2_reg() = out_addr2;
for (int k = 0; k < iterations*step; k+=step) {
run_xspn1(in_addr, out_addr1, step, in_beats, out_beats);
run_xspn2(in_addr, out_addr2, step, in_beats, out_beats);
wait_for_spn_interrupts();
if (xspn_count == 2) {
spn_checker::output_addr2_reg() = out_addr2;
}
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
run_xspn1(current_in_addr, out_addr1, batch_size, in_beats, out_beats);
if (xspn_count == 2) {
run_xspn2(current_in_addr, out_addr2, batch_size, in_beats, out_beats);
wait_for_spn_interrupts();
} else {
wait_for_spn1_interrupt();
}
printf("XSPN finished\n");
spn_checker::offset_reg() = k;
spn_checker::length_reg() = step;
spn_checker::length_reg() = batch_size;
spn_checker::start_result_check_reg() = 1;
in_addr += step * sample_bytes; // 5 bytes in each sample
if (k == amount_of_input_samples) {
in_addr = 0x20010000;
}
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample (NIPS5)
}
return 0;

View File

@ -45,6 +45,9 @@
// Register offsets of the SPN checker control block. The accessors visible in
// spn_checker_regs add an offset to BASE_ADDR to form the register address.
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
// Offsets read by main() at start-up via xspn_count_reg(), batch_size_reg()
// and num_iterations_reg().
#define SPN_CNTL_REG_XSPN_COUNT 0x80
#define SPN_CNTL_REG_BATCH_SIZE 0x90
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
template<uint32_t BASE_ADDR>
class spn_checker_regs {
@ -69,6 +72,12 @@ public:
uint32_t r_start_data_trans;
uint32_t r_xspn_count;
uint32_t r_batch_size;
uint32_t r_num_iterations;
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_START_RESULT_CHECK.
static inline uint32_t& start_result_check_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
@ -101,4 +110,16 @@ public:
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_XSPN_COUNT.
static inline uint32_t& xspn_count_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_BATCH_SIZE.
static inline uint32_t& batch_size_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
//! Returns a reference to the 32-bit word at BASE_ADDR + SPN_CNTL_REG_NUM_ITERATIONS.
static inline uint32_t& num_iterations_reg(){
    const uint32_t reg_address = BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS;
    uint32_t* const reg = reinterpret_cast<uint32_t*>(reg_address);
    return *reg;
}
};

View File

@ -1,46 +0,0 @@
#!/usr/bin/python3
"""Generate ../xspn_data.cpp from <nips>_inputdata.txt and <nips>_outputdata.txt.

Usage: pass the data-set prefix (the <nips> part of the file names) as the
first command-line argument. The generated C++ file defines two std::array
objects: `input_data` (uint8_t samples) and `ref_data` (reference results,
each divided by ln2).
"""
import re
import sys

if len(sys.argv) < 2:
    print('No argument given')
    # A missing argument is an error, so report it with a non-zero exit status.
    sys.exit(1)

nips = sys.argv[1]

# Context managers make sure every file handle is closed again.
with open(nips + '_inputdata.txt', 'r') as input_file:
    in_data = input_file.read()

# convert double to uint8_t
in_data = re.sub(r'(\d)\.0+e\+00', r'\1,', in_data)
in_data = re.sub(r'(\d)\.(\d)0+e\+01', r'\1\2,', in_data)
in_data = re.sub(r'(\d)\.(\d)(\d)0+e\+02', r'\1\2\3,', in_data)
in_data = in_data.replace(";", "")
# remove last comma
in_data = in_data[:-2]
# count samples
input_sample_cnt = len(in_data.split(","))

#####################################################################
with open(nips + '_outputdata.txt', 'r') as ref_file:
    ref_data = ref_file.read()

# Append " / ln2," to every line so each reference value becomes one array element.
ref_data = re.sub(r'\n', r' / ln2,\n', ref_data)
# remove last comma
ref_data = ref_data[:-2]
ref_sample_cnt = len(ref_data.split(","))

# create cpp file
cpp_file = "#include <array>\n"
cpp_file += "#include <cmath>\n"
cpp_file += "#include <bits/stdint-uintn.h>\n\n"
# The results in the outputdata.txt file are not directly what comes out of the PE but the natural logarithm of it.
cpp_file += "constexpr auto ln2 = std::log(2);\n"
cpp_file += "std::array<uint8_t, " + str(input_sample_cnt) + "> input_data = {\n" + str(in_data) + "}; \n\n"
cpp_file += "std::array<double, " + str(ref_sample_cnt) + "> ref_data = {\n" + str(ref_data) + "}; \n"

with open("../xspn_data.cpp", "w") as f:
    f.write(cpp_file)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff