Compare commits
5 Commits
feature/fp
...
feature/re
Author | SHA1 | Date | |
---|---|---|---|
46f197c287 | |||
43e2a299db | |||
8450f85c93 | |||
a14ff554b0 | |||
588ca3c7ba |
Binary file not shown.
@ -68,6 +68,20 @@ int main() {
|
||||
|
||||
|
||||
|
||||
uint32_t batch_size = spn_checker::batch_size_reg();
|
||||
uint32_t iterations = spn_checker::num_iterations_reg();
|
||||
|
||||
printf("BATCH SIZE: %d\n", batch_size);
|
||||
printf("ITERATIONS: %d\n", iterations);
|
||||
|
||||
|
||||
|
||||
int in_addr = 0x30000000; // place input samples in the SPI memory
|
||||
int out_addr = 0x3C000000;
|
||||
int out_addr2 = 0x3E000000;
|
||||
|
||||
|
||||
|
||||
spn::mode_reg() = 1;
|
||||
spn::start_reg() = 1;
|
||||
wait_for_spn_interrupt();
|
||||
@ -91,39 +105,49 @@ int main() {
|
||||
|
||||
printf("Result Bytes: %d\n", result_bytes);
|
||||
|
||||
uint32_t step = 50000;
|
||||
uint32_t iterations = 2;
|
||||
uint32_t in_bytes = batch_size * sample_bytes;
|
||||
uint32_t out_bytes = batch_size * result_bytes;
|
||||
|
||||
uint32_t total_in = in_bytes * iterations;
|
||||
|
||||
if (total_in > (out_addr - in_addr)) {
|
||||
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr - in_addr);
|
||||
return 1;
|
||||
}
|
||||
if (out_bytes > (out_addr2 - out_addr)) {
|
||||
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
|
||||
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = (step * result_bytes) / axi_bytes;
|
||||
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
|
||||
uint32_t in_beats = in_bytes / axi_bytes;
|
||||
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = out_bytes / axi_bytes;
|
||||
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
|
||||
|
||||
int in_addr = 0x20010000; // place input samples in the SPI memory
|
||||
int out_addr = 0x20210000;
|
||||
int fpga_address_in = fpga_alloc(step * sample_bytes + 64);
|
||||
int fpga_address_out = fpga_alloc(step * result_bytes + 64);
|
||||
uint32_t current_in_addr = in_addr;
|
||||
int fpga_address_in = fpga_alloc(batch_size * sample_bytes + 64);
|
||||
int fpga_address_out = fpga_alloc(batch_size * result_bytes + 64);
|
||||
|
||||
// inject SPN input data
|
||||
spn_checker::input_addr_reg() = in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
|
||||
spn_checker::input_addr_reg() = current_in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
|
||||
spn_checker::start_data_trans_reg() = 1;
|
||||
spn_checker::output_addr_reg() = out_addr;
|
||||
|
||||
//run_xspn(in_addr, out_addr);
|
||||
for (int k = 0; k < iterations*step; k+=step) {
|
||||
fpga_dma(1, fpga_address_in, in_addr, step * sample_bytes);
|
||||
run_xspn(fpga_address_in, fpga_address_out, step, in_beats, out_beats);
|
||||
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
|
||||
fpga_dma(1, fpga_address_in, current_in_addr, batch_size * sample_bytes);
|
||||
run_xspn(fpga_address_in, fpga_address_out, batch_size, in_beats, out_beats);
|
||||
wait_for_spn_interrupt();
|
||||
spn::interrupt_reg() = 1;
|
||||
printf("XSPN finished\n");
|
||||
fpga_dma(0, fpga_address_out, out_addr, step * result_bytes);
|
||||
fpga_dma(0, fpga_address_out, out_addr, batch_size * result_bytes);
|
||||
spn_checker::offset_reg() = k;
|
||||
spn_checker::length_reg() = step;
|
||||
spn_checker::length_reg() = batch_size;
|
||||
spn_checker::start_result_check_reg() = 1;
|
||||
|
||||
in_addr += step * sample_bytes; // 5 bytes in each sample
|
||||
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample
|
||||
}
|
||||
|
||||
fpga_free(fpga_address_in);
|
||||
|
@ -45,6 +45,9 @@
|
||||
// Offsets of spn_checker control locations. The accessor functions of
// spn_checker_regs that are visible in this file (start_data_trans_reg,
// xspn_count_reg, batch_size_reg, num_iterations_reg) add the matching
// constant to the BASE_ADDR template parameter to form the address they
// dereference. NUM_INPUT_SAMPLES and OUTPUT_ADDR2 are presumably consumed the
// same way by accessors outside this view -- TODO confirm.
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
|
||||
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
|
||||
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
|
||||
// Read by main() into xspn_count.
#define SPN_CNTL_REG_XSPN_COUNT 0x80
|
||||
// Read by main() into batch_size.
#define SPN_CNTL_REG_BATCH_SIZE 0x90
|
||||
// Read by main() into iterations.
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
|
||||
|
||||
template<uint32_t BASE_ADDR>
|
||||
class spn_checker_regs {
|
||||
@ -69,6 +72,12 @@ public:
|
||||
|
||||
uint32_t r_start_data_trans;
|
||||
|
||||
uint32_t r_xspn_count;
|
||||
|
||||
uint32_t r_batch_size;
|
||||
|
||||
uint32_t r_num_iterations;
|
||||
|
||||
static inline uint32_t& start_result_check_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK);
|
||||
}
|
||||
@ -101,4 +110,16 @@ public:
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
|
||||
}
|
||||
|
||||
static inline uint32_t& xspn_count_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT);
|
||||
}
|
||||
|
||||
static inline uint32_t& batch_size_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE);
|
||||
}
|
||||
|
||||
static inline uint32_t& num_iterations_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS);
|
||||
}
|
||||
|
||||
};
|
||||
|
Binary file not shown.
@ -48,17 +48,38 @@ int main() {
|
||||
configure_irq(2, spn1_interrupt_handler);
|
||||
configure_irq(22, spn2_interrupt_handler);
|
||||
|
||||
|
||||
uint32_t xspn_count = spn_checker::xspn_count_reg();
|
||||
uint32_t batch_size = spn_checker::batch_size_reg();
|
||||
uint32_t iterations = spn_checker::num_iterations_reg();
|
||||
|
||||
printf("XSPN COUNT: %d\n", xspn_count);
|
||||
if (xspn_count < 1 || xspn_count > 2) {
|
||||
printf("ERROR: invalid XSPN COUNT");
|
||||
return 1;
|
||||
}
|
||||
printf("BATCH SIZE: %d\n", batch_size);
|
||||
printf("ITERATIONS: %d\n", iterations);
|
||||
|
||||
|
||||
|
||||
int in_addr = 0x30000000; // place input samples in the SPI memory
|
||||
int out_addr1 = 0x3C000000;
|
||||
int out_addr2 = 0x3E000000;
|
||||
|
||||
spn_1::mode_reg() = 1;
|
||||
spn_1::start_reg() = 1;
|
||||
wait_for_spn1_interrupt();
|
||||
uint32_t readout = spn_1::readout_reg();
|
||||
printf("READOUT first HW instance:0x%x\n", readout);
|
||||
|
||||
spn_2::mode_reg() = 1;
|
||||
spn_2::start_reg() = 1;
|
||||
wait_for_spn2_interrupt();
|
||||
uint32_t readout2 = spn_2::readout_reg();
|
||||
printf("READOUT second HW instance:0x%x\n", readout2);
|
||||
if (xspn_count == 2) {
|
||||
spn_2::mode_reg() = 1;
|
||||
spn_2::start_reg() = 1;
|
||||
wait_for_spn2_interrupt();
|
||||
uint32_t readout2 = spn_2::readout_reg();
|
||||
printf("READOUT second HW instance:0x%x\n", readout2);
|
||||
}
|
||||
|
||||
uint32_t axi_bytes = readout;
|
||||
axi_bytes = axi_bytes & 0xff;
|
||||
@ -74,39 +95,50 @@ int main() {
|
||||
uint32_t result_bytes = 8;
|
||||
printf("Result Bytes: %d\n", result_bytes);
|
||||
|
||||
const uint32_t amount_of_input_samples = 50000;
|
||||
uint32_t step = 50000;
|
||||
uint32_t iterations = 5;
|
||||
uint32_t in_bytes = batch_size * sample_bytes;
|
||||
uint32_t out_bytes = batch_size * result_bytes;
|
||||
|
||||
uint32_t in_beats = (step * sample_bytes) / axi_bytes;
|
||||
if (in_beats * axi_bytes < step * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = (step * result_bytes) / axi_bytes;
|
||||
if (out_beats * axi_bytes < step * result_bytes) out_beats++;
|
||||
uint32_t total_in = in_bytes * iterations;
|
||||
|
||||
int in_addr = 0x20010000; // place input samples in the SPI memory
|
||||
int out_addr1 = 0x20510000;
|
||||
int out_addr2 = 0x205F0000;
|
||||
if (total_in > (out_addr1 - in_addr)) {
|
||||
printf("ERROR: input data requires %d bytes, only %d bytes available\n", total_in, out_addr1 - in_addr);
|
||||
return 1;
|
||||
}
|
||||
if (out_bytes > (out_addr2 - out_addr1)) {
|
||||
printf("ERROR: output data requires %d bytes, only %d bytes available\n", out_bytes, out_addr2 - out_addr1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint32_t in_beats = in_bytes / axi_bytes;
|
||||
if (in_beats * axi_bytes < batch_size * sample_bytes) in_beats++;
|
||||
uint32_t out_beats = out_bytes / axi_bytes;
|
||||
if (out_beats * axi_bytes < batch_size * result_bytes) out_beats++;
|
||||
|
||||
uint32_t current_in_addr = in_addr;
|
||||
|
||||
// inject SPN input data
|
||||
spn_checker::input_addr_reg() = in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * step * iterations;
|
||||
spn_checker::input_addr_reg() = current_in_addr;
|
||||
spn_checker::num_input_samples_reg() = sample_bytes * batch_size * iterations;
|
||||
spn_checker::start_data_trans_reg() = 1;
|
||||
|
||||
spn_checker::output_addr_reg() = out_addr1;
|
||||
spn_checker::output_addr2_reg() = out_addr2;
|
||||
for (int k = 0; k < iterations*step; k+=step) {
|
||||
run_xspn1(in_addr, out_addr1, step, in_beats, out_beats);
|
||||
run_xspn2(in_addr, out_addr2, step, in_beats, out_beats);
|
||||
wait_for_spn_interrupts();
|
||||
if (xspn_count == 2) {
|
||||
spn_checker::output_addr2_reg() = out_addr2;
|
||||
}
|
||||
for (int k = 0; k < iterations*batch_size; k+=batch_size) {
|
||||
run_xspn1(current_in_addr, out_addr1, batch_size, in_beats, out_beats);
|
||||
if (xspn_count == 2) {
|
||||
run_xspn2(current_in_addr, out_addr2, batch_size, in_beats, out_beats);
|
||||
wait_for_spn_interrupts();
|
||||
} else {
|
||||
wait_for_spn1_interrupt();
|
||||
}
|
||||
printf("XSPN finished\n");
|
||||
spn_checker::offset_reg() = k;
|
||||
spn_checker::length_reg() = step;
|
||||
spn_checker::length_reg() = batch_size;
|
||||
spn_checker::start_result_check_reg() = 1;
|
||||
|
||||
in_addr += step * sample_bytes; // 5 bytes in each sample
|
||||
if (k == amount_of_input_samples) {
|
||||
in_addr = 0x20010000;
|
||||
}
|
||||
current_in_addr += batch_size * sample_bytes; // 5 bytes in each sample (NIPS5)
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -45,6 +45,9 @@
|
||||
// Offsets of spn_checker control locations. The accessor functions of
// spn_checker_regs that are visible in this file (start_data_trans_reg,
// xspn_count_reg, batch_size_reg, num_iterations_reg) add the matching
// constant to the BASE_ADDR template parameter to form the address they
// dereference. NUM_INPUT_SAMPLES and OUTPUT_ADDR2 are presumably consumed the
// same way by accessors outside this view -- TODO confirm.
#define SPN_CNTL_REG_NUM_INPUT_SAMPLES 0x50
|
||||
#define SPN_CNTL_REG_START_DATA_TRANS 0x60
|
||||
#define SPN_CNTL_REG_OUTPUT_ADDR2 0x70
|
||||
// Read by main() into xspn_count.
#define SPN_CNTL_REG_XSPN_COUNT 0x80
|
||||
// Read by main() into batch_size.
#define SPN_CNTL_REG_BATCH_SIZE 0x90
|
||||
// Read by main() into iterations.
#define SPN_CNTL_REG_NUM_ITERATIONS 0xA0
|
||||
|
||||
template<uint32_t BASE_ADDR>
|
||||
class spn_checker_regs {
|
||||
@ -69,6 +72,12 @@ public:
|
||||
|
||||
uint32_t r_start_data_trans;
|
||||
|
||||
uint32_t r_xspn_count;
|
||||
|
||||
uint32_t r_batch_size;
|
||||
|
||||
uint32_t r_num_iterations;
|
||||
|
||||
static inline uint32_t& start_result_check_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_RESULT_CHECK);
|
||||
}
|
||||
@ -101,4 +110,16 @@ public:
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_START_DATA_TRANS);
|
||||
}
|
||||
|
||||
static inline uint32_t& xspn_count_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_XSPN_COUNT);
|
||||
}
|
||||
|
||||
static inline uint32_t& batch_size_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_BATCH_SIZE);
|
||||
}
|
||||
|
||||
static inline uint32_t& num_iterations_reg(){
|
||||
return *reinterpret_cast<uint32_t*>(BASE_ADDR+SPN_CNTL_REG_NUM_ITERATIONS);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/python3
"""Generate ../xspn_data.cpp from a pair of text dumps.

Usage: pass one argument, PREFIX. The script reads PREFIX_inputdata.txt and
PREFIX_outputdata.txt from the current directory and writes a C++ source file
defining two std::array objects, ``input_data`` and ``ref_data``.
"""

import re
import sys


def convert_input_text(raw):
    """Rewrite scientific-notation values in *raw* as comma-terminated integers.

    One substitution handles each decimal exponent (e+00, e+01, e+02); every
    ';' is removed, and the final two characters are dropped so the text does
    not end in a trailing comma.
    """
    # convert double to uint8_t
    text = re.sub(r'(\d)\.0+e\+00', r'\1,', raw)
    text = re.sub(r'(\d)\.(\d)0+e\+01', r'\1\2,', text)
    text = re.sub(r'(\d)\.(\d)(\d)0+e\+02', r'\1\2\3,', text)
    text = text.replace(";", "")
    # remove last comma
    return text[:-2]


def convert_reference_text(raw):
    """Append ' / ln2,' to every line of *raw* and drop the final two characters."""
    text = re.sub(r'\n', r' / ln2,\n', raw)
    return text[:-2]


def build_cpp_source(in_data, ref_data):
    """Return the text of the generated C++ file for the converted data strings."""
    # count samples
    input_sample_cnt = len(in_data.split(","))
    ref_sample_cnt = len(ref_data.split(","))

    # NOTE(review): <bits/stdint-uintn.h> is a glibc-internal header; <cstdint>
    # would be the portable spelling. Left unchanged to keep the output identical.
    cpp_file = "#include <array>\n"
    cpp_file += "#include <cmath>\n"
    cpp_file += "#include <bits/stdint-uintn.h>\n\n"
    # The results in the outputdata.txt file are not directly what comes out of the PE but the natural logarithm of it.
    cpp_file += "constexpr auto ln2 = std::log(2);\n"
    cpp_file += "std::array<uint8_t, " + str(input_sample_cnt) + "> input_data = {\n" + str(in_data) + "}; \n\n"
    cpp_file += "std::array<double, " + str(ref_sample_cnt) + "> ref_data = {\n" + str(ref_data) + "}; \n"
    return cpp_file


def main(argv):
    """Run the generator; return the process exit status.

    *argv* is the full argument vector (program name first). Returns 1 when
    the PREFIX argument is missing, 0 after the output file has been written.
    """
    if len(argv) < 2:
        print('No argument given')
        # A missing argument is an error: report it through the exit status.
        return 1

    nips = argv[1]

    # Context managers close the input files even if reading fails.
    with open(nips + '_inputdata.txt', 'r') as input_file:
        in_data = convert_input_text(input_file.read())

    with open(nips + '_outputdata.txt', 'r') as ref_file:
        ref_data = convert_reference_text(ref_file.read())

    # create cpp file
    with open("../xspn_data.cpp", "w") as f:
        f.write(build_cpp_source(in_data, ref_data))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user