vivado hls fft设计
2016-03-10 20:36
441 查看
HLS的FFT设计步骤
本文基于HLS自带的设计example(FFT > fft_single),它实现的是1024点pipelined streaming I/O结构的FFT。大体代码流程如下:
1.包含hls_fft.h库
#include "hls_fft.h"
2.设置预先定义的结构体hls::ip_fft::params_t(位于hls::ip_fft命名空间)中的成员
struct hls::ip_fft::params_t
3.定义运行时设置,这是AXI4-Stream接口的一部分,用于动态调节FFT的一些参数
4.调用FFT函数
hls::fft<param1> (xn1, xk1, &fft_status1, &fft_config1);
5.检查运行结果,该过程可选,用于检测是否溢出。
详细过程如下:
首先定义一个头文件fft_top.h,该文件include进FFT头文件。
前几行的定义输入/输出数据是16bit,FFT点数是(1<<10)1024点。
紧接着对params_t中的ordering_opt和config_width进行了重新设置,一个是输出顺序,一个是AXI4-Stream配置通道的数据位宽。
typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;将数据定义成了16bit定点数:1bit表示整数部分(即符号位),15bit表示小数部分,取值范围是[-1, 1)。
然后声明了三个函数。第一个函数用于设置AXI4-Stream配置通道的参数,第二个函数用于判断fft结果的status,第三个函数是顶层函数,在其中调用vivado的fft ip。
#include "ap_fixed.h"
有了头文件,接下来就是在testbench中调用fft函数来完成fft了。首先读入16bit数,然后换算成[-1, 1)范围内的小数。
#include "hls_fft.h"
// configurable params
// Total bit width of one real/imaginary input sample (width of data_in_t).
const char FFT_INPUT_WIDTH = 16;
// Output sample width; kept equal to the input width.
const char FFT_OUTPUT_WIDTH = FFT_INPUT_WIDTH;
// Value assigned to config1::config_width (width of the run-time config word).
const char FFT_CONFIG_WIDTH = 16;
// log2 of the transform length.
const char FFT_NFFT_MAX = 10;
// Number of samples per frame: 1 << 10 = 1024.
const int FFT_LENGTH = 1 << FFT_NFFT_MAX;
#include <complex>
using namespace std;
// Static parameter set for the FFT core: inherits every member of
// hls::ip_fft::params_t and overrides only the two members below.
struct config1 : hls::ip_fft::params_t {
// Output ordering selection: natural order.
static const unsigned ordering_opt = hls::ip_fft::natural_order;
// Width of the run-time configuration word, taken from FFT_CONFIG_WIDTH.
static const unsigned config_width = FFT_CONFIG_WIDTH;
};
// Run-time configuration and status types of the FFT core, specialised for config1.
typedef hls::ip_fft::config_t<config1> config_t;
typedef hls::ip_fft::status_t<config1> status_t;
// Input sample component: FFT_INPUT_WIDTH-bit fixed point with 1 integer bit.
typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;
// Output sample component: FFT_OUTPUT_WIDTH-bit fixed point; the integer-bit
// count evaluates to 1 while the input and output widths are equal.
typedef ap_fixed<FFT_OUTPUT_WIDTH,FFT_OUTPUT_WIDTH-FFT_INPUT_WIDTH+1> data_out_t;
// Complex sample types used on the FFT input and output.
typedef std::complex<data_in_t> cmpxDataIn;
typedef std::complex<data_out_t> cmpxDataOut;
// Front-end stage placed before the FFT core.
// Writes the transform direction and the scaling schedule into *config and
// copies the FFT_LENGTH samples of `in` to `out` unchanged.
//   direction - value forwarded to config->setDir()
//   config    - run-time configuration object to fill in
//   in        - input frame
//   out       - copy of the input frame, consumed by the FFT core
void dummy_proc_fe(
bool direction,
config_t* config,
cmpxDataIn in[FFT_LENGTH],
cmpxDataIn out[FFT_LENGTH]);
// Back-end stage placed after the FFT core.
// Copies the FFT_LENGTH samples of `in` to `out` unchanged and then stores
// the lowest bit of status_in->getOvflo() in *ovflo.
//   status_in - status object produced by the FFT core
//   ovflo     - receives the overflow flag
//   in        - FFT output frame
//   out       - copy of the FFT output frame
void dummy_proc_be(
status_t* status_in,
bool* ovflo,
cmpxDataOut in[FFT_LENGTH],
cmpxDataOut out[FFT_LENGTH]);
// Top-level function: chains dummy_proc_fe, hls::fft<config1> and
// dummy_proc_be over one frame of FFT_LENGTH samples.
//   direction - transform direction, forwarded to the run-time configuration
//   in        - input frame
//   out       - transformed frame
//   ovflo     - receives the overflow flag taken from the FFT status
void fft_top(
bool direction,
cmpxDataIn in[FFT_LENGTH],
cmpxDataOut out[FFT_LENGTH],
bool* ovflo);
int main() { const int SIM_FRAMES = 1; const int SAMPLES = (1 << FFT_NFFT_MAX); int error_num = 0; bool ovflo_all = false; char res_filename[BUF_SIZE]={0}; char dat_filename[BUF_SIZE]={0}; static cmpxDataIn xn_input[SAMPLES]; static cmpxDataOut xk_output[SAMPLES]; for (int frame = 0; frame < SIM_FRAMES; ++frame) { int NFFT = 0; int CP_LEN = 0; // length of the cyclic prefix to be inserted for each frame int FWD_INV = 0; int sc_sch = 0; int line_no = 1; FILE *stimfile; // Open stimulus .dat file for reading sprintf(dat_filename, "stimulus_%02d", frame); strcat(dat_filename,".dat"); stimfile = fopen(dat_filename, "r"); int tmp_re, tmp_im; float dummy_re, dummy_im; const int max = 1 << FFT_INPUT_WIDTH; // might not work for > 32 bits! const int max_half_minus_one = (max/2)-1; // Scaling factor to get integer into -1 <= x < +1 range const double sc = ldexp(1.0, FFT_INPUT_WIDTH-1); // might not work for > 32 bits! if (stimfile == NULL) { printf("ERROR: Can't open %s\n",dat_filename); exit(999); } else { printf("INFO: Reading %s\n",dat_filename); while (fgetc(stimfile) != EOF && line_no < SAMPLES+5) { switch (line_no) { case 1: // Point size fscanf(stimfile,"%X",&NFFT); printf("NFFT %d\n",NFFT); break; case 2: // CP length fscanf(stimfile,"%X",&CP_LEN); printf("CP_LEN %d\n",CP_LEN); break; case 3: // fwd-inv fscanf(stimfile,"%X",&FWD_INV); printf("FWD_INV %d\n",FWD_INV); break; case 4: // Scaling schedule sc_sch fscanf(stimfile,"%X",&sc_sch); printf("sc_sch %X\n",sc_sch); break; default: // hex data (first 2 columns) fscanf(stimfile,"%x %x %f %f",&tmp_re,&tmp_im,&dummy_re,&dummy_im); //printf("%x %x\n",tmp_re,tmp_im); double input_data_re, input_data_im; if (tmp_re > max_half_minus_one) { input_data_re = ((tmp_re-65536)/sc); } else { input_data_re = (tmp_re/sc); } //xn_input[line_no-5].re = input_data_re; //xn_re_hw[line_no-5] = dummy_re; if (tmp_im > max_half_minus_one) { input_data_im = ((tmp_im-65536)/sc); } else { input_data_im = (tmp_im/sc); } 
//xn_input[line_no-5].im = input_data_im; //xn_im_hw[line_no-5] = dummy_im; xn_input[line_no-5] = cmpxDataIn(input_data_re, input_data_im); } line_no++; } } fclose(stimfile);然后调用fft和完成fft变换
fft_top(FWD_INV, xn_input, xk_output, &ovflo);至此,fft已经算是完成了,接下来就是读入预先评估的result,比对结果了。
FILE* resfile; sprintf(res_filename, "stimulus_%02d", frame); strcat(res_filename,".res"); if ((resfile = fopen(res_filename, "r")) == 0) { printf("ERROR: Can't open %s\n", res_filename); exit(888); } int tmp; fscanf(resfile, "%X", &tmp); fscanf(resfile, "%X", &tmp); for (int i = 0; i < (1<<NFFT); i++) { fscanf(resfile,"%x %x %f %f", &tmp_re, &tmp_im, &dummy_re, &dummy_im); data_out_t golden = dummy_re; //if (golden != xk_output[i].re) if (golden != xk_output[i].real()) { error_num++; cout << "Frame:" << frame << " index: " << i << " Golden: " << golden.to_float() << " vs. RE Output: " << setprecision(14) << xk_output[i].real().to_float() << endl; } golden = dummy_im; //if (golden != xk_output[i].im) if (golden != xk_output[i].imag()) { error_num++; cout << "Frame:" << frame << " index: " << i << " Golden: " << golden.to_float() << " vs. IM Output: " << setprecision(14) << xk_output[i].imag().to_float() << endl; } } fclose(resfile); } cout << " ERRORS: " << error_num << endl; if (error_num > 0) cout << " (FAILED!!!)" << endl; else if (ovflo_all) cout << " (OVERFLOW!!!)" << endl; else cout << " (PASSED!!!)" << endl; if (error_num > 0) return 1; else return 0; }
其调用的fft_top是fft_top.c文件里的函数,该函数将被综合成ip:
#include "fft_top.h"

// Front-end stage: fills in the run-time FFT configuration and passes the
// input frame through untouched.
void dummy_proc_fe(
    bool direction,
    config_t* config,
    cmpxDataIn in[FFT_LENGTH],
    cmpxDataIn out[FFT_LENGTH])
{
    config->setDir(direction);
    config->setSch(0x2AB);   // 0x2AB = binary 10 10 10 10 11

    for (int idx = 0; idx < FFT_LENGTH; ++idx)
        out[idx] = in[idx];
}

// Back-end stage: passes the FFT output frame through untouched, then
// reports the lowest bit of the status overflow field.
void dummy_proc_be(
    status_t* status_in,
    bool* ovflo,
    cmpxDataOut in[FFT_LENGTH],
    cmpxDataOut out[FFT_LENGTH])
{
    for (int idx = 0; idx < FFT_LENGTH; ++idx)
        out[idx] = in[idx];

    *ovflo = status_in->getOvflo() & 0x1;
}

// Top-level function: front-end, FFT core and back-end chained over local
// buffers xn / xk.
void fft_top(
    bool direction,
    cmpxDataIn in[FFT_LENGTH],
    cmpxDataOut out[FFT_LENGTH],
    bool* ovflo)
{
#pragma HLS interface ap_hs port=direction
#pragma HLS interface ap_fifo depth=1 port=ovflo
#pragma HLS interface ap_fifo depth=FFT_LENGTH port=in,out
#pragma HLS data_pack variable=in
#pragma HLS data_pack variable=out
#pragma HLS dataflow

    cmpxDataIn xn[FFT_LENGTH];    // frame handed to the FFT core
    cmpxDataOut xk[FFT_LENGTH];   // frame produced by the FFT core
    config_t fft_config;
    status_t fft_status;

    dummy_proc_fe(direction, &fft_config, in, xn);

    // FFT IP
    hls::fft<config1>(xn, xk, &fft_status, &fft_config);

    dummy_proc_be(&fft_status, ovflo, xk, out);
}
dummy_proc_fe函数做了两个工作,一个是设置做fft还是逆fft,另一个是设置scale值即0x2ab。0x2ab的二进制是10 10 10 10 11,也即[2 2 2 2 3],即蝶形运算每一级右移的位数,这样确保最后的结果也是16位的。各级的缩放倍数是相乘的关系,所以总的缩放倍数是2^2×2^2×2^2×2^2×2^3=2^(2+2+2+2+3)=2^11=2048。
再来看看仿真的数据和结果:
数据见stimulus_00.dat文件,这里截取部分片段:
0A 0 1 2AB A437 4C07 -0.71707153320312 0.59396362304688 6015 333C 0.75064086914062 0.40026855468750 B251 FFA6 -0.60690307617188 -0.00274658203125 FD76 85F5 -0.01983642578125 -0.95346069335938
stimulus_00.res文件的结果如下(部分):
00 0 0076 FEF9 0.00360107421875 -0.00802612304688 00C5 009C 0.00601196289062 0.00476074218750 00DD FEAF 0.00674438476562 -0.01028442382812 0103 0014 0.00790405273438 0.00061035156250 0052 0048 0.00250244140625 0.00219726562500 011F 00D5 0.00875854492188 0.00650024414062 FFE9 FFFA -0.00070190429688 -0.00018310546875 FF2C 013F -0.00646972656250 0.00973510742188
前面两个数(00和0)本身并没有什么用,但不能省略:testbench在读取结果数据之前,会先用两次fscanf(resfile, "%X", &tmp)把这两个数读掉,如果真正的数据前面没有放这两个数,后面的数据就会错位,读入出错。这是testbench约定的文件格式,并不是vivado_hls的bug。
matlab的结果如下(部分):
7.511932373046726 - 16.318267822265767i 12.4071061347653 + 9.84830812777682i 13.9168538790000 - 20.9499609691262i 16.3122678174807 + 1.35817962557640i 5.22772324927469 + 4.60450708754723i 18.0549676502232 + 13.4125569611171i -1.34364834348118 - 0.280272736963835i -13.1356602407057 + 20.0124305673796i 0.454103601471324 + 22.6374966808317i -6.94603384852326 - 1.77684245234773i -0.526709138800964 - 38.7395841354917i 14.6932655039269 + 6.50847432331556i 11.2043508653130 + 23.3736128226132i 20.8573462890038 - 4.58878154007059i 3.49584233061040 - 7.66236741383999i -30.2315244786224 - 20.9903229972919i
这里我开始也是困惑了,和matlab结果相差比较大。这是由于scale的原因。
7.51193237304673/2048 = 0.0037
16.318267822265767/2048 = 0.008
其它项依次类推。
值得注意的是,采用scaling策略可能会带来一些问题。比如先做fft,再把两个fft的结果(一维数组)进行共轭相乘,然后再做ifft,这时每一步的scaling都会影响最终结果的幅度和精度,所以这种情况下采用float型可能更合适。修改的方法也很简单,只需要将fft_top.h中的data_in_t和data_out_t重新定义,定义的方法如下:
typedef float data_in_t; typedef float data_out_t;
这样就会调用浮点数IP核进行运算了。
但是还有一个地方需要更改,是因为浮点数要求phase factor必须是24或者25bit的。
// Variant of config1 for the floating-point data types: same overrides as the
// fixed-point version plus phase_factor_width = 24 (the FFT parameter check
// accepts only 24 or 25 for floating-point data).
struct config1 : hls::ip_fft::params_t { static const unsigned ordering_opt = hls::ip_fft::natural_order; static const unsigned phase_factor_width = 24; static const unsigned config_width = FFT_CONFIG_WIDTH; };
IP的参数设置必须满足以下的要求:
///////////// IP parameters legality checking ///////////// // Check CONFIG_T::config_width config_ch->checkBitWidth(FFT_DATA_FORMAT); // Check CONFIG_T::status_width status->checkBitWidth(); // Check ip parameters if (CONFIG_T::channels < 1 || CONFIG_T::channels > 12) { std::cerr << ip_fft::fftErrChkHead << "Channels = " << (int)CONFIG_T::channels << " is illegal. It should be from 1 to 12." << std::endl; exit(1); } if (CONFIG_T::max_nfft < 3 || CONFIG_T::max_nfft > 16) { std::cerr << ip_fft::fftErrChkHead << "NFFT_MAX = " << (int)CONFIG_T::max_nfft << " is illegal. It should be from 3 to 16." << std::endl; exit(1); } unsigned length = FFT_LENGTH; if (!CONFIG_T::has_nfft) { if (FFT_LENGTH != (1 << CONFIG_T::max_nfft)) { std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH << " is illegal. Log2(FFT_LENGTH) should equal to NFFT_MAX when run-time configurable length is disabled." << std::endl; exit(1); } } else if (length & (length - 1)) { std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH << " is illegal. It should be the integer power of 2." << std::endl; exit(1); } else if (NFFT < 3 || NFFT > 16) { std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH << " is illegal. Log2(FFT_LENGTH) should be from 3 to 16." << std::endl; exit(1); } else if (NFFT > CONFIG_T::max_nfft) { std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH << " is illegal. Log2(FFT_LENGTH) should be less than or equal to NFFT_MAX." << std::endl; exit(1); } #if 0 else if (NFFT != config_ch->getNfft()) { std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH << " is illegal. Log2(FFT_LENGTH) should equal to NFFT field of configure channel." << std::endl; exit(1); } #endif if ((FFT_INPUT_WIDTH < 8) || (FFT_INPUT_WIDTH > 40)) { std::cerr << ip_fft::fftErrChkHead << "FFT_INPUT_WIDTH = " << (int)FFT_INPUT_WIDTH << " is illegal. It should be 8,16,24,32,40." 
<< std::endl; exit(1); } if (CONFIG_T::scaling_opt == ip_fft::unscaled && FFT_DATA_FORMAT != ip_fft::floating_point) { unsigned golden = FFT_INPUT_WIDTH + CONFIG_T::max_nfft + 1; golden = ((golden + 7) >> 3) << 3; if (FFT_OUTPUT_WIDTH != golden) { std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH << " is illegal with unscaled arithmetic. It should be input_width+nfft_max+1." << std::endl; exit(1); } } else if (FFT_OUTPUT_WIDTH != FFT_INPUT_WIDTH) { std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH << " is illegal. It should be the same as input_width." << std::endl; exit(1); } if (CONFIG_T::channels > 1 && CONFIG_T::arch_opt == ip_fft::pipelined_streaming_io) { std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels << " and FFT_ARCH = pipelined_streaming_io" << " is illegal. pipelined_streaming_io architecture is not supported when channels is bigger than 1." << std::endl; exit(1); } if (CONFIG_T::channels > 1 && FFT_DATA_FORMAT == ip_fft::floating_point) { std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels << " is illegal with floating point data format. Floating point data format only supports 1 channel." << std::endl; exit(1); } if (FFT_DATA_FORMAT == ip_fft::floating_point) { if (CONFIG_T::phase_factor_width != 24 && CONFIG_T::phase_factor_width != 25) { std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width << " is illegal with floating point data format. It should be 24 or 25." << std::endl; exit(1); } } else if (CONFIG_T::phase_factor_width < 8 || CONFIG_T::phase_factor_width > 34) { std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width << " is illegal. It should be from 8 to 34." << std::endl; exit(1); }
相关文章推荐
- CocoaPods安装过程
- ubuntu 修改root密码
- 哪两个数据结构,同时具有较高的查找和删除性能
- ORACLE百例试炼三
- Android 开发百度地图之三GEO
- 丑数
- 更轻量的 View Controllers
- Java程序设计概述及环境(Java核心技术卷Ⅰ)
- iOS为分类添加属性
- 伸展树的实现qt,会的请截图留言
- requirejs 第一个实例
- 面向对象与设计模式
- java语言的I/O操作预习
- 尚学堂java基础——第13、14集笔记
- Android NDK的C++ STL开发相关总结
- 几种常用排序算法的复习
- 几种常用排序算法的复习
- 《我是一只IT小小鸟》读后感
- MQL指标:理解生命周期函数、首态数据处理与尾部动态更新
- bzoj 1614(二分+最短路)