您的位置：首页 > 其它

vivado hls fft设计

2016-03-10 20:36 441 查看

HLS的FFT设计步骤

本文基于HLS设计example，FFT > fft_single，其为1024点pipelined streaming I/O算法。

大体代码流程如下：

1.包含hls_fft.h库

#include "hls_fft.h"

2.设置预先定义的hls::ip_fft::params_t命名空间中的类成员

struct hls::ip_fft::params_t

3.定义运行时配置（config），这是AXI4-Stream接口的一部分，用于动态调节FFT的一些参数

4.调用FFT函数

hls::fft<param1> (xn1, xk1, &fft_status1, &fft_config1);

5.检查运行结果，该过程可选，用于检测是否溢出。

详细过程如下：

首先定义一个头文件fft_top.h，该文件include进FFT头文件。

前几行的定义输入/输出数据是16bit，FFT点数是（1<<10）1024点。

紧接着对params_t的ordering_opt和config_width进行了重新设置：前者是输出顺序，后者是AXI4-Stream配置通道的数据位宽。

typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;

将数据定义为16bit定点数：1bit表示整数部分（即符号位），其余15bit表示小数部分，取值范围为[-1, 1)。

然后声明了三个函数。第一个函数用于设置AXI4-Stream配置通道的参数，第二个函数用于判断fft结果的status（是否溢出），第三个函数是调用vivado中fft ip的顶层函数。

#include "ap_fixed.h"
#include "hls_fft.h"
// Configurable parameters of the FFT example.
// Bit width of one input sample component (first template argument of data_in_t).
const char FFT_INPUT_WIDTH                     = 16;
// Bit width of one output sample component; tied to the input width.
const char FFT_OUTPUT_WIDTH                    = FFT_INPUT_WIDTH;
// Value assigned to config1::config_width.
const char FFT_CONFIG_WIDTH                    = 16;
// log2 of the transform length.
const char FFT_NFFT_MAX                        = 10;
// Number of points per frame: 1 << 10 = 1024.
const int  FFT_LENGTH                          = 1 << FFT_NFFT_MAX;

#include <complex>
using namespace std;

// Parameter set handed to hls::fft<>. Members that are not redeclared here
// keep whatever hls::ip_fft::params_t declares.
struct config1 : public hls::ip_fft::params_t {
    // Output samples are requested in natural order.
    static const unsigned ordering_opt = hls::ip_fft::natural_order;
    // Data width of the run-time configuration channel.
    static const unsigned config_width = FFT_CONFIG_WIDTH;
};

// Run-time configuration and status types of the FFT core for parameter set config1.
typedef hls::ip_fft::config_t<config1> config_t;
typedef hls::ip_fft::status_t<config1> status_t;

// Input sample component: FFT_INPUT_WIDTH-bit fixed point with 1 integer bit.
typedef ap_fixed<FFT_INPUT_WIDTH,1> data_in_t;
// Output sample component: the integer part grows by the extra output bits
// (FFT_OUTPUT_WIDTH-FFT_INPUT_WIDTH), which is 0 with the constants above.
typedef ap_fixed<FFT_OUTPUT_WIDTH,FFT_OUTPUT_WIDTH-FFT_INPUT_WIDTH+1> data_out_t;
// Complex samples passed into and out of the FFT.
typedef std::complex<data_in_t> cmpxDataIn;
typedef std::complex<data_out_t> cmpxDataOut;

// Front-end stage: stores the transform direction and the scaling schedule in
// *config and copies the FFT_LENGTH samples of in[] to out[].
void dummy_proc_fe(
bool direction,
config_t* config,
cmpxDataIn in[FFT_LENGTH],
cmpxDataIn out[FFT_LENGTH]);

// Back-end stage: copies the FFT_LENGTH samples of in[] to out[] and writes
// bit 0 of status_in->getOvflo() to *ovflo.
void dummy_proc_be(
status_t* status_in,
bool* ovflo,
cmpxDataOut in[FFT_LENGTH],
cmpxDataOut out[FFT_LENGTH]);

// Top-level function: processes one frame of FFT_LENGTH samples by chaining
// dummy_proc_fe, hls::fft<config1> and dummy_proc_be.
//   direction - forwarded to the FFT run-time configuration
//   in        - input frame
//   out       - transformed frame
//   ovflo     - receives the overflow flag taken from the FFT status
void fft_top(
bool direction,
cmpxDataIn in[FFT_LENGTH],
cmpxDataOut out[FFT_LENGTH],
bool* ovflo);

有了头文件，接下来就是testbench调用fft方法来完成fft了。首先读入16bit的十六进制数，然后换算成[-1, 1)范围内的小数：

// Test bench entry point (first part of the listing): for every frame, reads
// stimulus_NN.dat and fills xn_input with samples scaled into -1 <= x < +1.
// File layout, one item per line: NFFT, CP_LEN, FWD_INV and sc_sch (all hex),
// followed by sample lines of the form "re_hex im_hex re_float im_float".
// BUF_SIZE is not defined in this listing; it has to come from elsewhere.
int main()
{
const int SIM_FRAMES = 1;
const int SAMPLES = (1 << FFT_NFFT_MAX);

int error_num = 0;
bool ovflo_all = false;
char res_filename[BUF_SIZE]={0};
char dat_filename[BUF_SIZE]={0};
static cmpxDataIn xn_input[SAMPLES];
static cmpxDataOut xk_output[SAMPLES];

for (int frame = 0; frame < SIM_FRAMES; ++frame)
{
int NFFT = 0;
int CP_LEN = 0; // length of the cyclic prefix to be inserted for each frame
int FWD_INV = 0;
int sc_sch = 0;
int line_no = 1;
FILE *stimfile;

// Open stimulus .dat file for reading
sprintf(dat_filename, "stimulus_%02d", frame);
strcat(dat_filename,".dat");
stimfile = fopen(dat_filename, "r");

int tmp_re, tmp_im;
float dummy_re, dummy_im;
// Number of distinct codes of one input component (65536 for 16 bits).
const int max = 1 << FFT_INPUT_WIDTH; // might not work for > 32 bits!
// Largest code that still represents a non-negative value.
const int max_half_minus_one = (max/2)-1;
// Scaling factor to get integer into -1 <= x < +1 range
const double sc = ldexp(1.0, FFT_INPUT_WIDTH-1); // might not work for > 32 bits!

if (stimfile == NULL)
{
printf("ERROR: Can't open %s\n",dat_filename);
exit(999);
}
else
{
printf("INFO: Reading %s\n",dat_filename);
// fgetc() consumes one character at the start of every iteration, before
// fscanf() parses the rest of the line. The bound on line_no keeps the
// sample index (line_no-5) below SAMPLES.
while (fgetc(stimfile) != EOF && line_no < SAMPLES+5)
{
switch (line_no)
{
case 1:
// Point size
fscanf(stimfile,"%X",&NFFT);
printf("NFFT %d\n",NFFT);
break;
case 2:
// CP length
fscanf(stimfile,"%X",&CP_LEN);
printf("CP_LEN %d\n",CP_LEN);
break;
case 3:
// fwd-inv
fscanf(stimfile,"%X",&FWD_INV);
printf("FWD_INV %d\n",FWD_INV);
break;
case 4:
// Scaling schedule sc_sch
fscanf(stimfile,"%X",&sc_sch);
printf("sc_sch %X\n",sc_sch);
break;
default:
// hex data (first 2 columns)
fscanf(stimfile,"%x %x %f %f",&tmp_re,&tmp_im,&dummy_re,&dummy_im);
//printf("%x %x\n",tmp_re,tmp_im);

// Codes above max_half_minus_one are negative two's-complement values:
// subtract the full code range before scaling. Using max instead of the
// literal 65536 keeps the conversion correct if FFT_INPUT_WIDTH changes.
double input_data_re, input_data_im;
if (tmp_re > max_half_minus_one) {
input_data_re = ((tmp_re-max)/sc);
} else {
input_data_re = (tmp_re/sc);
}
//xn_input[line_no-5].re = input_data_re;
//xn_re_hw[line_no-5] = dummy_re;

if (tmp_im > max_half_minus_one) {
input_data_im = ((tmp_im-max)/sc);
} else {
input_data_im = (tmp_im/sc);
}
//xn_input[line_no-5].im = input_data_im;
//xn_im_hw[line_no-5] = dummy_im;

// The first four lines are the header, so sample k sits on line k+5.
xn_input[line_no-5] = cmpxDataIn(input_data_re, input_data_im);

}
line_no++;
}
}
fclose(stimfile);

然后调用fft和完成fft变换

fft_top(FWD_INV, xn_input, xk_output, &ovflo);

至此，fft已经算是完成了，接下来就是读入预先评估的result，比对结果了。

// Second part of main(): compare the FFT output of this frame against the
// golden values stored in stimulus_NN.res.
FILE* resfile;
sprintf(res_filename, "stimulus_%02d", frame);
strcat(res_filename,".res");
if ((resfile = fopen(res_filename, "r")) == 0)
{
printf("ERROR: Can't open %s\n", res_filename);
exit(888);
}

// The first two values of the .res file are read and discarded.
int tmp;
fscanf(resfile, "%X", &tmp);
fscanf(resfile, "%X", &tmp);
// NOTE(review): the bound uses the NFFT value read from the stimulus file,
// while xk_output holds SAMPLES entries - confirm NFFT <= FFT_NFFT_MAX.
for (int i = 0; i < (1<<NFFT); i++)
{
// Only the two float columns are used; the hex columns are parsed and ignored.
fscanf(resfile,"%x %x %f %f", &tmp_re, &tmp_im, &dummy_re, &dummy_im);
// Convert the golden float to data_out_t before comparing with the output.
data_out_t golden = dummy_re;
//if (golden != xk_output[i].re)
if (golden != xk_output[i].real())
{
error_num++;
cout << "Frame:" << frame << " index: " << i
<< "  Golden: " <<  golden.to_float() << " vs. RE Output: " << setprecision(14) << xk_output[i].real().to_float() << endl;
}
golden = dummy_im;
//if (golden != xk_output[i].im)
if (golden != xk_output[i].imag())
{
error_num++;
cout << "Frame:" << frame << " index: " << i
<< "  Golden: " << golden.to_float() << " vs. IM Output: " << setprecision(14) << xk_output[i].imag().to_float() << endl;
}
}
fclose(resfile);
}

// Summary: mismatches take precedence over the overflow flag.
cout << " ERRORS: " << error_num << endl;
if (error_num > 0)
cout << " (FAILED!!!)" << endl;
else if (ovflo_all)
cout << " (OVERFLOW!!!)" << endl;
else
cout << " (PASSED!!!)" << endl;

// Exit status: 1 when any mismatch was counted, 0 otherwise.
if (error_num > 0)
return 1;
else
return 0;
}

其调用的fft_top是fft_top.cpp文件里的函数，该函数将被综合成ip：

#include "fft_top.h"

// Front-end stage called by fft_top: fills in the FFT run-time configuration
// and forwards the input frame unchanged.
//   direction - value passed to config->setDir()
//   config    - configuration object to fill in
//   in        - FFT_LENGTH input samples
//   out       - receives a copy of in[]
void dummy_proc_fe(
    bool direction,
    config_t* config,
    cmpxDataIn in[FFT_LENGTH],
    cmpxDataIn out[FFT_LENGTH])
{
    config->setDir(direction);
    config->setSch(0x2AB);  // fixed scaling schedule

    for (int idx = 0; idx < FFT_LENGTH; ++idx) {
        out[idx] = in[idx];
    }
}

// Back-end stage called by fft_top: forwards the transformed frame unchanged
// and reports the overflow flag.
//   status_in - FFT status object to query
//   ovflo     - receives bit 0 of status_in->getOvflo()
//   in        - FFT_LENGTH transformed samples
//   out       - receives a copy of in[]
void dummy_proc_be(
    status_t* status_in,
    bool* ovflo,
    cmpxDataOut in[FFT_LENGTH],
    cmpxDataOut out[FFT_LENGTH])
{
    for (int idx = 0; idx < FFT_LENGTH; ++idx) {
        out[idx] = in[idx];
    }

    // Only the least significant bit of the overflow field is kept.
    *ovflo = status_in->getOvflo() & 0x1;
}

// Top-level function of the design: transforms one frame of FFT_LENGTH samples.
//   direction - forwarded to the FFT configuration by dummy_proc_fe
//   in        - input frame
//   out       - transformed frame
//   ovflo     - receives the overflow flag extracted by dummy_proc_be
void fft_top(
bool direction,
complex<data_in_t> in[FFT_LENGTH],
complex<data_out_t> out[FFT_LENGTH],
bool* ovflo)
{
// Port interfaces: direction uses ap_hs; ovflo, in and out use ap_fifo with
// the depths given below.
#pragma HLS interface ap_hs port=direction
#pragma HLS interface ap_fifo depth=1 port=ovflo
#pragma HLS interface ap_fifo depth=FFT_LENGTH port=in,out
#pragma HLS data_pack variable=in
#pragma HLS data_pack variable=out
#pragma HLS dataflow
// Intermediate buffers between the three stages, plus the configuration and
// status objects exchanged with the FFT.
complex<data_in_t> xn[FFT_LENGTH];
complex<data_out_t> xk[FFT_LENGTH];
config_t fft_config;
status_t fft_status;

// Stage 1: fill fft_config and copy in -> xn.
dummy_proc_fe(direction, &fft_config, in, xn);
// FFT IP
hls::fft<config1>(xn, xk, &fft_status, &fft_config);
// Stage 3: copy xk -> out and extract the overflow flag from fft_status.
dummy_proc_be(&fft_status, ovflo, xk, out);
}

dummy_proc_fe函数做了两个工作，一个是设置做fft还是逆fft，另外一个是设置scale值即0x2AB。0x2AB的二进制为10 10 10 10 11，每2bit一组，也即[2 2 2 2 3]，即蝶形算法的每一级右移的位数，这样确保最后的结果也是16位的。总共右移2+2+2+2+3=11位，所以scale对应的十进制值是2^2×2^2×2^2×2^2×2^3=2^11=2048。

再来看看仿真的数据和结果：

数据见stimulus_00.dat文件，这里截取部分片段：

0A
0
1
2AB
A437 4C07 -0.71707153320312  0.59396362304688
6015 333C  0.75064086914062  0.40026855468750
B251 FFA6 -0.60690307617188 -0.00274658203125
FD76 85F5 -0.01983642578125 -0.95346069335938

stimulus_00.res文件的结果如下（部分）：

00
0
0076 FEF9  0.00360107421875 -0.00802612304688
00C5 009C  0.00601196289062  0.00476074218750
00DD FEAF  0.00674438476562 -0.01028442382812
0103 0014  0.00790405273438  0.00061035156250
0052 0048  0.00250244140625  0.00219726562500
011F 00D5  0.00875854492188  0.00650024414062
FFE9 FFFA -0.00070190429688 -0.00018310546875
FF2C 013F -0.00646972656250  0.00973510742188

前面两行并没有什么用，但实际使用时发现，如果不在真正的数据前放些数，读入会出错，似乎是vivado_hls的一个bug。

matlab的结果如下（部分）：

7.511932373046726 - 16.318267822265767i
12.4071061347653 + 9.84830812777682i
13.9168538790000 - 20.9499609691262i
16.3122678174807 + 1.35817962557640i
5.22772324927469 + 4.60450708754723i
18.0549676502232 + 13.4125569611171i
-1.34364834348118 - 0.280272736963835i
-13.1356602407057 + 20.0124305673796i
0.454103601471324 + 22.6374966808317i
-6.94603384852326 - 1.77684245234773i
-0.526709138800964 - 38.7395841354917i
14.6932655039269 + 6.50847432331556i
11.2043508653130 + 23.3736128226132i
20.8573462890038 - 4.58878154007059i
3.49584233061040 - 7.66236741383999i
-30.2315244786224 - 20.9903229972919i

这里我开始也是困惑了，和matlab结果相差比较大。这是由于scale的原因。

7.51193237304673/2048 = 0.0037
16.318267822265767/2048 = 0.008

其它项依次类推。

值得注意的是，采用scaling策略必然会带来一些问题。比如先做fft，再将两个fft结果的一维数组进行共轭相乘，然后做ifft，这时各级scaling带来的缩放不容易处理，所以采用float型可能更合适。修改的方法也很简单，只需要将fft_top.h中的data_in_t和data_out_t重新定义，定义的方法如下：

// Floating-point variant: both sample component types become float instead of ap_fixed.
typedef float data_in_t;
typedef float data_out_t;

这样就会调用浮点数IP核进行运算了。

但是还有一个地方需要更改，是因为浮点数要求phase factor必须是24或者25bit的。

// Parameter set for the floating-point variant. Members that are not
// redeclared here keep whatever hls::ip_fft::params_t declares.
struct config1 : public hls::ip_fft::params_t {
    // Output samples are requested in natural order.
    static const unsigned ordering_opt = hls::ip_fft::natural_order;
    // The floating-point data format accepts only 24 or 25 here.
    static const unsigned phase_factor_width = 24;
    // Data width of the run-time configuration channel.
    static const unsigned config_width = FFT_CONFIG_WIDTH;
};

IP的参数设置必须满足以下的要求：

///////////// IP parameters legality checking /////////////

// Check CONFIG_T::config_width
config_ch->checkBitWidth(FFT_DATA_FORMAT);

// Check CONFIG_T::status_width
status->checkBitWidth();

// Check ip parameters
if (CONFIG_T::channels < 1 || CONFIG_T::channels > 12)
{
std::cerr << ip_fft::fftErrChkHead << "Channels = " << (int)CONFIG_T::channels
<< " is illegal. It should be from 1 to 12."
<< std::endl;
exit(1);
}

if (CONFIG_T::max_nfft < 3 || CONFIG_T::max_nfft > 16)
{
std::cerr << ip_fft::fftErrChkHead << "NFFT_MAX = " << (int)CONFIG_T::max_nfft
<< " is illegal. It should be from 3 to 16."
<< std::endl;
exit(1);
}

unsigned length = FFT_LENGTH;
if (!CONFIG_T::has_nfft)
{
if (FFT_LENGTH != (1 << CONFIG_T::max_nfft))
{
std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
<< " is illegal. Log2(FFT_LENGTH) should equal to NFFT_MAX when run-time configurable length is disabled."
<< std::endl;
exit(1);
}
}
else if (length & (length - 1))
{
std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
<< " is illegal. It should be the integer power of 2."
<< std::endl;
exit(1);
}
else if (NFFT < 3 || NFFT > 16)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
<< " is illegal. Log2(FFT_LENGTH) should be from 3 to 16."
<< std::endl;
exit(1);
}
else if (NFFT > CONFIG_T::max_nfft)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
<< " is illegal. Log2(FFT_LENGTH) should be less than or equal to NFFT_MAX."
<< std::endl;
exit(1);
}
#if 0
else if (NFFT != config_ch->getNfft())
{
std::cerr << ip_fft::fftErrChkHead << "FFT_LENGTH = " << (int)FFT_LENGTH
<< " is illegal. Log2(FFT_LENGTH) should equal to NFFT field of configure channel."
<< std::endl;
exit(1);
}
#endif

if ((FFT_INPUT_WIDTH < 8) || (FFT_INPUT_WIDTH > 40))
{
std::cerr << ip_fft::fftErrChkHead << "FFT_INPUT_WIDTH = " << (int)FFT_INPUT_WIDTH
<< " is illegal. It should be 8,16,24,32,40."
<< std::endl;
exit(1);
}

if (CONFIG_T::scaling_opt == ip_fft::unscaled && FFT_DATA_FORMAT != ip_fft::floating_point)
{
unsigned golden = FFT_INPUT_WIDTH + CONFIG_T::max_nfft + 1;
golden = ((golden + 7) >> 3) << 3;
if (FFT_OUTPUT_WIDTH != golden)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH
<< " is illegal with unscaled arithmetic. It should be input_width+nfft_max+1."
<< std::endl;
exit(1);
}
}
else if (FFT_OUTPUT_WIDTH != FFT_INPUT_WIDTH)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_OUTPUT_WIDTH = " << (int)FFT_OUTPUT_WIDTH
<< " is illegal. It should be the same as input_width."
<< std::endl;
exit(1);
}

if (CONFIG_T::channels > 1 && CONFIG_T::arch_opt == ip_fft::pipelined_streaming_io)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels << " and FFT_ARCH = pipelined_streaming_io"
<< " is illegal. pipelined_streaming_io architecture is not supported when channels is bigger than 1."
<< std::endl;
exit(1);
}

if (CONFIG_T::channels > 1 && FFT_DATA_FORMAT == ip_fft::floating_point)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_CHANNELS = " << (int)CONFIG_T::channels
<< " is illegal with floating point data format. Floating point data format only supports 1 channel."
<< std::endl;
exit(1);
}

if (FFT_DATA_FORMAT == ip_fft::floating_point)
{
if (CONFIG_T::phase_factor_width != 24 && CONFIG_T::phase_factor_width != 25)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width
<< " is illegal with floating point data format. It should be 24 or 25."
<< std::endl;
exit(1);
}
}
else if (CONFIG_T::phase_factor_width < 8 || CONFIG_T::phase_factor_width > 34)
{
std::cerr << ip_fft::fftErrChkHead << "FFT_PHASE_FACTOR_WIDTH = " << (int)CONFIG_T::phase_factor_width
<< " is illegal. It should be from 8 to 34."
<< std::endl;
exit(1);
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航