docs(fft): add C example for CDMA + FFT workflow
This commit is contained in:
11
sdk/software/examples/fft_dma/Makefile
Normal file
11
sdk/software/examples/fft_dma/Makefile
Normal file
@@ -0,0 +1,11 @@
|
||||
# Build description for the fft_dma example.
# All real build rules come from the shared BSP makefile included at the end;
# this file only sets the variables defined below before that include.

# Name of the build target.
TARGET = fft_dma

# Sources: every .c file in this directory.
C_SRCS := $(wildcard ./*.c)

# Extra compiler flags: full optimisation plus debug information.
CFLAGS += -O3 -g

# Directory names (presumably consumed by common.mk -- confirm there).
OBJDIR = obj
COMMON_DIR = ../../bsp

# Toolchain and C library locations, relative to this directory.
GCC_DIR=../../../toolchains/loongson-gnu-toolchain-8.3-x86_64-loongarch32r-linux-gnusf-v2.0
PICOLIBC_DIR=../../../toolchains/picolibc

include ../../bsp/common.mk
|
||||
205
sdk/software/examples/fft_dma/main.c
Normal file
205
sdk/software/examples/fft_dma/main.c
Normal file
@@ -0,0 +1,205 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <common_func.h>
|
||||
#include <confreg_time.h>
|
||||
|
||||
// Global variables required by the board support package (BSP).
// The names and types are part of the BSP contract, so only the values are
// chosen here.
unsigned long UART_BASE              = 0xbf000000UL;  // UART register base
unsigned long CONFREG_TIMER_BASE     = 0xbf20f100UL;  // confreg timer base
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000UL;    // timer clock, Hz
unsigned long CORE_CLOCKS_PER_SEC    = 33000000UL;    // core clock, Hz
|
||||
|
||||
// FFT accelerator register map (CPU-visible addresses).
#define FFT_BASE         0xbf400000
#define FFT_IN_RE_BASE   (FFT_BASE + 0x1000)  // input buffer, real parts
#define FFT_IN_IM_BASE   (FFT_BASE + 0x2000)  // input buffer, imaginary parts
#define FFT_OUT_RE_BASE  (FFT_BASE + 0x3000)  // output buffer, real parts
#define FFT_OUT_IM_BASE  (FFT_BASE + 0x4000)  // output buffer, imaginary parts
#define FFT_CSR_REG      (FFT_BASE + 0xF000)  // control/status register

// Bits of FFT_CSR_REG.
#define FFT_CTRL_START   (1 << 4)  // written to start a transform
#define FFT_STAT_DONE    (1 << 1)  // polled until the transform has finished
#define FFT_STAT_BUSY    (1 << 0)  // defined for completeness; unused in this example

// Number of complex points per transform.
#define FFT_POINT_NUM    1024
|
||||
|
||||
// DMA controller register map (CPU-visible addresses).
#define DMA_BASE      0xbf300000
#define DMA_CTRL      (DMA_BASE + 0x0000)  // control word (burst settings + start)
#define DMA_LEN       (DMA_BASE + 0x0004)  // transfer length in bytes
#define DMA_SRC_ADDR  (DMA_BASE + 0x0008)  // source address
#define DMA_DST_ADDR  (DMA_BASE + 0x000c)  // destination address
#define DMA_STATUS    (DMA_BASE + 0x0010)  // status; bit 0 is polled for completion
|
||||
|
||||
// Single-precision pi, shared by the software FFT and the test-signal generator.
const float PI = 3.14159265358979323846f;
|
||||
|
||||
// DMA 传输通用封装函数 (阻塞等待模式)
|
||||
void dma_transfer(uint32_t phys_src, uint32_t phys_dst, uint32_t byte_len) {
|
||||
RegWrite(DMA_SRC_ADDR, phys_src);
|
||||
RegWrite(DMA_DST_ADDR, phys_dst);
|
||||
RegWrite(DMA_LEN, byte_len);
|
||||
|
||||
// burst_len = 15(16拍), burst_size = 2(4字节), start = 1
|
||||
uint32_t ctrl_val = (15 << 6) | (2 << 3) | 0x01;
|
||||
RegWrite(DMA_CTRL, ctrl_val);
|
||||
|
||||
// 轮询等待 DMA 搬运完成
|
||||
while ((RegRead(DMA_STATUS) & 0x01) == 0) {
|
||||
// CPU 空转等待
|
||||
}
|
||||
}
|
||||
|
||||
// 软件FFT实现 (基2 DIT-FFT 算法)
|
||||
void sw_fft(float re[], float im[], int n) {
|
||||
int i, j, k, l;
|
||||
float tr, ti, ur, ui, wr, wi;
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < n - 1; i++) {
|
||||
if (i < j) {
|
||||
tr = re[i]; ti = im[i];
|
||||
re[i] = re[j]; im[i] = im[j];
|
||||
re[j] = tr; im[j] = ti;
|
||||
}
|
||||
k = n / 2;
|
||||
while (k <= j) {
|
||||
j -= k;
|
||||
k /= 2;
|
||||
}
|
||||
j += k;
|
||||
}
|
||||
|
||||
for (l = 1; l < n; l *= 2) {
|
||||
ur = 1.0;
|
||||
ui = 0.0;
|
||||
wr = cos(PI / l);
|
||||
wi = -sin(PI / l);
|
||||
|
||||
for (i = 0; i < n; i += 2 * l) {
|
||||
ur = 1.0;
|
||||
ui = 0.0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int p = i + j;
|
||||
int q = p + l;
|
||||
|
||||
tr = re[q] * ur - im[q] * ui;
|
||||
ti = re[q] * ui + im[q] * ur;
|
||||
|
||||
re[q] = re[p] - tr;
|
||||
im[q] = im[p] - ti;
|
||||
re[p] += tr;
|
||||
im[p] += ti;
|
||||
|
||||
float next_ur = ur * wr - ui * wi;
|
||||
ui = ur * wi + ui * wr;
|
||||
ur = next_ur;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 全局对齐数组:作为 DMA 的源和目的内存
|
||||
int32_t hw_in_re_arr[FFT_POINT_NUM] __attribute__((aligned(64)));
|
||||
int32_t hw_in_im_arr[FFT_POINT_NUM] __attribute__((aligned(64)));
|
||||
int32_t hw_out_re_arr[FFT_POINT_NUM] __attribute__((aligned(64)));
|
||||
int32_t hw_out_im_arr[FFT_POINT_NUM] __attribute__((aligned(64)));
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
unsigned int fft_csr = RegRead(FFT_CSR_REG);
|
||||
printf("fft_csr init = %x\n", fft_csr);
|
||||
|
||||
// 获取外设的纯物理地址 (屏蔽高3位 0x1FFFFFFF)
|
||||
uint32_t phys_fft_in_re = FFT_IN_RE_BASE & 0x1FFFFFFF;
|
||||
uint32_t phys_fft_in_im = FFT_IN_IM_BASE & 0x1FFFFFFF;
|
||||
uint32_t phys_fft_out_re = FFT_OUT_RE_BASE & 0x1FFFFFFF;
|
||||
uint32_t phys_fft_out_im = FFT_OUT_IM_BASE & 0x1FFFFFFF;
|
||||
|
||||
// 获取内存数组的纯物理地址,并包装为 CPU 使用的无缓存(Uncached)指针
|
||||
volatile int32_t *uncached_in_re = (volatile int32_t *)(((uint32_t)hw_in_re_arr & 0x1FFFFFFF) | 0xA0000000);
|
||||
volatile int32_t *uncached_in_im = (volatile int32_t *)(((uint32_t)hw_in_im_arr & 0x1FFFFFFF) | 0xA0000000);
|
||||
volatile int32_t *uncached_out_re = (volatile int32_t *)(((uint32_t)hw_out_re_arr & 0x1FFFFFFF) | 0xA0000000);
|
||||
volatile int32_t *uncached_out_im = (volatile int32_t *)(((uint32_t)hw_out_im_arr & 0x1FFFFFFF) | 0xA0000000);
|
||||
|
||||
// 准备软件 FFT 测试数据
|
||||
float sw_in_re[FFT_POINT_NUM];
|
||||
float sw_in_im[FFT_POINT_NUM];
|
||||
|
||||
// 初始化测试波形
|
||||
for (int i = 0; i < FFT_POINT_NUM; i++) {
|
||||
float dc_part = 4000.0f;
|
||||
float f10_part = 8000.0f * cos(2 * PI * 10.0 * i / FFT_POINT_NUM);
|
||||
float f200_part = 6000.0f * sin(2 * PI * 200.0 * i / FFT_POINT_NUM);
|
||||
float f400_part = 3000.0f * sin(2 * PI * 400.0 * i / FFT_POINT_NUM);
|
||||
|
||||
sw_in_re[i] = dc_part + f10_part + f200_part + f400_part;
|
||||
sw_in_im[i] = 0.0f;
|
||||
|
||||
// 硬件输入需要转为整数存入无缓存内存
|
||||
uncached_in_re[i] = (int32_t)sw_in_re[i];
|
||||
uncached_in_im[i] = 0;
|
||||
|
||||
// 清理输出内存以防干扰
|
||||
uncached_out_re[i] = 0xDEADBEEF;
|
||||
uncached_out_im[i] = 0xDEADBEEF;
|
||||
}
|
||||
|
||||
unsigned int tick_start, tick_end;
|
||||
unsigned int hw_time, sw_time;
|
||||
|
||||
// 硬件加速 FFT 测试 (纯 DMA 搬运)
|
||||
printf("\n--- Starting Hardware FFT with DMA ---\n");
|
||||
tick_start = get_ns(); // 开始计时
|
||||
|
||||
uint32_t transfer_bytes = FFT_POINT_NUM * 4; // 1024个点 * 4字节
|
||||
|
||||
// MA 将数据从内存搬运到 FFT 输入外设
|
||||
dma_transfer(((uint32_t)hw_in_re_arr & 0x1FFFFFFF), phys_fft_in_re, transfer_bytes);
|
||||
dma_transfer(((uint32_t)hw_in_im_arr & 0x1FFFFFFF), phys_fft_in_im, transfer_bytes);
|
||||
|
||||
// 启动 FFT 并等待计算完成
|
||||
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
|
||||
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
|
||||
// poll
|
||||
}
|
||||
|
||||
// DMA 将结果从 FFT 输出外设搬回内存
|
||||
dma_transfer(phys_fft_out_re, ((uint32_t)hw_out_re_arr & 0x1FFFFFFF), transfer_bytes);
|
||||
dma_transfer(phys_fft_out_im, ((uint32_t)hw_out_im_arr & 0x1FFFFFFF), transfer_bytes);
|
||||
|
||||
tick_end = get_ns(); // 结束计时
|
||||
hw_time = tick_end - tick_start;
|
||||
|
||||
// 纯软件 FFT 测试
|
||||
printf("--- Starting Software FFT ---\n");
|
||||
tick_start = get_ns();
|
||||
|
||||
sw_fft(sw_in_re, sw_in_im, FFT_POINT_NUM);
|
||||
|
||||
tick_end = get_ns();
|
||||
sw_time = tick_end - tick_start;
|
||||
|
||||
// 打印对比结果
|
||||
printf("\n--- Performance Comparison ---\n");
|
||||
printf("Timer Clock Freq : %lu Hz\n", CONFREG_CLOCKS_PER_SEC);
|
||||
printf("Hardware FFT Time: %u ns (%.3f ms)\n", hw_time, (float)hw_time / 1000000.0);
|
||||
printf("Software FFT Time: %u ns (%.3f ms)\n", sw_time, (float)sw_time / 1000000.0);
|
||||
|
||||
if (hw_time > 0) {
|
||||
printf("Speedup Ratio : %.2fx\n", (float)sw_time / hw_time);
|
||||
}
|
||||
|
||||
printf("\n--- Verification (Only showing Bins with energy > 10) ---\n");
|
||||
for (int i = 0; i < FFT_POINT_NUM; i++) {
|
||||
// CPU 通过无缓存指针读取 DMA 搬运回来的结果
|
||||
int32_t hw_re = uncached_out_re[i];
|
||||
int32_t hw_im = uncached_out_im[i];
|
||||
|
||||
if (fabs((float)hw_re) > 10 || fabs((float)hw_im) > 10) {
|
||||
printf("Bin [%4d] Hz: HW(Re:%6d, Im:%6d) | SW(Re:%6d, Im:%6d)\n",
|
||||
i,
|
||||
hw_re, hw_im,
|
||||
(int)sw_in_re[i] / FFT_POINT_NUM, (int)sw_in_im[i] / FFT_POINT_NUM);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user