feat(ip): integrate 1024-point AXI FFT IP

This commit is contained in:
2026-04-12 22:26:20 +08:00
parent 4217ce2f5b
commit 5aa5ba8e45
7 changed files with 1912 additions and 0 deletions

View File

@@ -0,0 +1,383 @@
// Address map of the FFT controller (offsets within address bits [15:0]).
// Control/status register: compared against address bits [15:0] on both the
// write path and the read path of fft_controller.
`define FFT_CSR_ADDR 16'hf000
// Base offsets of the four sample buffers (real/imaginary input, real/imaginary
// output). NOTE(review): fft_controller selects these regions by comparing
// address bits [15:12] with the literals 4'h1..4'h4 and does not reference the
// four macros below, so the macros and the decode must be kept in sync by hand.
`define FFT_IN_RE_BASE 16'h1000
`define FFT_IN_IM_BASE 16'h2000
`define FFT_OUT_RE_BASE 16'h3000
`define FFT_OUT_IM_BASE 16'h4000
// fft_controller: AXI slave front-end that buffers samples for an FFT core.
//   * Writes whose address bits [15:12] are 4'h1 / 4'h2 store the write data
//     into the real / imaginary input buffers (word index = address bits [11:2]).
//   * Writing `FFT_CSR_ADDR loads the control nibble from write-data bits [7:4];
//     control bit 0 (write-data bit 4) starts a transform when the control FSM
//     is idle.
//   * Reading `FFT_CSR_ADDR returns {24'h0, control[3:0], status[3:0]};
//     status bit 0 is "busy", status bit 1 is "done".
//   * Reads whose address bits [15:12] are 4'h3 / 4'h4 return words from the
//     real / imaginary output buffers, addressed with a bit-reversed word index.
// All sequential logic is clocked by aclk and uses aresetn as a synchronous,
// active-low reset.
module fft_controller #(
parameter FFT_LEN = 1024, // FFT processing length (number of samples loaded / results counted). Buffer addresses and the bit-reversal are hard-wired to 10 bits.
parameter BRAM_RD_LATENCY = 2 // BRAM read latency in clocks (typically 2 when the Block RAM output register is enabled). The FFT input-valid delay line supports 1..4.
)(
input aclk, // clock for the AXI interface, buffers and FFT core
input aresetn, // active-low reset, sampled on the rising edge of aclk
// AXI Lite / AXI4 Wrapper ports
// ---- write address channel ----
input [4 :0] s_awid, // latched and echoed on s_bid
input [31:0] s_awaddr, // only bits [15:0] are decoded
input [7 :0] s_awlen, // not used by this module (burst end is taken from s_wlast)
input [2 :0] s_awsize, // not used by this module (address always steps by 4)
input [1 :0] s_awburst, // 2'b01 (INCR) advances the address per beat; any other value keeps it fixed
input s_awlock, // not used by this module
input [3 :0] s_awcache, // not used by this module
input [2 :0] s_awprot, // not used by this module
input s_awvalid,
output reg s_awready,
// ---- write data channel ----
input [4 :0] s_wid, // not used by this module
input [31:0] s_wdata,
input [3 :0] s_wstrb, // not used by this module: every accepted beat writes the full word
input s_wlast,
input s_wvalid,
output reg s_wready,
// ---- write response channel ----
output [4 :0] s_bid,
output [1 :0] s_bresp, // constant 2'b00
output reg s_bvalid,
input s_bready,
// ---- read address channel ----
input [4 :0] s_arid, // latched and echoed on s_rid
input [31:0] s_araddr, // only bits [15:0] are decoded
input [7 :0] s_arlen, // number of beats minus one
input [2 :0] s_arsize, // not used by this module (address always steps by 4)
input [1 :0] s_arburst, // 2'b01 (INCR) advances the address per beat; any other value keeps it fixed
input s_arlock, // not used by this module
input [3 :0] s_arcache, // not used by this module
input [2 :0] s_arprot, // not used by this module
input s_arvalid,
output reg s_arready,
// ---- read data channel ----
output [4 :0] s_rid,
output reg [31:0] s_rdata,
output [1 :0] s_rresp, // constant 2'b00
output reg s_rlast,
output reg s_rvalid,
input s_rready,
// ---- status ----
output fft_finish // mirrors CSR status bit 1 ("done")
);
// Internal AXI burst registers, captured at the AW / AR handshakes
reg [31:0] aw_addr_reg; // current write address; advanced by 4 per accepted W beat for INCR bursts
reg [1 :0] aw_burst_reg; // latched s_awburst
reg [4 :0] aw_id_reg; // latched s_awid, driven back on s_bid
reg [31:0] ar_addr_reg; // current read address; advanced by 4 per completed R beat for INCR bursts
reg [7 :0] ar_len_reg; // latched s_arlen
reg [1 :0] ar_burst_reg; // latched s_arburst
reg [4 :0] ar_id_reg; // latched s_arid, driven back on s_rid
reg [7 :0] ar_cnt; // number of R beats already completed in the current burst
// ---------------------------------------------------------------------------
// Write channel state machine (AW -> W -> B)
//   W_IDLE : s_awready is high; an AW handshake latches address/burst/id and
//            opens the data phase.
//   W_DATA : every accepted W beat advances the address for INCR bursts; the
//            beat carrying s_wlast closes the data phase and raises s_bvalid.
//   W_RESP : waits for the B handshake, then returns to W_IDLE.
// ---------------------------------------------------------------------------
localparam W_IDLE = 2'd0;
localparam W_DATA = 2'd1;
localparam W_RESP = 2'd2;

reg [1:0] w_state;

// Channel handshakes
wire aw_hs   = s_awvalid && s_awready;
wire w_hs    = s_wvalid && s_wready;
wire b_hs    = s_bvalid && s_bready;
// Only AXI burst type INCR (2'b01) advances the address
wire aw_incr = (aw_burst_reg == 2'b01);

always @(posedge aclk) begin
    if (!aresetn) begin
        w_state      <= W_IDLE;
        s_awready    <= 1'b1;
        s_wready     <= 1'b0;
        s_bvalid     <= 1'b0;
        aw_addr_reg  <= 32'h0;
        aw_burst_reg <= 2'h0;
        aw_id_reg    <= 5'h0;
    end else if (w_state == W_IDLE) begin
        if (aw_hs) begin
            // Address accepted: close AW, open W
            aw_addr_reg  <= s_awaddr;
            aw_burst_reg <= s_awburst;
            aw_id_reg    <= s_awid;
            s_awready    <= 1'b0;
            s_wready     <= 1'b1;
            w_state      <= W_DATA;
        end else begin
            s_awready    <= 1'b1;
        end
    end else if (w_state == W_DATA) begin
        // Step to the next word after each accepted beat of an INCR burst
        if (w_hs && aw_incr) begin
            aw_addr_reg <= aw_addr_reg + 4;
        end
        // Last beat accepted: stop taking data and issue the response
        if (w_hs && s_wlast) begin
            s_wready <= 1'b0;
            s_bvalid <= 1'b1;
            w_state  <= W_RESP;
        end
    end else if (w_state == W_RESP) begin
        if (b_hs) begin
            s_bvalid  <= 1'b0;
            s_awready <= 1'b1; // ready for the next write request
            w_state   <= W_IDLE;
        end
    end else begin
        w_state <= W_IDLE;
    end
end

// High for each accepted write beat; used as the write strobe for the input
// buffers and the CSR
wire w_active = (w_state == W_DATA) && w_hs;

assign s_bid   = aw_id_reg;
assign s_bresp = 2'b00;
// ---------------------------------------------------------------------------
// Read channel state machine (AR -> R)
//   R_IDLE : s_arready is high; an AR handshake latches address/len/burst/id.
//   R_WAIT : counts down BRAM_RD_LATENCY clocks so the buffer output for the
//            current address is valid, then raises s_rvalid.
//   R_DATA : holds the beat until the R handshake; afterwards either ends the
//            burst or steps to the next beat and waits again.
// ---------------------------------------------------------------------------
localparam R_IDLE = 2'd0;
localparam R_WAIT = 2'd1;
localparam R_DATA = 2'd2;

reg [1:0] r_state;
reg [3:0] r_wait_cnt;

// Channel handshakes
wire ar_hs        = s_arvalid && s_arready;
wire r_hs         = s_rvalid && s_rready;
// The beat counter has reached the requested burst length
wire r_final_beat = (ar_cnt == ar_len_reg);
// Only AXI burst type INCR (2'b01) advances the address
wire ar_incr      = (ar_burst_reg == 2'b01);

always @(posedge aclk) begin
    if (!aresetn) begin
        r_state      <= R_IDLE;
        s_arready    <= 1'b1;
        s_rvalid     <= 1'b0;
        s_rlast      <= 1'b0;
        ar_addr_reg  <= 32'h0;
        ar_len_reg   <= 8'h0;
        ar_burst_reg <= 2'h0;
        ar_id_reg    <= 5'h0;
        ar_cnt       <= 8'h0;
        r_wait_cnt   <= 4'h0;
    end else if (r_state == R_IDLE) begin
        if (ar_hs) begin
            // Request accepted: latch it and wait for the first buffer word
            ar_addr_reg  <= s_araddr;
            ar_len_reg   <= s_arlen;
            ar_burst_reg <= s_arburst;
            ar_id_reg    <= s_arid;
            ar_cnt       <= 8'h0;
            r_wait_cnt   <= BRAM_RD_LATENCY;
            s_arready    <= 1'b0;
            r_state      <= R_WAIT;
        end else begin
            s_arready    <= 1'b1;
        end
    end else if (r_state == R_WAIT) begin
        if (r_wait_cnt < 4'd2) begin
            // Fixed buffer latency has elapsed: present the beat
            r_wait_cnt <= 4'h0;
            s_rvalid   <= 1'b1;
            s_rlast    <= r_final_beat;
            r_state    <= R_DATA;
        end else begin
            r_wait_cnt <= r_wait_cnt - 1;
        end
    end else if (r_state == R_DATA) begin
        if (r_hs) begin
            // The beat was taken by the master
            s_rvalid <= 1'b0;
            s_rlast  <= 1'b0;
            if (r_final_beat) begin
                // End of burst
                s_arready <= 1'b1;
                r_state   <= R_IDLE;
            end else begin
                // Set up the next beat and wait for the buffer again
                ar_cnt <= ar_cnt + 1;
                if (ar_incr) begin
                    ar_addr_reg <= ar_addr_reg + 4;
                end
                r_wait_cnt <= BRAM_RD_LATENCY;
                r_state    <= R_WAIT;
            end
        end
    end else begin
        r_state <= R_IDLE;
    end
end

assign s_rid   = ar_id_reg;
assign s_rresp = 2'b00;
// Address decode: each buffer occupies one 4 KiB region, selected by address
// bits [15:12]. Input buffers are decoded from the write address, output
// buffers from the read address.
wire hit_in_re  = ({aw_addr_reg[15:12], 12'h000} == `FFT_IN_RE_BASE);
wire hit_in_im  = ({aw_addr_reg[15:12], 12'h000} == `FFT_IN_IM_BASE);
wire hit_out_re = ({ar_addr_reg[15:12], 12'h000} == `FFT_OUT_RE_BASE);
wire hit_out_im = ({ar_addr_reg[15:12], 12'h000} == `FFT_OUT_IM_BASE);

// Control / status register: read back as {24'h0, control, status}
reg  [3:0]  fft_ctrl;
reg  [3:0]  fft_status;
wire [31:0] fft_csr;
assign fft_csr = {{24{1'b0}}, fft_ctrl, fft_status};

// The control register is written only on an accepted write beat whose
// address matches the CSR offset
wire aw_is_csr     = (aw_addr_reg[15:0] == `FFT_CSR_ADDR);
wire write_fft_csr = aw_is_csr && w_active;
// ---------------------------------------------------------------------------
// Transform control state machine
//   IDLE     : waits for control bit 0; both counters are held at zero.
//   LOAD     : streams FFT_LEN input-buffer addresses (in_cnt) to the FFT.
//   WAIT_OUT : waits until FFT_LEN results have been flagged by do_en.
//   DONE     : one clock, then back to IDLE.
// ---------------------------------------------------------------------------
localparam IDLE     = 2'd0;
localparam LOAD     = 2'd1;
localparam WAIT_OUT = 2'd2;
localparam DONE     = 2'd3;

// Index of the final sample / result of a transform
localparam LAST_IDX = FFT_LEN - 1;

reg [1:0]  state;
reg [1:0]  next_state;
reg [10:0] in_cnt;
reg [10:0] out_cnt;

// FFT core result stream
wire        do_en;
wire [31:0] do_re;
wire [31:0] do_im;

// State register
always @(posedge aclk) begin
    if (!aresetn) begin
        state <= IDLE;
    end else begin
        state <= next_state;
    end
end

// Next-state logic: stay in the current state unless a transition fires
always @(*) begin
    next_state = state;
    case (state)
        IDLE: begin
            if (fft_ctrl[0]) next_state = LOAD;
        end
        LOAD: begin
            if (in_cnt == LAST_IDX) next_state = WAIT_OUT;
        end
        WAIT_OUT: begin
            if (do_en && (out_cnt == LAST_IDX)) next_state = DONE;
        end
        DONE: begin
            next_state = IDLE;
        end
        default: begin
            next_state = IDLE;
        end
    endcase
end

// Input address counter: cleared in IDLE, advances on every LOAD clock
always @(posedge aclk) begin
    if (!aresetn || (state == IDLE)) begin
        in_cnt <= 0;
    end else if (state == LOAD) begin
        in_cnt <= in_cnt + 1;
    end
end

// Result counter: cleared in IDLE, advances on every valid FFT output
always @(posedge aclk) begin
    if (!aresetn || (state == IDLE)) begin
        out_cnt <= 0;
    end else if (do_en) begin
        out_cnt <= out_cnt + 1;
    end
end
// ---------------------------------------------------------------------------
// Control and status register update
// ---------------------------------------------------------------------------
// A CSR write with write-data bit 4 set (control bit 0, the start bit)
wire csr_start_req = write_fft_csr && s_wdata[4];
// The control FSM is in neither IDLE nor DONE
wire fsm_running   = !((state == IDLE) || (state == DONE));

// Control nibble: loaded from write-data bits [7:4] on a CSR write; otherwise
// the start bit is cleared automatically once the FSM has left IDLE
always @(posedge aclk) begin
    if (!aresetn) begin
        fft_ctrl <= 4'h0;
    end else if (write_fft_csr) begin
        fft_ctrl <= s_wdata[7:4];
    end else if (state != IDLE) begin
        fft_ctrl[0] <= 1'b0;
    end
end

// Status nibble: bit 0 = busy, bit 1 = done (sticky until the next start
// request); bits [3:2] are only ever reset
always @(posedge aclk) begin
    if (!aresetn) begin
        fft_status <= 4'h0;
    end else begin
        fft_status[0] <= fsm_running;
        if (csr_start_req) begin
            fft_status[1] <= 1'b0;
        end else if (state == DONE) begin
            fft_status[1] <= 1'b1;
        end
    end
end

assign fft_finish = fft_status[1];
// ---------------------------------------------------------------------------
// Sample buffers and FFT core
//   u_in0 / u_in1   : real / imaginary input buffers. Port A is written by AXI
//                     write beats, port B is read sequentially during LOAD.
//   u_out0 / u_out1 : real / imaginary output buffers. Port A is written by
//                     the FFT result stream, port B is read by AXI reads.
// ---------------------------------------------------------------------------
// BRAM read-data wiring
wire [31:0] in_re_dout;
wire [31:0] in_im_dout;
wire [31:0] out_re_dout;
wire [31:0] out_im_dout;

// Delay line for the FFT input-valid flag: supports a buffer read latency of
// 1 to 4 clocks so that di_en lines up exactly with the data leaving the
// input buffers. Latencies above 4 fall back to the 4-clock tap.
reg [3:0] di_en_shift;
always @(posedge aclk) begin
    if (~aresetn) di_en_shift <= 0;
    else          di_en_shift <= {di_en_shift[2:0], (state == LOAD)};
end
wire di_en_r = (BRAM_RD_LATENCY == 1) ? di_en_shift[0] :
               (BRAM_RD_LATENCY == 2) ? di_en_shift[1] :
               (BRAM_RD_LATENCY == 3) ? di_en_shift[2] : di_en_shift[3];

// Read enable for port B of the input buffers.
// It must stay asserted while the last words are still travelling through the
// buffer's read pipeline, not only while addresses are being issued. If the
// output register of bram0 is clock-enabled by enb (NOTE(review): this is how
// a block RAM with an output register and no separate register-enable pin is
// commonly configured - confirm against the bram0 IP settings), dropping enb
// when LOAD ends would freeze the final sample inside the RAM and the FFT
// would be fed the previous sample twice. Holding the enable while the delay
// line drains only causes a few extra reads whose data is never flagged valid.
wire in_rd_en = (state == LOAD) || (|di_en_shift);

bram0 u_in0(
    .clka  (aclk),
    .ena   (w_active && hit_in_re), // driven by the burst write logic
    .wea   (1'b1),
    .addra (aw_addr_reg[11:2]),     // current (incrementing) burst write address, as a word index
    .dina  (s_wdata),
    .clkb  (aclk),
    .enb   (in_rd_en),
    .addrb (in_cnt[9:0]),
    .doutb (in_re_dout)
);

bram0 u_in1(
    .clka  (aclk),
    .ena   (w_active && hit_in_im),
    .wea   (1'b1),
    .addra (aw_addr_reg[11:2]),
    .dina  (s_wdata),
    .clkb  (aclk),
    .enb   (in_rd_en),
    .addrb (in_cnt[9:0]),
    .doutb (in_im_dout)
);

// The output buffers are read through a bit-reversed word index: results are
// stored in arrival order (out_cnt) and the 10-bit read index is mirrored.
// NOTE(review): presumably the FFT core emits results in bit-reversed order -
// confirm against the FFT module.
wire [9:0] b_rd_addr = ar_addr_reg[11:2];
wire [9:0] bit_reversed_index = {
    b_rd_addr[0], b_rd_addr[1], b_rd_addr[2], b_rd_addr[3], b_rd_addr[4],
    b_rd_addr[5], b_rd_addr[6], b_rd_addr[7], b_rd_addr[8], b_rd_addr[9]
};

bram0 u_out0(
    .clka  (aclk),
    .ena   (do_en),
    .wea   (1'b1),
    .addra (out_cnt[9:0]),
    .dina  (do_re),
    .clkb  (aclk),
    .enb   (r_state != R_IDLE), // keep the buffer enabled for the whole read burst
    .addrb (bit_reversed_index),
    .doutb (out_re_dout)
);

bram0 u_out1(
    .clka  (aclk),
    .ena   (do_en),
    .wea   (1'b1),
    .addra (out_cnt[9:0]),
    .dina  (do_im),
    .clkb  (aclk),
    .enb   (r_state != R_IDLE),
    .addrb (bit_reversed_index),
    .doutb (out_im_dout)
);

// FFT core: consumes one complex sample per clock while di_en is high and
// flags each complex result with do_en
FFT #(
    .WIDTH(32)
) u_FFT (
    .clock (aclk),
    .reset (~aresetn),
    .di_en (di_en_r),
    .di_re (in_re_dout),
    .di_im (in_im_dout),
    .do_en (do_en),
    .do_re (do_re),
    .do_im (do_im)
);
// Read data selection: the CSR takes priority, then the real output buffer,
// then the imaginary output buffer; any other address reads as zero.
wire        rd_is_csr     = (ar_addr_reg[15:0] == `FFT_CSR_ADDR);
wire [31:0] out_bram_word = hit_out_re ? out_re_dout :
                            (hit_out_im ? out_im_dout : 32'h0);
wire [31:0] rdata_d       = rd_is_csr ? fft_csr : out_bram_word;

// s_rdata is declared as a reg output, so it is driven from a combinational
// process
always @(*) begin
    s_rdata = rdata_d;
end
endmodule