Compare commits

..

10 Commits

21 changed files with 787 additions and 263 deletions

31
README.MD Normal file
View File

@@ -0,0 +1,31 @@
# 2026年全国大学生集成电路创新创业大赛 - 龙芯中科杯
本仓库为 2026年全国大学生集成电路创新创业大赛（CICC）“龙芯中科”企业命题的参赛代码仓库。
## 团队信息
- 队伍编号:CICC1008439
- 队伍名称:碳基生物
- 参赛赛道:龙芯中科企业命题
## 项目简介
本项目基于龙芯指令集架构（Loongson Architecture），针对特定应用场景（如数字信号处理/嵌入式系统），优化设计了一套高性能的系统级芯片（SoC）或核心外设子系统，旨在充分发挥龙芯架构的优势，通过自定义硬件加速器和高效的总线管理，提升系统整体效能。
## 已完成功能模块
- 外部中断控制器
- FFT 硬件加速器
- 8-mux mm2mm DMA 控制器
## 目录结构
```
.
├── fpga/ # Vivado工程文件与Tcl自动化构建脚本
├── doc/ # 参赛技术文档、设计说明书及测试报告
├── rtl/ # 硬件源代码 (龙芯核、中断控制器、FFT、DMA等)
├── sdk/ # 嵌入式软件开发 (针对龙芯架构的C语言驱动与应用)
├── sim/ # 仿真验证环境 (Testbench与仿真脚本)
└── README.md # 项目总体说明与快速上手指南
```

View File

@@ -24,11 +24,11 @@ module axi_dvi #
input [3:0] s_awcache,
input [2:0] s_awprot,
input s_wvalid,
output s_wready,
output reg s_wready,
input [31:0] s_wdata,
input [3:0] s_wstrb,
input s_wlast,
output s_bvalid,
output reg s_bvalid,
input s_bready,
output [4:0] s_bid,
output [1:0] s_bresp,
@@ -42,12 +42,12 @@ module axi_dvi #
input [0:0] s_arlock,
input [3:0] s_arcache,
input [2:0] s_arprot,
output s_rvalid,
output reg s_rvalid,
input s_rready,
output [31:0] s_rdata,
output reg [31:0] s_rdata,
output [4:0] s_rid,
output [1:0] s_rresp,
output s_rlast,
output reg s_rlast,
output video_clk, // Video clock signal
output hsync, // Horizontal sync signal
@@ -64,7 +64,6 @@ module axi_dvi #
reg [31:0] DVI_RECT_DIR,DVI_RECT_L_W,DVI_SQU_DIR,DVI_SQU_R;
reg busy,write,R_or_W;
reg s_wready;
wire ar_enter = s_arvalid & s_arready;
wire r_retire = s_rvalid & s_rready & s_rlast;
@@ -126,8 +125,6 @@ module axi_dvi #
else if(w_enter & s_wlast) s_wready <= 1'b0;
reg [31:0] s_rdata;
reg s_rvalid,s_rlast;
wire [31:0] rdata_d = buf_addr[15:0] == 16'h0 ? DVI_RECT_DIR :
buf_addr[15:0] == 16'h4 ? DVI_RECT_L_W :
buf_addr[15:0] == 16'h8 ? DVI_SQU_DIR :
@@ -152,7 +149,6 @@ module axi_dvi #
end
end
reg s_bvalid;
always@(posedge aclk) begin
if(~aresetn) s_bvalid <= 1'b0;
else if(w_enter) s_bvalid <= 1'b1;

View File

@@ -134,6 +134,12 @@ initial begin
end
end
localparam SAFE_EXP_MSB = (AXIL_ADDR_BIT_OFFSET > AXI_ADDR_BIT_OFFSET) ? AXIL_ADDR_BIT_OFFSET - 1 : AXI_ADDR_BIT_OFFSET;
localparam SAFE_EXP_LSB = AXI_ADDR_BIT_OFFSET;
localparam SAFE_NAR_MSB = (AXI_ADDR_BIT_OFFSET > AXIL_ADDR_BIT_OFFSET) ? AXI_ADDR_BIT_OFFSET - 1 : AXIL_ADDR_BIT_OFFSET;
localparam SAFE_NAR_LSB = AXIL_ADDR_BIT_OFFSET;
localparam [1:0]
STATE_IDLE = 2'd0,
STATE_DATA = 2'd1,
@@ -286,7 +292,7 @@ always @* begin
if (m_axil_rready && m_axil_rvalid) begin
s_axi_rid_next = id_reg;
s_axi_rdata_next = m_axil_rdata >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
s_axi_rdata_next = m_axil_rdata >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
s_axi_rresp_next = m_axil_rresp;
s_axi_rlast_next = 1'b0;
s_axi_rvalid_next = 1'b1;
@@ -316,7 +322,7 @@ always @* begin
s_axi_rid_next = id_reg;
data_next = m_axil_rdata;
resp_next = m_axil_rresp;
s_axi_rdata_next = m_axil_rdata >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
s_axi_rdata_next = m_axil_rdata >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
s_axi_rresp_next = m_axil_rresp;
s_axi_rlast_next = 1'b0;
s_axi_rvalid_next = 1'b1;
@@ -346,7 +352,7 @@ always @* begin
if (s_axi_rready || !s_axi_rvalid) begin
s_axi_rid_next = id_reg;
s_axi_rdata_next = data_reg >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
s_axi_rdata_next = data_reg >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
s_axi_rresp_next = resp_reg;
s_axi_rlast_next = 1'b0;
s_axi_rvalid_next = 1'b1;
@@ -412,7 +418,7 @@ always @* begin
m_axil_rready_next = !s_axi_rvalid && !m_axil_arvalid;
if (m_axil_rready && m_axil_rvalid) begin
data_next[addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = m_axil_rdata;
data_next[addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = m_axil_rdata;
if (m_axil_rresp) begin
resp_next = m_axil_rresp;
end

View File

@@ -146,6 +146,12 @@ localparam [1:0]
STATE_DATA_2 = 2'd2,
STATE_RESP = 2'd3;
// 添加安全的位选边界防止 ModelSim Range Reversed 错误
localparam SAFE_EXP_MSB = (AXIL_ADDR_BIT_OFFSET > AXI_ADDR_BIT_OFFSET) ? AXIL_ADDR_BIT_OFFSET - 1 : AXI_ADDR_BIT_OFFSET;
localparam SAFE_EXP_LSB = AXI_ADDR_BIT_OFFSET;
localparam SAFE_NAR_MSB = (AXI_ADDR_BIT_OFFSET > AXIL_ADDR_BIT_OFFSET) ? AXI_ADDR_BIT_OFFSET - 1 : AXIL_ADDR_BIT_OFFSET;
localparam SAFE_NAR_LSB = AXIL_ADDR_BIT_OFFSET;
reg [1:0] state_reg = STATE_IDLE, state_next;
reg [AXI_ID_WIDTH-1:0] id_reg = {AXI_ID_WIDTH{1'b0}}, id_next;
@@ -335,7 +341,7 @@ always @* begin
if (s_axi_wready && s_axi_wvalid) begin
m_axil_wdata_next = {(AXIL_WORD_WIDTH/AXI_WORD_WIDTH){s_axi_wdata}};
m_axil_wstrb_next = s_axi_wstrb << (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_STRB_WIDTH);
m_axil_wstrb_next = s_axi_wstrb << (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_STRB_WIDTH);
m_axil_wvalid_next = 1'b1;
burst_next = burst_reg - 1;
burst_active_next = burst_reg != 0;
@@ -354,13 +360,13 @@ always @* begin
if (CONVERT_NARROW_BURST) begin
for (i = 0; i < AXI_WORD_WIDTH; i = i + 1) begin
if (s_axi_wstrb[i]) begin
data_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH+i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE] = s_axi_wdata[i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE];
strb_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_STRB_WIDTH+i] = 1'b1;
data_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_DATA_WIDTH+i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE] = s_axi_wdata[i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE];
strb_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_STRB_WIDTH+i] = 1'b1;
end
end
end else begin
data_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = s_axi_wdata;
strb_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_STRB_WIDTH +: SEGMENT_STRB_WIDTH] = s_axi_wstrb;
data_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = s_axi_wdata;
strb_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_STRB_WIDTH +: SEGMENT_STRB_WIDTH] = s_axi_wstrb;
end
m_axil_wdata_next = data_next;
m_axil_wstrb_next = strb_next;
@@ -451,8 +457,8 @@ always @* begin
if (s_axi_wready && s_axi_wvalid) begin
data_next = s_axi_wdata;
strb_next = s_axi_wstrb;
m_axil_wdata_next = s_axi_wdata >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_DATA_WIDTH);
m_axil_wstrb_next = s_axi_wstrb >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_STRB_WIDTH);
m_axil_wdata_next = s_axi_wdata >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_DATA_WIDTH);
m_axil_wstrb_next = s_axi_wstrb >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_STRB_WIDTH);
m_axil_wvalid_next = 1'b1;
burst_next = burst_reg - 1;
burst_active_next = burst_reg != 0;
@@ -469,8 +475,8 @@ always @* begin
s_axi_wready_next = 1'b0;
if (!m_axil_wvalid || m_axil_wready) begin
m_axil_wdata_next = data_reg >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_DATA_WIDTH);
m_axil_wstrb_next = strb_reg >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_STRB_WIDTH);
m_axil_wdata_next = data_reg >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_DATA_WIDTH);
m_axil_wstrb_next = strb_reg >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_STRB_WIDTH);
m_axil_wvalid_next = 1'b1;
addr_next = (addr_reg + (1 << master_burst_size_reg)) & ({ADDR_WIDTH{1'b1}} << master_burst_size_reg);
last_segment_next = addr_next[burst_size_reg] != addr_reg[burst_size_reg];

View File

@@ -0,0 +1,381 @@
// ============================================================================
// snix_axil_cdma_mux.sv
// Multi-Channel AXI-Lite MUX Wrapper for snix_axi_mm2mm engine
//
// Register Map (per channel, offset 0x40):
// 0x00: READ_ADDR (Source Address)
// 0x04: WRITE_ADDR (Destination Address)
// 0x08: LENGTH (Transfer length in bytes)
// 0x0C: TAG (User tag, purely for software tracking)
// 0x10: CTRL (Write 1 to bit 0 to trigger)
// [5:3] = AXI AxSIZE (0=1B, 1=2B, 2=4B, 3=8B...)
// [13:6] = AXI AxLEN (0=1 beat, 15=16 beats...)
// 0x14: STATUS (Bit 0: Busy RO, Bit 1: Done W1C)
// ============================================================================
`timescale 1ns / 1ps
module snix_axil_cdma_mux #(
parameter int ADDR_WIDTH = 32,
parameter int DATA_WIDTH = 32, // Matches engine default
parameter int AXIL_ADDR_WIDTH = 32,
parameter int AXIL_DATA_WIDTH = 32,
parameter int ID_WIDTH = 4,
parameter int USER_WIDTH = 1,
parameter int PORTS = 8,
parameter int FIFO_DEPTH = 16
) (
input logic clk,
input logic rst_n,
// ==========================================
// AXI-Lite Slave Interface (CPU CSR Access)
// ==========================================
input logic [AXIL_ADDR_WIDTH-1:0] s_axil_awaddr,
input logic s_axil_awvalid,
output logic s_axil_awready,
input logic [AXIL_DATA_WIDTH-1:0] s_axil_wdata,
input logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb,
input logic s_axil_wvalid,
output logic s_axil_wready,
output logic [1:0] s_axil_bresp,
output logic s_axil_bvalid,
input logic s_axil_bready,
input logic [AXIL_ADDR_WIDTH-1:0] s_axil_araddr,
input logic s_axil_arvalid,
output logic s_axil_arready,
output logic [AXIL_DATA_WIDTH-1:0] s_axil_rdata,
output logic [1:0] s_axil_rresp,
output logic s_axil_rvalid,
input logic s_axil_rready,
// ==========================================
// AXI4 Master Interface (To Crossbar/Memory)
// ==========================================
output logic [ID_WIDTH-1:0] mm2mm_awid,
output logic [ADDR_WIDTH-1:0] mm2mm_awaddr,
output logic [7:0] mm2mm_awlen,
output logic [2:0] mm2mm_awsize,
output logic [1:0] mm2mm_awburst,
output logic mm2mm_awlock,
output logic [3:0] mm2mm_awcache,
output logic [2:0] mm2mm_awprot,
output logic [3:0] mm2mm_awqos,
output logic [USER_WIDTH-1:0] mm2mm_awuser,
output logic mm2mm_awvalid,
input logic mm2mm_awready,
output logic [DATA_WIDTH-1:0] mm2mm_wdata,
output logic [DATA_WIDTH/8-1:0] mm2mm_wstrb,
output logic mm2mm_wlast,
output logic [USER_WIDTH-1:0] mm2mm_wuser,
output logic mm2mm_wvalid,
input logic mm2mm_wready,
input logic [ID_WIDTH-1:0] mm2mm_bid,
input logic [1:0] mm2mm_bresp,
input logic [USER_WIDTH-1:0] mm2mm_buser,
input logic mm2mm_bvalid,
output logic mm2mm_bready,
output logic [ID_WIDTH-1:0] mm2mm_arid,
output logic [ADDR_WIDTH-1:0] mm2mm_araddr,
output logic [7:0] mm2mm_arlen,
output logic [2:0] mm2mm_arsize,
output logic [1:0] mm2mm_arburst,
output logic mm2mm_arlock,
output logic [3:0] mm2mm_arcache,
output logic [2:0] mm2mm_arprot,
output logic [3:0] mm2mm_arqos,
output logic [USER_WIDTH-1:0] mm2mm_aruser,
output logic mm2mm_arvalid,
input logic mm2mm_arready,
input logic [ID_WIDTH-1:0] mm2mm_rid,
input logic [DATA_WIDTH-1:0] mm2mm_rdata,
input logic [1:0] mm2mm_rresp,
input logic mm2mm_rlast,
input logic [USER_WIDTH-1:0] mm2mm_ruser,
input logic mm2mm_rvalid,
output logic mm2mm_rready,
// Global Interrupt (OR'd from all channels)
output logic dma_finish
);
// ==========================================
// Local Parameters & Utilities
// ==========================================
// Width of a channel index. $clog2(1) evaluates to 0, which would turn the
// [CH_BITS-1:0] vectors into reversed ranges and the [6 +: CH_BITS] address
// slices into zero-width part-selects, so clamp to one bit when PORTS == 1.
// Out-of-range indices are still rejected by the "< PORTS" checks on access.
localparam int CH_BITS = (PORTS > 1) ? $clog2(PORTS) : 1;
// Byte-lane merge for a 32-bit register write.
//   old_val : current register contents
//   new_val : write data from the bus
//   wstrb   : one strobe bit per byte lane (bit 0 = bits [7:0])
// Returns old_val with every strobed byte replaced by the matching byte of
// new_val; bytes whose strobe is 0 are kept unchanged.
function automatic logic [31:0] apply_wstrb(
    input logic [31:0] old_val,
    input logic [31:0] new_val,
    input logic [3:0]  wstrb
);
    logic [31:0] merged;
    for (int lane = 0; lane < 4; lane++) begin
        merged[8*lane +: 8] = wstrb[lane] ? new_val[8*lane +: 8]
                                          : old_val[8*lane +: 8];
    end
    return merged;
endfunction
// ==========================================
// Internal Registers (Per Channel)
// ==========================================
logic [ADDR_WIDTH-1:0] ch_src_addr [PORTS];
logic [ADDR_WIDTH-1:0] ch_dst_addr [PORTS];
logic [31:0] ch_len [PORTS];
logic [31:0] ch_tag [PORTS];
logic [31:0] ch_ctrl [PORTS];
logic [PORTS-1:0] ch_req; // Pending requests (Busy)
logic [PORTS-1:0] ch_done; // Completion flags
logic [PORTS-1:0] arb_set_done; // From Arbiter to CSR
// ==========================================
// Address Decoding (0x40 offset per channel)
// ==========================================
wire [CH_BITS-1:0] wr_ch = s_axil_awaddr[6 +: CH_BITS];
wire [5:0] wr_reg = s_axil_awaddr[5:0];
wire [CH_BITS-1:0] rd_ch = s_axil_araddr[6 +: CH_BITS];
wire [5:0] rd_reg = s_axil_araddr[5:0];
// ==========================================
// AXI-Lite Slave Logic (Robust Backpressure)
// ==========================================
assign s_axil_bresp = 2'b00;
assign s_axil_rresp = 2'b00;
// ------------------------------------------------------------------
// AXI-Lite write channel
// ------------------------------------------------------------------
// AWREADY and WREADY are raised together, so address and data are always
// consumed in the same cycle. Both are registered: they go high in the
// cycle after AWVALID and WVALID are seen together, and the !s_axil_awready
// term forces them low again in the following cycle (one-cycle pulse).
// A new write is only accepted when no response is pending, or the pending
// response is being taken in this very cycle.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_awready <= 1'b0;
        s_axil_wready  <= 1'b0;
    end else begin
        s_axil_awready <= s_axil_awvalid && s_axil_wvalid && !s_axil_awready
                          && (!s_axil_bvalid || s_axil_bready);
        s_axil_wready  <= s_axil_awvalid && s_axil_wvalid && !s_axil_awready
                          && (!s_axil_bvalid || s_axil_bready);
    end
end
// High for the single cycle in which both the AW and W handshakes complete.
wire do_write = (s_axil_awvalid && s_axil_awready) && (s_axil_wvalid && s_axil_wready);
// Write response: raised the cycle after a write is accepted and held
// until the master acknowledges it with BREADY.
always_ff @(posedge clk) begin
    if (!rst_n)                              s_axil_bvalid <= 1'b0;
    else if (do_write)                       s_axil_bvalid <= 1'b1;
    else if (s_axil_bvalid && s_axil_bready) s_axil_bvalid <= 1'b0;
end
// ------------------------------------------------------------------
// AXI-Lite read channel
// ------------------------------------------------------------------
// ARREADY is a registered one-cycle pulse: it rises the cycle after ARVALID
// is seen, provided no read data is pending (or the pending data is being
// taken in this cycle), and the !s_axil_arready term drops it again.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_arready <= 1'b0;
    end else begin
        s_axil_arready <= s_axil_arvalid && !s_axil_arready
                          && (!s_axil_rvalid || s_axil_rready);
    end
end
// High for the single cycle in which the AR handshake completes.
wire do_read = s_axil_arvalid && s_axil_arready;
// Read data: captured in the AR handshake cycle and held with RVALID until
// the master takes it.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_rvalid <= 1'b0;
        s_axil_rdata  <= '0;
    end else if (do_read) begin
        s_axil_rvalid <= 1'b1;
        // Default for out-of-range channels and unmapped offsets; a matching
        // case arm below overrides it (the last non-blocking assignment wins).
        s_axil_rdata  <= 32'd0;
        if (rd_ch < PORTS) begin
            case (rd_reg)
                6'h00: s_axil_rdata <= ch_src_addr[rd_ch];
                6'h04: s_axil_rdata <= ch_dst_addr[rd_ch];
                6'h08: s_axil_rdata <= ch_len[rd_ch];
                6'h0C: s_axil_rdata <= ch_tag[rd_ch];
                6'h10: s_axil_rdata <= ch_ctrl[rd_ch];
                // STATUS: bit 1 = done, bit 0 = busy (request pending)
                6'h14: s_axil_rdata <= {30'd0, ch_done[rd_ch], ch_req[rd_ch]};
                default: ; // keep the zero default
            endcase
        end
    end else if (s_axil_rvalid && s_axil_rready) begin
        s_axil_rvalid <= 1'b0;
    end
end
// ==========================================
// Register File Write Logic
// ==========================================
assign dma_finish = |ch_done;
// Per-channel register file.
//   - Reset clears every channel register and both status vectors.
//   - A completion pulse from the arbiter (arb_set_done[i]) clears the
//     channel's pending/busy bit and sets its done flag.
//   - CPU writes are applied afterwards, so where both touch the same bit in
//     one cycle the CPU assignment is the one that takes effect, except where
//     it is explicitly gated on !arb_set_done.
// All configuration registers, CTRL included, are locked while the channel
// has a request pending (ch_req set). CTRL must be locked as well because the
// engine's burst length/size inputs are driven combinationally from
// ch_ctrl[cur_ch]; an unguarded CTRL write could change them under a transfer
// that is already queued or running. A start request written while the
// channel is busy is therefore ignored; software is expected to poll STATUS
// bit 0 first.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        ch_req  <= '0;
        ch_done <= '0;
        for (int i = 0; i < PORTS; i++) begin
            ch_src_addr[i] <= '0;
            ch_dst_addr[i] <= '0;
            ch_len[i]      <= '0;
            ch_tag[i]      <= '0;
            ch_ctrl[i]     <= '0;
        end
    end else begin
        // 1. Hardware status update: transfer finished on channel i.
        for (int i = 0; i < PORTS; i++) begin
            if (arb_set_done[i]) begin
                ch_req[i]  <= 1'b0;
                ch_done[i] <= 1'b1;
            end
        end
        // 2. CPU register writes (channels outside [0, PORTS) are ignored).
        if (do_write && wr_ch < PORTS) begin
            case (wr_reg)
                6'h00: if (!ch_req[wr_ch]) ch_src_addr[wr_ch] <= apply_wstrb(ch_src_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h04: if (!ch_req[wr_ch]) ch_dst_addr[wr_ch] <= apply_wstrb(ch_dst_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h08: if (!ch_req[wr_ch]) ch_len[wr_ch]      <= apply_wstrb(ch_len[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h0C: if (!ch_req[wr_ch]) ch_tag[wr_ch]      <= apply_wstrb(ch_tag[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h10: begin
                    if (!ch_req[wr_ch]) begin
                        ch_ctrl[wr_ch] <= apply_wstrb(ch_ctrl[wr_ch], s_axil_wdata, s_axil_wstrb);
                        // Start trigger: bit 0 written as 1 with byte lane 0 enabled.
                        if (s_axil_wstrb[0] && s_axil_wdata[0]) begin
                            ch_req[wr_ch] <= 1'b1;
                            // A new start clears a stale done flag. The
                            // arb_set_done check is kept as a defensive guard
                            // so a completion pulse is never overwritten.
                            if (!arb_set_done[wr_ch]) ch_done[wr_ch] <= 1'b0;
                        end
                    end
                end
                6'h14: begin
                    // Software write-1-to-clear of the done flag (bit 1).
                    // Skipped when hardware sets the flag in the same cycle,
                    // so a completion is never lost.
                    if (s_axil_wstrb[0] && s_axil_wdata[1]) begin
                        if (!arb_set_done[wr_ch]) ch_done[wr_ch] <= 1'b0;
                    end
                end
                default: ; // unmapped offset: write is accepted and discarded
            endcase
        end
    end
end
// ==========================================
// Round-Robin Arbiter & Engine Driver
// ==========================================
typedef enum logic [1:0] {IDLE, RUN} state_t;
state_t state;
logic [CH_BITS-1:0] cur_ch;
logic [CH_BITS-1:0] rr_ptr;
// Interfaces to Engine
logic engine_start;
logic [ADDR_WIDTH-1:0] engine_src;
logic [ADDR_WIDTH-1:0] engine_dst;
logic [31:0] engine_bytes;
logic [7:0] engine_len;
logic [2:0] engine_size;
logic engine_done;
// Dynamic routing to the engine based on current active channel
assign engine_src = ch_src_addr[cur_ch];
assign engine_dst = ch_dst_addr[cur_ch];
assign engine_bytes = ch_len[cur_ch];
assign engine_len = ch_ctrl[cur_ch][13:6];
assign engine_size = ch_ctrl[cur_ch][5:3];
// Round-robin arbiter and engine sequencer.
//   IDLE: scan the channels starting at rr_ptr; the first one with a pending
//         request is latched into cur_ch, rr_ptr moves to the channel after
//         it, and a one-cycle engine_start pulse is issued.
//   RUN : wait for engine_done, then pulse arb_set_done for cur_ch.
// arb_set_done and engine_start default to 0 every cycle, so both are
// single-cycle pulses.
// NOTE(review): engine_done is first sampled in the same cycle engine_start
// is high. This assumes the engine does not hold ctrl_done high from a
// previous transfer at that point — confirm against snix_axi_mm2mm.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        state        <= IDLE;
        engine_start <= 1'b0;
        rr_ptr       <= '0;
        cur_ch       <= '0;
        arb_set_done <= '0;
    end else begin
        arb_set_done <= '0;
        engine_start <= 1'b0;
        case (state)
            IDLE: begin
                for (int i = 0; i < PORTS; i++) begin
                    logic [CH_BITS:0]   check_ch_ext;
                    logic [CH_BITS-1:0] check_ch;
                    // (rr_ptr + i) mod PORTS, using one extra bit and a
                    // conditional subtract instead of a modulo operator.
                    check_ch_ext = {1'b0, rr_ptr} + i[CH_BITS:0];
                    check_ch = (check_ch_ext >= PORTS) ? (check_ch_ext - PORTS) : check_ch_ext[CH_BITS-1:0];
                    // Skip a channel whose completion pulse is active this
                    // cycle: its ch_req bit has not been cleared yet.
                    if (ch_req[check_ch] && !arb_set_done[check_ch]) begin
                        cur_ch       <= check_ch;
                        rr_ptr       <= (check_ch == (PORTS - 1)) ? '0 : (check_ch + 1);
                        engine_start <= 1'b1;
                        state        <= RUN;
                        break;
                    end
                end
            end
            RUN: begin
                if (engine_done) begin
                    arb_set_done[cur_ch] <= 1'b1;
                    state                <= IDLE;
                end
                // Optional: add a watchdog timeout counter here if the AXI
                // endpoints cannot be trusted to always respond.
            end
            // state is 2 bits wide but only two values are encoded; recover
            // to IDLE instead of locking up if an illegal value is reached.
            default: state <= IDLE;
        endcase
    end
end
// ==========================================
// Instantiate The Original Core Engine
// ==========================================
snix_axi_mm2mm #(
.ADDR_WIDTH(ADDR_WIDTH),
.DATA_WIDTH(DATA_WIDTH),
.ID_WIDTH (ID_WIDTH),
.USER_WIDTH(USER_WIDTH),
.FIFO_DEPTH(FIFO_DEPTH)
) u_core_engine (
.clk (clk),
.rst_n (rst_n),
.ctrl_start (engine_start),
.ctrl_stop (1'b0), // Tied off; can be wired if global abort is needed
.ctrl_src_addr (engine_src),
.ctrl_dst_addr (engine_dst),
.ctrl_len (engine_len),
.ctrl_size (engine_size),
.ctrl_transfer_len (engine_bytes),
.ctrl_done (engine_done),
// AXI4 Port Connections
.mm2mm_awid (mm2mm_awid), .mm2mm_awaddr (mm2mm_awaddr),
.mm2mm_awlen (mm2mm_awlen), .mm2mm_awsize (mm2mm_awsize),
.mm2mm_awburst(mm2mm_awburst),.mm2mm_awlock (mm2mm_awlock),
.mm2mm_awcache(mm2mm_awcache),.mm2mm_awprot (mm2mm_awprot),
.mm2mm_awqos (mm2mm_awqos), .mm2mm_awuser (mm2mm_awuser),
.mm2mm_awvalid(mm2mm_awvalid),.mm2mm_awready(mm2mm_awready),
.mm2mm_wdata (mm2mm_wdata), .mm2mm_wstrb (mm2mm_wstrb),
.mm2mm_wlast (mm2mm_wlast), .mm2mm_wuser (mm2mm_wuser),
.mm2mm_wvalid (mm2mm_wvalid), .mm2mm_wready (mm2mm_wready),
.mm2mm_bid (mm2mm_bid), .mm2mm_bresp (mm2mm_bresp),
.mm2mm_buser (mm2mm_buser), .mm2mm_bvalid (mm2mm_bvalid),
.mm2mm_bready (mm2mm_bready),
.mm2mm_arid (mm2mm_arid), .mm2mm_araddr (mm2mm_araddr),
.mm2mm_arlen (mm2mm_arlen), .mm2mm_arsize (mm2mm_arsize),
.mm2mm_arburst(mm2mm_arburst),.mm2mm_arlock (mm2mm_arlock),
.mm2mm_arcache(mm2mm_arcache),.mm2mm_arprot (mm2mm_arprot),
.mm2mm_arqos (mm2mm_arqos), .mm2mm_aruser (mm2mm_aruser),
.mm2mm_arvalid(mm2mm_arvalid),.mm2mm_arready(mm2mm_arready),
.mm2mm_rid (mm2mm_rid), .mm2mm_rdata (mm2mm_rdata),
.mm2mm_rresp (mm2mm_rresp), .mm2mm_rlast (mm2mm_rlast),
.mm2mm_ruser (mm2mm_ruser), .mm2mm_rvalid (mm2mm_rvalid),
.mm2mm_rready (mm2mm_rready)
);
endmodule

View File

@@ -30,6 +30,8 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
------------------------------------------------------------------------------*/
`timescale 1ns / 1ps
//1f00_0000 apb
//1f10_0000 dvi
//1f20_0000 confreg
@@ -1475,43 +1477,6 @@ fft_controller u_fft_controller(
.fft_finish (fft_finish )
);
// assign dma_m_arid = 4'b0 ;
// assign dma_m_araddr = 32'h0;
// assign dma_m_arlen = 8'b0 ;
// assign dma_m_arsize = 3'b0 ;
// assign dma_m_arburst = 2'b0;
// assign dma_m_arlock = 1'b0;
// assign dma_m_arcache = 4'b0;
// assign dma_m_arprot = 3'b0;
// assign dma_m_arvalid = 1'b0;
// assign dma_m_rready = 1'b1;
// assign dma_m_awid = 4'b0;
// assign dma_m_awaddr = 32'b0;
// assign dma_m_awlen = 8'b0;
// assign dma_m_awsize = 3'b0;
// assign dma_m_awburst = 2'b0;
// assign dma_m_awlock = 1'b0;
// assign dma_m_awcache = 4'b0;
// assign dma_m_awprot = 3'b0;
// assign dma_m_awvalid = 1'b0;
// assign dma_m_wid = 4'b0;
// assign dma_m_wdata = 32'b0;
// assign dma_m_wstrb = 4'b0;
// assign dma_m_wlast = 1'b0;
// assign dma_m_wvalid = 1'b0;
// assign dma_m_bready = 1'b1;
// assign dma_s_arready = 1'b1;
// assign dma_s_rid = 5'b0;
// assign dma_s_rdata = 32'b0;
// assign dma_s_rresp = 2'b0;
// assign dma_s_rlast = 1'b0;
// assign dma_s_rvalid = 1'b0;
// assign dma_s_awready = 1'b1;
// assign dma_s_wready = 1'b1;
// assign dma_s_bid = 5'b0;
// assign dma_s_bresp = 2'b0;
// assign dma_s_bvalid = 1'b0;
wire [31:0] cdma_s_awaddr;
wire [2:0] cdma_s_awprot;
wire cdma_s_awvalid;
@@ -1600,77 +1565,84 @@ u_axi_axil_adapter(
.m_axil_rready(cdma_s_rready)
);
snix_axi_cdma #(
.ADDR_WIDTH (32 ),
.DATA_WIDTH (32 ),
.AXIL_ADDR_WIDTH (32 ),
.AXIL_DATA_WIDTH (32 ),
.ID_WIDTH (4 ),
.USER_WIDTH (1 ))
u_snix_axi_cdma(
.clk (sys_clk ),
.rst_n (sys_resetn ),
.s_axil_awaddr (cdma_s_awaddr ),
.s_axil_awvalid (cdma_s_awvalid ),
.s_axil_awready (cdma_s_awready ),
.s_axil_wdata (cdma_s_wdata ),
.s_axil_wstrb (cdma_s_wstrb ),
.s_axil_wvalid (cdma_s_wvalid ),
.s_axil_wready (cdma_s_wready ),
.s_axil_bresp (cdma_s_bresp ),
.s_axil_bvalid (cdma_s_bvalid ),
.s_axil_bready (cdma_s_bready ),
.s_axil_araddr (cdma_s_araddr ),
.s_axil_arvalid (cdma_s_arvalid ),
.s_axil_arready (cdma_s_arready ),
.s_axil_rdata (cdma_s_rdata ),
.s_axil_rresp (cdma_s_rresp ),
.s_axil_rvalid (cdma_s_rvalid ),
.s_axil_rready (cdma_s_rready ),
// 8 通道 DMA
snix_axil_cdma_mux #(
.ADDR_WIDTH (32),
.DATA_WIDTH (32), // 适配你的 32-bit 总线
.AXIL_ADDR_WIDTH (32),
.AXIL_DATA_WIDTH (32),
.ID_WIDTH (4), // 匹配 Crossbar Master ID 宽度
.USER_WIDTH (1),
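.PORTS (4), // NOTE(review): 4 channels are instantiated here, but the instance name (_8ch) and the README say 8 - confirm the intended count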
.FIFO_DEPTH (64)
) u_snix_axil_cdma_mux_8ch (
.clk (sys_clk),
.rst_n (sys_resetn), // 低电平复位
.mm2mm_awid (dma_m_awid ),
.mm2mm_awaddr (dma_m_awaddr ),
.mm2mm_awlen (dma_m_awlen ),
.mm2mm_awsize (dma_m_awsize ),
.mm2mm_awburst (dma_m_awburst ),
.mm2mm_awlock (dma_m_awlock ),
.mm2mm_awcache (dma_m_awcache ),
.mm2mm_awprot (dma_m_awprot ),
.mm2mm_awqos ( ),
.mm2mm_awuser ( ),
.mm2mm_awvalid (dma_m_awvalid ),
.mm2mm_awready (dma_m_awready ),
.mm2mm_wdata (dma_m_wdata ),
.mm2mm_wstrb (dma_m_wstrb ),
.mm2mm_wlast (dma_m_wlast ),
.mm2mm_wuser ( ),
.mm2mm_wvalid (dma_m_wvalid ),
.mm2mm_wready (dma_m_wready ),
.mm2mm_bid (dma_m_bid ),
.mm2mm_bresp (dma_m_bresp ),
// AXI-Lite 从机接口 ( CPU 发来的配置请求)
.s_axil_awaddr (cdma_s_awaddr),
.s_axil_awvalid (cdma_s_awvalid),
.s_axil_awready (cdma_s_awready),
.s_axil_wdata (cdma_s_wdata),
.s_axil_wstrb (cdma_s_wstrb),
.s_axil_wvalid (cdma_s_wvalid),
.s_axil_wready (cdma_s_wready),
.s_axil_bresp (cdma_s_bresp),
.s_axil_bvalid (cdma_s_bvalid),
.s_axil_bready (cdma_s_bready),
.s_axil_araddr (cdma_s_araddr),
.s_axil_arvalid (cdma_s_arvalid),
.s_axil_arready (cdma_s_arready),
.s_axil_rdata (cdma_s_rdata),
.s_axil_rresp (cdma_s_rresp),
.s_axil_rvalid (cdma_s_rvalid),
.s_axil_rready (cdma_s_rready),
// AXI4 主机接口 ( Crossbar 去搬运数据)
.mm2mm_awid (dma_m_awid),
.mm2mm_awaddr (dma_m_awaddr),
.mm2mm_awlen (dma_m_awlen),
.mm2mm_awsize (dma_m_awsize),
.mm2mm_awburst (dma_m_awburst),
.mm2mm_awlock (dma_m_awlock),
.mm2mm_awcache (dma_m_awcache),
.mm2mm_awprot (dma_m_awprot),
.mm2mm_awqos (), // 悬空即可
.mm2mm_awuser (), // 悬空即可
.mm2mm_awvalid (dma_m_awvalid),
.mm2mm_awready (dma_m_awready),
.mm2mm_wdata (dma_m_wdata),
.mm2mm_wstrb (dma_m_wstrb),
.mm2mm_wlast (dma_m_wlast),
.mm2mm_wuser (), // 悬空即可
.mm2mm_wvalid (dma_m_wvalid),
.mm2mm_wready (dma_m_wready),
.mm2mm_bid (dma_m_bid),
.mm2mm_bresp (dma_m_bresp),
.mm2mm_buser (1'b0),
.mm2mm_bvalid (dma_m_bvalid ),
.mm2mm_bready (dma_m_bready ),
.mm2mm_arid (dma_m_arid ),
.mm2mm_araddr (dma_m_araddr ),
.mm2mm_arlen (dma_m_arlen ),
.mm2mm_arsize (dma_m_arsize ),
.mm2mm_arburst (dma_m_arburst ),
.mm2mm_arlock (dma_m_arlock ),
.mm2mm_arcache (dma_m_arcache ),
.mm2mm_arprot (dma_m_arprot ),
.mm2mm_arqos ( ),
.mm2mm_aruser ( ),
.mm2mm_arvalid (dma_m_arvalid ),
.mm2mm_arready (dma_m_arready ),
.mm2mm_rid (dma_m_rid ),
.mm2mm_rdata (dma_m_rdata ),
.mm2mm_rresp (dma_m_rresp ),
.mm2mm_rlast (dma_m_rlast ),
.mm2mm_bvalid (dma_m_bvalid),
.mm2mm_bready (dma_m_bready),
.mm2mm_arid (dma_m_arid),
.mm2mm_araddr (dma_m_araddr),
.mm2mm_arlen (dma_m_arlen),
.mm2mm_arsize (dma_m_arsize),
.mm2mm_arburst (dma_m_arburst),
.mm2mm_arlock (dma_m_arlock),
.mm2mm_arcache (dma_m_arcache),
.mm2mm_arprot (dma_m_arprot),
.mm2mm_arqos (), // 悬空即可
.mm2mm_aruser (), // 悬空即可
.mm2mm_arvalid (dma_m_arvalid),
.mm2mm_arready (dma_m_arready),
.mm2mm_rid (dma_m_rid),
.mm2mm_rdata (dma_m_rdata),
.mm2mm_rresp (dma_m_rresp),
.mm2mm_rlast (dma_m_rlast),
.mm2mm_ruser (1'b0),
.mm2mm_rvalid (dma_m_rvalid ),
.mm2mm_rready (dma_m_rready ),
.mm2mm_rvalid (dma_m_rvalid),
.mm2mm_rready (dma_m_rready),
// 全局中断输出
.dma_finish (dma_finish)
);

View File

@@ -39,7 +39,9 @@ C_SRCS += $(COMMON_DIR)/drivers/core_time.c
C_SRCS += $(COMMON_DIR)/drivers/common_func.c
C_SRCS += $(COMMON_DIR)/drivers/dvi.c \
$(COMMON_DIR)/drivers/led.c \
$(COMMON_DIR)/drivers/seg7.c
$(COMMON_DIR)/drivers/seg7.c \
$(COMMON_DIR)/drivers/dma.c \
$(COMMON_DIR)/drivers/fft.c
INCLUDES += -I./ \
-I$(COMMON_DIR)/include \

View File

@@ -0,0 +1,37 @@
#include "dma.h"
/*
 * Program one DMA channel and start the transfer.
 *
 *   ch_id : channel index, used to locate the register block via DMA_CH()
 *   src   : source address written to SRC_ADDR
 *   dst   : destination address written to DST_ADDR
 *   bytes : total transfer length in bytes, written to LENGTH
 *   tag   : free-form software tag written to TAG (pass 0 if unused)
 *
 * All five arguments are required. If the channel reports busy the call
 * returns without touching any register and without reporting the failure,
 * so callers that need certainty must check STATUS themselves beforehand.
 * The burst shape is fixed here: AxSIZE = 2 (4-byte beats) and AxLEN = 15
 * (16 beats per burst).
 */
void dma_start_transfer(int ch_id, uint32_t src, uint32_t dst, uint32_t bytes, uint32_t tag) {
    volatile dma_ch_regs_t* const regs = DMA_CH(ch_id);
    /* Burst attributes and the start bit go out in a single CTRL write. */
    const uint32_t start_word = CTRL_BURST_SIZE(2) | CTRL_BURST_LEN(15) | CTRL_START_BIT;

    /* Only program an idle channel, so a running job is never overwritten. */
    if ((regs->STATUS & STATUS_BUSY_BIT) == 0) {
        regs->SRC_ADDR = src;
        regs->DST_ADDR = dst;
        regs->LENGTH   = bytes;
        regs->TAG      = tag;
        /* CTRL is written last: setting bit 0 launches the transfer. */
        regs->CTRL     = start_word;
    }
}
/*
 * Block until channel `ch_id` reports completion, then acknowledge it.
 *
 * Spins on the DONE bit of STATUS with no timeout, so this never returns if
 * the channel never completes. The DONE flag is write-1-to-clear, hence the
 * final write of STATUS_DONE_BIT.
 */
void dma_wait_polling(int ch_id) {
    volatile dma_ch_regs_t* const regs = DMA_CH(ch_id);

    for (;;) {
        if (regs->STATUS & STATUS_DONE_BIT) {
            break;
        }
    }
    regs->STATUS = STATUS_DONE_BIT;
}

View File

@@ -0,0 +1,14 @@
#include "fft.h"
#include "common_func.h"
/*
 * Start the hardware FFT by writing FFT_CTRL_START to the FFT CSR.
 * Declared with (void) so the definition is a real prototype; an empty
 * parameter list is not one before C23.
 */
void fft_start(void) {
    RegWrite(FFT_CSR_REG, FFT_CTRL_START);
}
/*
 * Busy-wait until the FFT CSR reports FFT_STAT_DONE.
 * There is no timeout: this never returns if the done bit is never set.
 * Declared with (void) so the definition is a real prototype.
 */
void fft_wait(void) {
    while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
        /* spin until the done bit is set */
    }
}
/*
 * Return the current raw value of the FFT CSR, as read by RegRead().
 * Declared with (void) so the definition is a real prototype.
 */
unsigned int fft_get_csr(void) {
    return RegRead(FFT_CSR_REG);
}

View File

@@ -0,0 +1,30 @@
#pragma once
#include <stdint.h>
// Register block of one DMA channel. Six 32-bit registers followed by
// padding, so that each channel occupies 0x40 bytes.
typedef struct {
volatile uint32_t SRC_ADDR; // 0x00: source address
volatile uint32_t DST_ADDR; // 0x04: destination address
volatile uint32_t LENGTH; // 0x08: total transfer length in bytes
volatile uint32_t TAG; // 0x0C: software tag (not interpreted by hardware; for software job tracking)
volatile uint32_t CTRL; // 0x10: control register (burst attributes + start)
volatile uint32_t STATUS; // 0x14: status register (Busy, Done)
volatile uint32_t RESV[10]; // 0x18 ~ 0x3C: reserved, pads the block to 0x40 bytes
} dma_ch_regs_t;
// Base address of the DMA controller register space
#define CDMA_MUX_BASE 0xbf300000
// Pointer to the register block of channel i (channels are 0x40 bytes apart).
// No range check is done on i.
// NOTE(review): the valid channel count depends on the hardware PORTS setting — confirm.
#define DMA_CH(i) ((volatile dma_ch_regs_t*)(CDMA_MUX_BASE + (i) * 0x40))
// CTRL register bit definitions
#define CTRL_START_BIT (1 << 0)
#define CTRL_BURST_SIZE(x) (((x) & 0x7) << 3) // Bits [5:3]: AXI AxSIZE (0=1B, 1=2B, 2=4B, 3=8B)
#define CTRL_BURST_LEN(x) (((x) & 0xFF) << 6) // Bits [13:6]: AXI AxLEN (0=1 beat, 15=16 beats)
// STATUS register bit definitions
#define STATUS_BUSY_BIT (1 << 0)
#define STATUS_DONE_BIT (1 << 1)
// Program channel ch_id and start a transfer; all five arguments are required.
void dma_start_transfer(int ch_id, uint32_t src, uint32_t dst, uint32_t bytes, uint32_t tag);
// Poll channel ch_id until its DONE bit is set, then clear that bit.
void dma_wait_polling(int ch_id);

View File

@@ -0,0 +1,16 @@
#pragma once
/* Address map of the FFT accelerator, as offsets from FFT_BASE. */
#define FFT_BASE 0xbf400000
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)   /* input buffer, real part */
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)   /* input buffer, imaginary part */
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)  /* output buffer, real part */
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)  /* output buffer, imaginary part */
#define FFT_CSR_REG (FFT_BASE + 0xF000)      /* control/status register */
/* CSR bit definitions. */
#define FFT_CTRL_START (1 << 4)
#define FFT_STAT_DONE (1 << 1)
#define FFT_STAT_BUSY (1 << 0)
/* Transform size in points. */
#define FFT_POINT_NUM 1024
/*
 * Driver entry points. (void) is spelled out so these are real prototypes;
 * an empty parameter list declares unspecified arguments before C23.
 */
void fft_start(void);           /* write FFT_CTRL_START to the CSR */
void fft_wait(void);            /* spin until FFT_STAT_DONE is set in the CSR */
unsigned int fft_get_csr(void); /* return the raw CSR value */

View File

@@ -16,7 +16,7 @@ CFLAGS += -DSIMU=0
CFLAGS += -DFLAGS_STR=\""$(CFLAGS)"\"
CFLAGS += -g
#配置迭代次数
CFLAGS += -DITERATIONS=1
CFLAGS += -DITERATIONS=1000
OBJDIR = obj
COMMON_DIR = ../../bsp

View File

@@ -1,9 +1,12 @@
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <common_func.h>
#include <confreg_time.h>
#include <dma.h>
// BSP板级支持包所需全局变量
unsigned long UART_BASE = 0xbf000000;
@@ -11,35 +14,12 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
#define FFT_BASE 0xbf400000
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
#define FFT_CSR_REG (FFT_BASE + 0xF000)
#define FFT_CTRL_START (1 << 4)
#define FFT_STAT_DONE (1 << 1)
#define FFT_STAT_BUSY (1 << 0)
#define FFT_POINT_NUM 1024
#define DMA_BASE 0xbf300000
#define DMA_CTRL (DMA_BASE + 0x0000)
#define DMA_LEN (DMA_BASE + 0x0004)
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
#define DMA_STATUS (DMA_BASE + 0x0010)
const float PI = 3.14159265358979323846;
// 加上 aligned(64) 是为了防止 DMA 突发传输时跨越缓存行或 AXI 非对齐边界
uint32_t src_array[64] __attribute__((aligned(64)));
uint32_t dst_array[64] __attribute__((aligned(64)));
int main(int argc, char** argv)
{
unsigned int dma_status = RegRead(DMA_STATUS);
printf("dma_status init = %x\n", dma_status);
uint32_t data_len = 64;
uint32_t byte_len = data_len * 4;
@@ -65,22 +45,9 @@ int main(int argc, char** argv)
printf("uncached_src: %x\n", uncached_src);
printf("uncached_dst: %x\n", uncached_dst);
// 配置 DMADMA 只需要最纯粹的物理地址
RegWrite(DMA_SRC_ADDR, phys_src_addr);
RegWrite(DMA_DST_ADDR, phys_dst_addr);
RegWrite(DMA_LEN, byte_len);
// burst_len = 15(16拍), burst_size = 2(4字节)
uint32_t burst_len = 15;
uint32_t burst_size = 2;
uint32_t ctrl_val = (burst_len << 6) | (burst_size << 3) | 0x01;
RegWrite(DMA_CTRL, ctrl_val);
// 等待 DMA 完成
while ((RegRead(DMA_STATUS) & 0x01) == 0) {
printf("polling...\n");
}
dma_start_transfer(0, phys_src_addr, phys_dst_addr, byte_len);
dma_wait_polling(0);
printf("dma passed!\n");

View File

@@ -0,0 +1,75 @@
dma_status init = 0
src_array: 1c080d80
dst_array: 1c080c80
phys_src_array: 1c080d80
phys_dst_array: 1c080c80
uncached_src: bc080d80
uncached_dst: bc080c80
polling...
dma passed!
0: 0
1: 1
2: 4
3: 9
4: 16
5: 25
6: 36
7: 49
8: 64
9: 81
10: 100
11: 121
12: 144
13: 169
14: 196
15: 225
16: 256
17: 289
18: 324
19: 361
20: 400
21: 441
22: 484
23: 529
24: 576
25: 625
26: 676
27: 729
28: 784
29: 841
30: 900
31: 961
32: 1024
33: 1089
34: 1156
35: 1225
36: 1296
37: 1369
38: 1444
39: 1521
40: 1600
41: 1681
42: 1764
43: 1849
44: 1936
45: 2025
46: 2116
47: 2209
48: 2304
49: 2401
50: 2500
51: 2601
52: 2704
53: 2809
54: 2916
55: 3025
56: 3136
57: 3249
58: 3364
59: 3481
60: 3600
61: 3721
62: 3844
63: 3969
Success! Array to Array DMA transfer verified.

View File

@@ -1,8 +1,10 @@
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <common_func.h>
#include <confreg_time.h>
#include <fft.h>
// BSP板级支持包所需全局变量
unsigned long UART_BASE = 0xbf000000;
@@ -10,24 +12,6 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
#define FFT_BASE 0xbf400000
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
#define FFT_CSR_REG (FFT_BASE + 0xF000)
#define FFT_CTRL_START (1 << 4)
#define FFT_STAT_DONE (1 << 1)
#define FFT_STAT_BUSY (1 << 0)
#define FFT_POINT_NUM 1024
#define DMA_BASE 0xbf300000
#define DMA_CTRL (DMA_BASE + 0x0000)
#define DMA_LEN (DMA_BASE + 0x0004)
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
#define DMA_STATUS (DMA_BASE + 0x0010)
const float PI = 3.14159265358979323846;
// 读取定时器的当前Tick
@@ -35,14 +19,12 @@ unsigned int get_timer_ticks() {
return RegRead(CONFREG_TIMER_BASE);
}
// ---------------------------------------------------------
// 软件FFT实现 (基2 DIT-FFT 算法)
// ---------------------------------------------------------
void sw_fft(float re[], float im[], int n) {
int i, j, k, l;
float tr, ti, ur, ui, wr, wi;
// 1. 比特翻转 (Bit Reversal)
// 比特翻转 (Bit Reversal)
j = 0;
for (i = 0; i < n - 1; i++) {
if (i < j) {
@@ -58,7 +40,7 @@ void sw_fft(float re[], float im[], int n) {
j += k;
}
// 2. 蝶形运算 (Butterfly Computation)
// 蝶形运算 (Butterfly Computation)
for (l = 1; l < n; l *= 2) {
ur = 1.0;
ui = 0.0;
@@ -110,9 +92,7 @@ int main(int argc, char** argv)
unsigned int tick_start, tick_end;
unsigned int hw_time, sw_time;
// ==========================================
// 1. 硬件加速 FFT 测试
// ==========================================
// 硬件加速 FFT 测试
printf("\n--- Starting Hardware FFT ---\n");
tick_start = get_ns();
@@ -122,10 +102,8 @@ int main(int argc, char** argv)
}
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
// 等待硬件计算完成
}
fft_start();
fft_wait();
tick_end = get_ns();

View File

@@ -0,0 +1,19 @@
fft_csr = 0
--- Starting Hardware FFT ---
--- Starting Software FFT ---
--- Performance Comparison ---
Timer Clock Freq : 50000000 Hz
Hardware FFT Time: 1695360 ns (1.695 ms)
Software FFT Time: 274547190 ns (274.547 ms)
Speedup Ratio : 161.94x
--- Verification (Only showing Bins with energy > 10) ---
Bin [ 0] Hz: HW(Re: 4007, Im: 0) | SW(Re: 4000, Im: 0)
Bin [ 10] Hz: HW(Re: 4006, Im: -1) | SW(Re: 3999, Im: 0)
Bin [ 200] Hz: HW(Re: 2, Im: -2994) | SW(Re: 0, Im: -3000)
Bin [ 400] Hz: HW(Re: -5, Im: -1496) | SW(Re: 0, Im: -1499)
Bin [ 624] Hz: HW(Re: -5, Im: 1497) | SW(Re: 0, Im: 1500)
Bin [ 824] Hz: HW(Re: 3, Im: 2994) | SW(Re: 0, Im: 2999)
Bin [1014] Hz: HW(Re: 4007, Im: 0) | SW(Re: 3999, Im: 0)

View File

@@ -1,9 +1,13 @@
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <stdint.h>
#include <common_func.h>
#include <confreg_time.h>
#include <dma.h>
#include <fft.h>
// BSP板级支持包所需全局变量
unsigned long UART_BASE = 0xbf000000;
@@ -11,42 +15,8 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
#define FFT_BASE 0xbf400000
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
#define FFT_CSR_REG (FFT_BASE + 0xF000)
#define FFT_CTRL_START (1 << 4)
#define FFT_STAT_DONE (1 << 1)
#define FFT_STAT_BUSY (1 << 0)
#define FFT_POINT_NUM 1024
#define DMA_BASE 0xbf300000
#define DMA_CTRL (DMA_BASE + 0x0000)
#define DMA_LEN (DMA_BASE + 0x0004)
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
#define DMA_STATUS (DMA_BASE + 0x0010)
const float PI = 3.14159265358979323846;
/*
 * Generic blocking DMA transfer wrapper.
 *
 * Writes the source address, destination address and length registers,
 * then writes the control word and busy-waits until bit 0 of DMA_STATUS
 * reads back as 1.
 *
 * phys_src : value written to DMA_SRC_ADDR
 * phys_dst : value written to DMA_DST_ADDR
 * byte_len : value written to DMA_LEN
 */
void dma_transfer(uint32_t phys_src, uint32_t phys_dst, uint32_t byte_len) {
    /* Control word layout used here: burst_len at bit 6, burst_size at bit 3, start at bit 0. */
    const uint32_t burst_len_bits  = 15u << 6;  /* burst_len = 15 (16 beats) */
    const uint32_t burst_size_bits = 2u << 3;   /* burst_size = 2 (4 bytes)  */
    const uint32_t start_bit       = 0x01u;     /* start = 1                 */
    uint32_t done;

    RegWrite(DMA_SRC_ADDR, phys_src);
    RegWrite(DMA_DST_ADDR, phys_dst);
    RegWrite(DMA_LEN, byte_len);

    /* The control register is written last, after the addresses and length are in place. */
    RegWrite(DMA_CTRL, burst_len_bits | burst_size_bits | start_bit);

    /* Poll until the DMA reports completion; the CPU just spins meanwhile. */
    do {
        done = RegRead(DMA_STATUS) & 0x01;
    } while (done == 0);
}
// 软件FFT实现 (基2 DIT-FFT 算法)
void sw_fft(float re[], float im[], int n) {
int i, j, k, l;
@@ -152,18 +122,22 @@ int main(int argc, char** argv)
uint32_t transfer_bytes = FFT_POINT_NUM * 4; // 1024个点 * 4字节
// DMA moves the data from memory into the FFT input peripheral
dma_transfer(((uint32_t)hw_in_re_arr & 0x1FFFFFFF), phys_fft_in_re, transfer_bytes);
dma_transfer(((uint32_t)hw_in_im_arr & 0x1FFFFFFF), phys_fft_in_im, transfer_bytes);
dma_start_transfer(0, ((uint32_t)hw_in_re_arr & 0x1FFFFFFF), phys_fft_in_re, transfer_bytes, 100);
dma_start_transfer(1, ((uint32_t)hw_in_im_arr & 0x1FFFFFFF), phys_fft_in_im, transfer_bytes, 200);
dma_wait_polling(0);
dma_wait_polling(1);
// 启动 FFT 并等待计算完成
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
// poll
}
fft_start();
fft_wait();
// DMA 将结果从 FFT 输出外设搬回内存
dma_transfer(phys_fft_out_re, ((uint32_t)hw_out_re_arr & 0x1FFFFFFF), transfer_bytes);
dma_transfer(phys_fft_out_im, ((uint32_t)hw_out_im_arr & 0x1FFFFFFF), transfer_bytes);
dma_start_transfer(0, phys_fft_out_re, ((uint32_t)hw_out_re_arr & 0x1FFFFFFF), transfer_bytes, 10);
dma_start_transfer(1, phys_fft_out_im, ((uint32_t)hw_out_im_arr & 0x1FFFFFFF), transfer_bytes, 20);
dma_wait_polling(0);
dma_wait_polling(1);
tick_end = get_ns(); // 结束计时
hw_time = tick_end - tick_start;

View File

@@ -0,0 +1,19 @@
fft_csr init = 0
--- Starting Hardware FFT with DMA ---
--- Starting Software FFT ---
--- Performance Comparison ---
Timer Clock Freq : 50000000 Hz
Hardware FFT Time: 407310 ns (0.407 ms)
Software FFT Time: 274786920 ns (274.787 ms)
Speedup Ratio : 674.64x
--- Verification (Only showing Bins with energy > 10) ---
Bin [ 0] Hz: HW(Re: 4007, Im: 0) | SW(Re: 4000, Im: 0)
Bin [ 10] Hz: HW(Re: 4006, Im: -1) | SW(Re: 3999, Im: 0)
Bin [ 200] Hz: HW(Re: 2, Im: -2994) | SW(Re: 0, Im: -3000)
Bin [ 400] Hz: HW(Re: -5, Im: -1496) | SW(Re: 0, Im: -1499)
Bin [ 624] Hz: HW(Re: -5, Im: 1497) | SW(Re: 0, Im: 1500)
Bin [ 824] Hz: HW(Re: 3, Im: 2994) | SW(Re: 0, Im: 2999)
Bin [1014] Hz: HW(Re: 4007, Im: 0) | SW(Re: 3999, Im: 0)

View File

@@ -3,7 +3,8 @@
#include <stdarg.h>
#include <string.h>
#include "common_func.h"
#include <common_func.h>
#include <dvi.h>
//BSP板级支持包所需全局变量
unsigned long UART_BASE = 0xbf000000; //UART16550的虚地址

View File

@@ -184,12 +184,7 @@ always @(posedge clk)
begin
if(uart_display)
begin
if(uart_data==8'hff)
begin
;//$finish;
end
else
begin
if(uart_data !=8'hff) begin
$write("%c",uart_data);
end
end

View File

@@ -27,23 +27,28 @@ module sram_sp #(
assign write_enable[3:0] = (~ram_be_n) & {4{(~ram_ce_n) & (~ram_we_n)}};
always@(posedge write_enable[0]) begin
#10;
if(~ram_be_n[0]) BRAM[ram_addr][7:0] <= ram_data[7:0];
end
always@(posedge write_enable[1]) begin
#10;
if(~ram_be_n[1]) BRAM[ram_addr][15:8] <= ram_data[15:8];
end
always@(posedge write_enable[2]) begin
#10;
if(~ram_be_n[2]) BRAM[ram_addr][23:16] <= ram_data[23:16];
end
always@(posedge write_enable[3]) begin
#10;
if(~ram_be_n[3]) BRAM[ram_addr][31:24] <= ram_data[31:24];
// Byte-lane write into the BRAM array.
// write_enable is derived above from the inverted ram_be_n, ram_ce_n and
// ram_we_n signals, so bit i is 1 only when all three are low for lane i.
// This is a level-sensitive block with blocking assignments: for each
// write_enable bit that is 1, the matching byte of ram_data is copied into
// BRAM[ram_addr]; lanes whose enable bit is 0 are not assigned here.
// NOTE(review): with no edge qualifier, a change on ram_addr or ram_data
// while an enable bit is still 1 presumably re-runs the block and writes
// again -- confirm this is the intended behaviour of the model.
always @(*) begin
if (write_enable[0]) BRAM[ram_addr][7:0] = ram_data[7:0];
if (write_enable[1]) BRAM[ram_addr][15:8] = ram_data[15:8];
if (write_enable[2]) BRAM[ram_addr][23:16] = ram_data[23:16];
if (write_enable[3]) BRAM[ram_addr][31:24] = ram_data[31:24];
end
// always@(posedge write_enable[0]) begin
// #10;
// if(~ram_be_n[0]) BRAM[ram_addr][7:0] <= ram_data[7:0];
// end
// always@(posedge write_enable[1]) begin
// #10;
// if(~ram_be_n[1]) BRAM[ram_addr][15:8] <= ram_data[15:8];
// end
// always@(posedge write_enable[2]) begin
// #10;
// if(~ram_be_n[2]) BRAM[ram_addr][23:16] <= ram_data[23:16];
// end
// always@(posedge write_enable[3]) begin
// #10;
// if(~ram_be_n[3]) BRAM[ram_addr][31:24] <= ram_data[31:24];
// end
wire [31:0] RDATA = BRAM[ram_addr];