Compare commits
10 Commits
be60db6280
...
09d71b6ad3
| Author | SHA1 | Date | |
|---|---|---|---|
| 09d71b6ad3 | |||
| 9235d4f456 | |||
| db6c93d8ec | |||
| 1a51b3f67b | |||
| d082f9e3d2 | |||
| 84155db987 | |||
| 63fb8f6cc1 | |||
| c4a34f7b38 | |||
| 7754496a52 | |||
| 8da1d3f13d |
31
README.MD
Normal file
31
README.MD
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# 2026年全国大学生集成电路创新创业大赛 - 龙芯中科杯
|
||||||
|
|
||||||
|
本仓库为 2026年全国大学生集成电路创新创业大赛(CICC) “龙芯中科”企业命题的参赛代码仓库。
|
||||||
|
|
||||||
|
## 团队信息
|
||||||
|
|
||||||
|
- 队伍编号:CICC1008439
|
||||||
|
- 队伍名称:碳基生物
|
||||||
|
- 参赛赛道:龙芯中科企业命题
|
||||||
|
|
||||||
|
## 项目简介
|
||||||
|
|
||||||
|
本项目基于龙芯指令集架构(Loongson Architecture),针对特定应用场景(如数字信号处理/嵌入式系统优化)设计了一套高性能的系统级芯片(SoC)或核心外设子系统。旨在充分发挥龙芯架构的优势,通过自定义硬件加速器和高效的总线管理提升系统整体效能。
|
||||||
|
|
||||||
|
## 已完成功能模块
|
||||||
|
|
||||||
|
- 外部中断控制器
|
||||||
|
- FFT 硬件加速器
|
||||||
|
- 8 通道复用(8-mux)内存到内存(mm2mm)DMA 控制器
|
||||||
|
|
||||||
|
## 目录结构
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── fpga/ # Vivado工程文件与Tcl自动化构建脚本
|
||||||
|
├── doc/ # 参赛技术文档、设计说明书及测试报告
|
||||||
|
├── rtl/ # 硬件源代码 (龙芯核、中断控制器、FFT、DMA等)
|
||||||
|
├── sdk/ # 嵌入式软件开发 (针对龙芯架构的C语言驱动与应用)
|
||||||
|
├── sim/ # 仿真验证环境 (Testbench与仿真脚本)
|
||||||
|
└── README.MD # 项目总体说明与快速上手指南
|
||||||
|
```
|
||||||
@@ -24,11 +24,11 @@ module axi_dvi #
|
|||||||
input [3:0] s_awcache,
|
input [3:0] s_awcache,
|
||||||
input [2:0] s_awprot,
|
input [2:0] s_awprot,
|
||||||
input s_wvalid,
|
input s_wvalid,
|
||||||
output s_wready,
|
output reg s_wready,
|
||||||
input [31:0] s_wdata,
|
input [31:0] s_wdata,
|
||||||
input [3:0] s_wstrb,
|
input [3:0] s_wstrb,
|
||||||
input s_wlast,
|
input s_wlast,
|
||||||
output s_bvalid,
|
output reg s_bvalid,
|
||||||
input s_bready,
|
input s_bready,
|
||||||
output [4:0] s_bid,
|
output [4:0] s_bid,
|
||||||
output [1:0] s_bresp,
|
output [1:0] s_bresp,
|
||||||
@@ -42,12 +42,12 @@ module axi_dvi #
|
|||||||
input [0:0] s_arlock,
|
input [0:0] s_arlock,
|
||||||
input [3:0] s_arcache,
|
input [3:0] s_arcache,
|
||||||
input [2:0] s_arprot,
|
input [2:0] s_arprot,
|
||||||
output s_rvalid,
|
output reg s_rvalid,
|
||||||
input s_rready,
|
input s_rready,
|
||||||
output [31:0] s_rdata,
|
output reg [31:0] s_rdata,
|
||||||
output [4:0] s_rid,
|
output [4:0] s_rid,
|
||||||
output [1:0] s_rresp,
|
output [1:0] s_rresp,
|
||||||
output s_rlast,
|
output reg s_rlast,
|
||||||
|
|
||||||
output video_clk, // Video clock signal
|
output video_clk, // Video clock signal
|
||||||
output hsync, // Horizontal sync signal
|
output hsync, // Horizontal sync signal
|
||||||
@@ -64,7 +64,6 @@ module axi_dvi #
|
|||||||
reg [31:0] DVI_RECT_DIR,DVI_RECT_L_W,DVI_SQU_DIR,DVI_SQU_R;
|
reg [31:0] DVI_RECT_DIR,DVI_RECT_L_W,DVI_SQU_DIR,DVI_SQU_R;
|
||||||
|
|
||||||
reg busy,write,R_or_W;
|
reg busy,write,R_or_W;
|
||||||
reg s_wready;
|
|
||||||
|
|
||||||
wire ar_enter = s_arvalid & s_arready;
|
wire ar_enter = s_arvalid & s_arready;
|
||||||
wire r_retire = s_rvalid & s_rready & s_rlast;
|
wire r_retire = s_rvalid & s_rready & s_rlast;
|
||||||
@@ -126,8 +125,6 @@ module axi_dvi #
|
|||||||
else if(w_enter & s_wlast) s_wready <= 1'b0;
|
else if(w_enter & s_wlast) s_wready <= 1'b0;
|
||||||
|
|
||||||
|
|
||||||
reg [31:0] s_rdata;
|
|
||||||
reg s_rvalid,s_rlast;
|
|
||||||
wire [31:0] rdata_d = buf_addr[15:0] == 16'h0 ? DVI_RECT_DIR :
|
wire [31:0] rdata_d = buf_addr[15:0] == 16'h0 ? DVI_RECT_DIR :
|
||||||
buf_addr[15:0] == 16'h4 ? DVI_RECT_L_W :
|
buf_addr[15:0] == 16'h4 ? DVI_RECT_L_W :
|
||||||
buf_addr[15:0] == 16'h8 ? DVI_SQU_DIR :
|
buf_addr[15:0] == 16'h8 ? DVI_SQU_DIR :
|
||||||
@@ -152,7 +149,6 @@ module axi_dvi #
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
reg s_bvalid;
|
|
||||||
always@(posedge aclk) begin
|
always@(posedge aclk) begin
|
||||||
if(~aresetn) s_bvalid <= 1'b0;
|
if(~aresetn) s_bvalid <= 1'b0;
|
||||||
else if(w_enter) s_bvalid <= 1'b1;
|
else if(w_enter) s_bvalid <= 1'b1;
|
||||||
|
|||||||
@@ -134,6 +134,12 @@ initial begin
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
localparam SAFE_EXP_MSB = (AXIL_ADDR_BIT_OFFSET > AXI_ADDR_BIT_OFFSET) ? AXIL_ADDR_BIT_OFFSET - 1 : AXI_ADDR_BIT_OFFSET;
|
||||||
|
localparam SAFE_EXP_LSB = AXI_ADDR_BIT_OFFSET;
|
||||||
|
|
||||||
|
localparam SAFE_NAR_MSB = (AXI_ADDR_BIT_OFFSET > AXIL_ADDR_BIT_OFFSET) ? AXI_ADDR_BIT_OFFSET - 1 : AXIL_ADDR_BIT_OFFSET;
|
||||||
|
localparam SAFE_NAR_LSB = AXIL_ADDR_BIT_OFFSET;
|
||||||
|
|
||||||
localparam [1:0]
|
localparam [1:0]
|
||||||
STATE_IDLE = 2'd0,
|
STATE_IDLE = 2'd0,
|
||||||
STATE_DATA = 2'd1,
|
STATE_DATA = 2'd1,
|
||||||
@@ -286,7 +292,7 @@ always @* begin
|
|||||||
|
|
||||||
if (m_axil_rready && m_axil_rvalid) begin
|
if (m_axil_rready && m_axil_rvalid) begin
|
||||||
s_axi_rid_next = id_reg;
|
s_axi_rid_next = id_reg;
|
||||||
s_axi_rdata_next = m_axil_rdata >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
|
s_axi_rdata_next = m_axil_rdata >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
|
||||||
s_axi_rresp_next = m_axil_rresp;
|
s_axi_rresp_next = m_axil_rresp;
|
||||||
s_axi_rlast_next = 1'b0;
|
s_axi_rlast_next = 1'b0;
|
||||||
s_axi_rvalid_next = 1'b1;
|
s_axi_rvalid_next = 1'b1;
|
||||||
@@ -316,7 +322,7 @@ always @* begin
|
|||||||
s_axi_rid_next = id_reg;
|
s_axi_rid_next = id_reg;
|
||||||
data_next = m_axil_rdata;
|
data_next = m_axil_rdata;
|
||||||
resp_next = m_axil_rresp;
|
resp_next = m_axil_rresp;
|
||||||
s_axi_rdata_next = m_axil_rdata >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
|
s_axi_rdata_next = m_axil_rdata >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
|
||||||
s_axi_rresp_next = m_axil_rresp;
|
s_axi_rresp_next = m_axil_rresp;
|
||||||
s_axi_rlast_next = 1'b0;
|
s_axi_rlast_next = 1'b0;
|
||||||
s_axi_rvalid_next = 1'b1;
|
s_axi_rvalid_next = 1'b1;
|
||||||
@@ -346,7 +352,7 @@ always @* begin
|
|||||||
|
|
||||||
if (s_axi_rready || !s_axi_rvalid) begin
|
if (s_axi_rready || !s_axi_rvalid) begin
|
||||||
s_axi_rid_next = id_reg;
|
s_axi_rid_next = id_reg;
|
||||||
s_axi_rdata_next = data_reg >> (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_DATA_WIDTH);
|
s_axi_rdata_next = data_reg >> (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_DATA_WIDTH);
|
||||||
s_axi_rresp_next = resp_reg;
|
s_axi_rresp_next = resp_reg;
|
||||||
s_axi_rlast_next = 1'b0;
|
s_axi_rlast_next = 1'b0;
|
||||||
s_axi_rvalid_next = 1'b1;
|
s_axi_rvalid_next = 1'b1;
|
||||||
@@ -412,7 +418,7 @@ always @* begin
|
|||||||
m_axil_rready_next = !s_axi_rvalid && !m_axil_arvalid;
|
m_axil_rready_next = !s_axi_rvalid && !m_axil_arvalid;
|
||||||
|
|
||||||
if (m_axil_rready && m_axil_rvalid) begin
|
if (m_axil_rready && m_axil_rvalid) begin
|
||||||
data_next[addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = m_axil_rdata;
|
data_next[addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = m_axil_rdata;
|
||||||
if (m_axil_rresp) begin
|
if (m_axil_rresp) begin
|
||||||
resp_next = m_axil_rresp;
|
resp_next = m_axil_rresp;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -146,6 +146,12 @@ localparam [1:0]
|
|||||||
STATE_DATA_2 = 2'd2,
|
STATE_DATA_2 = 2'd2,
|
||||||
STATE_RESP = 2'd3;
|
STATE_RESP = 2'd3;
|
||||||
|
|
||||||
|
// 添加安全的位选边界,防止 ModelSim 报 Range Reversed 错误
|
||||||
|
localparam SAFE_EXP_MSB = (AXIL_ADDR_BIT_OFFSET > AXI_ADDR_BIT_OFFSET) ? AXIL_ADDR_BIT_OFFSET - 1 : AXI_ADDR_BIT_OFFSET;
|
||||||
|
localparam SAFE_EXP_LSB = AXI_ADDR_BIT_OFFSET;
|
||||||
|
|
||||||
|
localparam SAFE_NAR_MSB = (AXI_ADDR_BIT_OFFSET > AXIL_ADDR_BIT_OFFSET) ? AXI_ADDR_BIT_OFFSET - 1 : AXIL_ADDR_BIT_OFFSET;
|
||||||
|
localparam SAFE_NAR_LSB = AXIL_ADDR_BIT_OFFSET;
|
||||||
reg [1:0] state_reg = STATE_IDLE, state_next;
|
reg [1:0] state_reg = STATE_IDLE, state_next;
|
||||||
|
|
||||||
reg [AXI_ID_WIDTH-1:0] id_reg = {AXI_ID_WIDTH{1'b0}}, id_next;
|
reg [AXI_ID_WIDTH-1:0] id_reg = {AXI_ID_WIDTH{1'b0}}, id_next;
|
||||||
@@ -335,7 +341,7 @@ always @* begin
|
|||||||
|
|
||||||
if (s_axi_wready && s_axi_wvalid) begin
|
if (s_axi_wready && s_axi_wvalid) begin
|
||||||
m_axil_wdata_next = {(AXIL_WORD_WIDTH/AXI_WORD_WIDTH){s_axi_wdata}};
|
m_axil_wdata_next = {(AXIL_WORD_WIDTH/AXI_WORD_WIDTH){s_axi_wdata}};
|
||||||
m_axil_wstrb_next = s_axi_wstrb << (addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET] * AXI_STRB_WIDTH);
|
m_axil_wstrb_next = s_axi_wstrb << (addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB] * AXI_STRB_WIDTH);
|
||||||
m_axil_wvalid_next = 1'b1;
|
m_axil_wvalid_next = 1'b1;
|
||||||
burst_next = burst_reg - 1;
|
burst_next = burst_reg - 1;
|
||||||
burst_active_next = burst_reg != 0;
|
burst_active_next = burst_reg != 0;
|
||||||
@@ -354,13 +360,13 @@ always @* begin
|
|||||||
if (CONVERT_NARROW_BURST) begin
|
if (CONVERT_NARROW_BURST) begin
|
||||||
for (i = 0; i < AXI_WORD_WIDTH; i = i + 1) begin
|
for (i = 0; i < AXI_WORD_WIDTH; i = i + 1) begin
|
||||||
if (s_axi_wstrb[i]) begin
|
if (s_axi_wstrb[i]) begin
|
||||||
data_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH+i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE] = s_axi_wdata[i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE];
|
data_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_DATA_WIDTH+i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE] = s_axi_wdata[i*AXIL_WORD_SIZE +: AXIL_WORD_SIZE];
|
||||||
strb_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_STRB_WIDTH+i] = 1'b1;
|
strb_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_STRB_WIDTH+i] = 1'b1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
data_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = s_axi_wdata;
|
data_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_DATA_WIDTH +: SEGMENT_DATA_WIDTH] = s_axi_wdata;
|
||||||
strb_next[addr_reg[AXIL_ADDR_BIT_OFFSET-1:AXI_ADDR_BIT_OFFSET]*SEGMENT_STRB_WIDTH +: SEGMENT_STRB_WIDTH] = s_axi_wstrb;
|
strb_next[addr_reg[SAFE_EXP_MSB:SAFE_EXP_LSB]*SEGMENT_STRB_WIDTH +: SEGMENT_STRB_WIDTH] = s_axi_wstrb;
|
||||||
end
|
end
|
||||||
m_axil_wdata_next = data_next;
|
m_axil_wdata_next = data_next;
|
||||||
m_axil_wstrb_next = strb_next;
|
m_axil_wstrb_next = strb_next;
|
||||||
@@ -451,8 +457,8 @@ always @* begin
|
|||||||
if (s_axi_wready && s_axi_wvalid) begin
|
if (s_axi_wready && s_axi_wvalid) begin
|
||||||
data_next = s_axi_wdata;
|
data_next = s_axi_wdata;
|
||||||
strb_next = s_axi_wstrb;
|
strb_next = s_axi_wstrb;
|
||||||
m_axil_wdata_next = s_axi_wdata >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_DATA_WIDTH);
|
m_axil_wdata_next = s_axi_wdata >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_DATA_WIDTH);
|
||||||
m_axil_wstrb_next = s_axi_wstrb >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_STRB_WIDTH);
|
m_axil_wstrb_next = s_axi_wstrb >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_STRB_WIDTH);
|
||||||
m_axil_wvalid_next = 1'b1;
|
m_axil_wvalid_next = 1'b1;
|
||||||
burst_next = burst_reg - 1;
|
burst_next = burst_reg - 1;
|
||||||
burst_active_next = burst_reg != 0;
|
burst_active_next = burst_reg != 0;
|
||||||
@@ -469,8 +475,8 @@ always @* begin
|
|||||||
s_axi_wready_next = 1'b0;
|
s_axi_wready_next = 1'b0;
|
||||||
|
|
||||||
if (!m_axil_wvalid || m_axil_wready) begin
|
if (!m_axil_wvalid || m_axil_wready) begin
|
||||||
m_axil_wdata_next = data_reg >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_DATA_WIDTH);
|
m_axil_wdata_next = data_reg >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_DATA_WIDTH);
|
||||||
m_axil_wstrb_next = strb_reg >> (addr_reg[AXI_ADDR_BIT_OFFSET-1:AXIL_ADDR_BIT_OFFSET] * AXIL_STRB_WIDTH);
|
m_axil_wstrb_next = strb_reg >> (addr_reg[SAFE_NAR_MSB:SAFE_NAR_LSB] * AXIL_STRB_WIDTH);
|
||||||
m_axil_wvalid_next = 1'b1;
|
m_axil_wvalid_next = 1'b1;
|
||||||
addr_next = (addr_reg + (1 << master_burst_size_reg)) & ({ADDR_WIDTH{1'b1}} << master_burst_size_reg);
|
addr_next = (addr_reg + (1 << master_burst_size_reg)) & ({ADDR_WIDTH{1'b1}} << master_burst_size_reg);
|
||||||
last_segment_next = addr_next[burst_size_reg] != addr_reg[burst_size_reg];
|
last_segment_next = addr_next[burst_size_reg] != addr_reg[burst_size_reg];
|
||||||
|
|||||||
381
rtl/ip/cdma/snix_axil_cdma_mux.sv
Normal file
381
rtl/ip/cdma/snix_axil_cdma_mux.sv
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
// ============================================================================
|
||||||
|
// snix_axil_cdma_mux.sv
|
||||||
|
// Multi-Channel AXI-Lite MUX Wrapper for snix_axi_mm2mm engine
|
||||||
|
//
|
||||||
|
// Register Map (one 0x40-byte window per channel; channel N starts at N * 0x40):
|
||||||
|
// 0x00: READ_ADDR (Source Address)
|
||||||
|
// 0x04: WRITE_ADDR (Destination Address)
|
||||||
|
// 0x08: LENGTH (Transfer length in bytes)
|
||||||
|
// 0x0C: TAG (User tag, purely for software tracking)
|
||||||
|
// 0x10: CTRL (Write 1 to bit 0 to trigger)
|
||||||
|
// [5:3] = AXI AxSIZE (0=1B, 1=2B, 2=4B, 3=8B...)
|
||||||
|
// [13:6] = AXI AxLEN (0=1 beat, 15=16 beats...)
|
||||||
|
// 0x14: STATUS (Bit 0: Busy RO, Bit 1: Done W1C)
|
||||||
|
// ============================================================================
|
||||||
|
`timescale 1ns / 1ps
|
||||||
|
|
||||||
|
module snix_axil_cdma_mux #(
|
||||||
|
parameter int ADDR_WIDTH = 32,
|
||||||
|
parameter int DATA_WIDTH = 32, // Matches engine default
|
||||||
|
parameter int AXIL_ADDR_WIDTH = 32,
|
||||||
|
parameter int AXIL_DATA_WIDTH = 32,
|
||||||
|
parameter int ID_WIDTH = 4,
|
||||||
|
parameter int USER_WIDTH = 1,
|
||||||
|
parameter int PORTS = 8,
|
||||||
|
parameter int FIFO_DEPTH = 16
|
||||||
|
) (
|
||||||
|
input logic clk,
|
||||||
|
input logic rst_n,
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// AXI-Lite Slave Interface (CPU CSR Access)
|
||||||
|
// ==========================================
|
||||||
|
input logic [AXIL_ADDR_WIDTH-1:0] s_axil_awaddr,
|
||||||
|
input logic s_axil_awvalid,
|
||||||
|
output logic s_axil_awready,
|
||||||
|
input logic [AXIL_DATA_WIDTH-1:0] s_axil_wdata,
|
||||||
|
input logic [AXIL_DATA_WIDTH/8-1:0] s_axil_wstrb,
|
||||||
|
input logic s_axil_wvalid,
|
||||||
|
output logic s_axil_wready,
|
||||||
|
output logic [1:0] s_axil_bresp,
|
||||||
|
output logic s_axil_bvalid,
|
||||||
|
input logic s_axil_bready,
|
||||||
|
input logic [AXIL_ADDR_WIDTH-1:0] s_axil_araddr,
|
||||||
|
input logic s_axil_arvalid,
|
||||||
|
output logic s_axil_arready,
|
||||||
|
output logic [AXIL_DATA_WIDTH-1:0] s_axil_rdata,
|
||||||
|
output logic [1:0] s_axil_rresp,
|
||||||
|
output logic s_axil_rvalid,
|
||||||
|
input logic s_axil_rready,
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// AXI4 Master Interface (To Crossbar/Memory)
|
||||||
|
// ==========================================
|
||||||
|
output logic [ID_WIDTH-1:0] mm2mm_awid,
|
||||||
|
output logic [ADDR_WIDTH-1:0] mm2mm_awaddr,
|
||||||
|
output logic [7:0] mm2mm_awlen,
|
||||||
|
output logic [2:0] mm2mm_awsize,
|
||||||
|
output logic [1:0] mm2mm_awburst,
|
||||||
|
output logic mm2mm_awlock,
|
||||||
|
output logic [3:0] mm2mm_awcache,
|
||||||
|
output logic [2:0] mm2mm_awprot,
|
||||||
|
output logic [3:0] mm2mm_awqos,
|
||||||
|
output logic [USER_WIDTH-1:0] mm2mm_awuser,
|
||||||
|
output logic mm2mm_awvalid,
|
||||||
|
input logic mm2mm_awready,
|
||||||
|
output logic [DATA_WIDTH-1:0] mm2mm_wdata,
|
||||||
|
output logic [DATA_WIDTH/8-1:0] mm2mm_wstrb,
|
||||||
|
output logic mm2mm_wlast,
|
||||||
|
output logic [USER_WIDTH-1:0] mm2mm_wuser,
|
||||||
|
output logic mm2mm_wvalid,
|
||||||
|
input logic mm2mm_wready,
|
||||||
|
input logic [ID_WIDTH-1:0] mm2mm_bid,
|
||||||
|
input logic [1:0] mm2mm_bresp,
|
||||||
|
input logic [USER_WIDTH-1:0] mm2mm_buser,
|
||||||
|
input logic mm2mm_bvalid,
|
||||||
|
output logic mm2mm_bready,
|
||||||
|
output logic [ID_WIDTH-1:0] mm2mm_arid,
|
||||||
|
output logic [ADDR_WIDTH-1:0] mm2mm_araddr,
|
||||||
|
output logic [7:0] mm2mm_arlen,
|
||||||
|
output logic [2:0] mm2mm_arsize,
|
||||||
|
output logic [1:0] mm2mm_arburst,
|
||||||
|
output logic mm2mm_arlock,
|
||||||
|
output logic [3:0] mm2mm_arcache,
|
||||||
|
output logic [2:0] mm2mm_arprot,
|
||||||
|
output logic [3:0] mm2mm_arqos,
|
||||||
|
output logic [USER_WIDTH-1:0] mm2mm_aruser,
|
||||||
|
output logic mm2mm_arvalid,
|
||||||
|
input logic mm2mm_arready,
|
||||||
|
input logic [ID_WIDTH-1:0] mm2mm_rid,
|
||||||
|
input logic [DATA_WIDTH-1:0] mm2mm_rdata,
|
||||||
|
input logic [1:0] mm2mm_rresp,
|
||||||
|
input logic mm2mm_rlast,
|
||||||
|
input logic [USER_WIDTH-1:0] mm2mm_ruser,
|
||||||
|
input logic mm2mm_rvalid,
|
||||||
|
output logic mm2mm_rready,
|
||||||
|
|
||||||
|
// Global Interrupt (OR'd from all channels)
|
||||||
|
output logic dma_finish
|
||||||
|
);
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Local Parameters & Utilities
|
||||||
|
// ==========================================
|
||||||
|
// Number of bits needed to index a channel.
// $clog2(1) evaluates to 0, which would make every [CH_BITS-1:0] vector in
// this module an illegal [-1:0] range, so a single-channel build is clamped
// to one index bit. For PORTS > 1 the value is unchanged.
localparam int CH_BITS = (PORTS > 1) ? $clog2(PORTS) : 1;
|
||||||
|
|
||||||
|
// Function to safely apply WSTRB to 32-bit registers
|
||||||
|
// Merge a 32-bit bus write into a register image under AXI byte strobes.
//   old_val : current register contents
//   new_val : write data presented on the bus
//   wstrb   : one enable per byte lane (bit b covers bits [8*b+7 : 8*b])
// Returns old_val with every strobed byte lane replaced by new_val's byte.
function automatic logic [31:0] apply_wstrb(
    input logic [31:0] old_val,
    input logic [31:0] new_val,
    input logic [3:0]  wstrb
);
    logic [31:0] merged;
    for (int lane = 0; lane < 4; lane++) begin
        merged[lane*8 +: 8] = wstrb[lane] ? new_val[lane*8 +: 8]
                                          : old_val[lane*8 +: 8];
    end
    return merged;
endfunction
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Internal Registers (Per Channel)
|
||||||
|
// ==========================================
|
||||||
|
logic [ADDR_WIDTH-1:0] ch_src_addr [PORTS];
|
||||||
|
logic [ADDR_WIDTH-1:0] ch_dst_addr [PORTS];
|
||||||
|
logic [31:0] ch_len [PORTS];
|
||||||
|
logic [31:0] ch_tag [PORTS];
|
||||||
|
logic [31:0] ch_ctrl [PORTS];
|
||||||
|
|
||||||
|
logic [PORTS-1:0] ch_req; // Pending requests (Busy)
|
||||||
|
logic [PORTS-1:0] ch_done; // Completion flags
|
||||||
|
|
||||||
|
logic [PORTS-1:0] arb_set_done; // From Arbiter to CSR
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Address Decoding (0x40 offset per channel)
|
||||||
|
// ==========================================
|
||||||
|
wire [CH_BITS-1:0] wr_ch = s_axil_awaddr[6 +: CH_BITS];
|
||||||
|
wire [5:0] wr_reg = s_axil_awaddr[5:0];
|
||||||
|
wire [CH_BITS-1:0] rd_ch = s_axil_araddr[6 +: CH_BITS];
|
||||||
|
wire [5:0] rd_reg = s_axil_araddr[5:0];
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// AXI-Lite Slave Logic (Robust Backpressure)
|
||||||
|
// ==========================================
|
||||||
|
assign s_axil_bresp = 2'b00;
|
||||||
|
assign s_axil_rresp = 2'b00;
|
||||||
|
|
||||||
|
// Write Path Handshake
|
||||||
|
// AXI-Lite write address/data acceptance.
// AWREADY and WREADY are raised together for exactly one cycle, and only
// when the master presents address and data at the same time and the
// response channel is free (no B pending, or the pending B is being taken
// this cycle). The !s_axil_awready term prevents back-to-back ready pulses.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_awready <= 1'b0;
        s_axil_wready  <= 1'b0;
    end else begin
        // Default: ready low; overridden below on an acceptable request.
        s_axil_awready <= 1'b0;
        s_axil_wready  <= 1'b0;

        if (s_axil_awvalid && s_axil_wvalid && !s_axil_awready &&
            (!s_axil_bvalid || s_axil_bready)) begin
            s_axil_awready <= 1'b1;
            s_axil_wready  <= 1'b1;
        end
    end
end
|
||||||
|
|
||||||
|
wire do_write = s_axil_awready && s_axil_awvalid && s_axil_wready && s_axil_wvalid;
|
||||||
|
|
||||||
|
// AXI-Lite write response valid.
// Set in the cycle after a write is accepted (do_write); cleared once the
// master takes the response. A new write has priority over the clear.
always_ff @(posedge clk) begin
    if (!rst_n)
        s_axil_bvalid <= 1'b0;
    else if (do_write)
        s_axil_bvalid <= 1'b1;
    else if (s_axil_bvalid && s_axil_bready)
        s_axil_bvalid <= 1'b0;
end
|
||||||
|
|
||||||
|
// Read Path Handshake
|
||||||
|
// AXI-Lite read address acceptance.
// ARREADY pulses for one cycle when a read address is waiting and the read
// data channel is free (no R pending, or the pending R is being taken this
// cycle). The !s_axil_arready term prevents back-to-back ready pulses.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_arready <= 1'b0;
    end else begin
        // Default: ready low; overridden below on an acceptable request.
        s_axil_arready <= 1'b0;

        if (s_axil_arvalid && !s_axil_arready &&
            (!s_axil_rvalid || s_axil_rready)) begin
            s_axil_arready <= 1'b1;
        end
    end
end
|
||||||
|
|
||||||
|
wire do_read = s_axil_arready && s_axil_arvalid;
|
||||||
|
|
||||||
|
// AXI-Lite read data channel.
// On an accepted read address (do_read) the addressed register is captured
// into s_axil_rdata and RVALID is raised; RVALID drops once the master takes
// the data. Unmapped offsets and out-of-range channels read back as zero.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        s_axil_rvalid <= 1'b0;
        s_axil_rdata  <= '0;
    end else if (do_read) begin
        s_axil_rvalid <= 1'b1;

        // Start from zero; a decode hit below overrides it.
        s_axil_rdata <= 32'd0;
        if (rd_ch < PORTS) begin
            case (rd_reg)
                6'h00: s_axil_rdata <= ch_src_addr[rd_ch];
                6'h04: s_axil_rdata <= ch_dst_addr[rd_ch];
                6'h08: s_axil_rdata <= ch_len[rd_ch];
                6'h0C: s_axil_rdata <= ch_tag[rd_ch];
                6'h10: s_axil_rdata <= ch_ctrl[rd_ch];
                // STATUS: bit 1 = done, bit 0 = request pending / busy
                6'h14: s_axil_rdata <= {30'd0, ch_done[rd_ch], ch_req[rd_ch]};
                default: ; // keep the zero default
            endcase
        end
    end else if (s_axil_rvalid && s_axil_rready) begin
        s_axil_rvalid <= 1'b0;
    end
end
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Register File Write Logic
|
||||||
|
// ==========================================
|
||||||
|
assign dma_finish = |ch_done;
|
||||||
|
|
||||||
|
// Per-channel register file: CPU writes plus hardware status updates.
//
// Hardware side: when the arbiter pulses arb_set_done[i], channel i's
// request is cleared and its done flag is set.
//
// CPU side (one accepted AXI-Lite write, byte strobes honoured):
//   0x00 READ_ADDR, 0x04 WRITE_ADDR, 0x08 LENGTH, 0x0C TAG, 0x10 CTRL
//        - writable only while the channel has no request pending.
//   0x10 CTRL bit 0 written as 1 additionally queues the channel (ch_req)
//        and clears a stale done flag.
//   0x14 STATUS bit 1 written as 1 clears the done flag (W1C).
//
// CTRL is gated by !ch_req like the other configuration registers because
// ch_ctrl[cur_ch] drives engine_len / engine_size combinationally; an
// ungated write could change the burst parameters of a queued or running
// transfer. CTRL writes to a busy channel are therefore ignored.
//
// The CPU-side assignments come after the hardware loop, so when both
// touch the same bit in one cycle the CPU-side nonblocking assignment wins,
// except where it is explicitly qualified with !arb_set_done.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        ch_req  <= '0;
        ch_done <= '0;
        for (int i = 0; i < PORTS; i++) begin
            ch_src_addr[i] <= '0;
            ch_dst_addr[i] <= '0;
            ch_len[i]      <= '0;
            ch_tag[i]      <= '0;
            ch_ctrl[i]     <= '0;
        end
    end else begin
        // 1. Hardware completion: request retires, done flag latches.
        for (int i = 0; i < PORTS; i++) begin
            if (arb_set_done[i]) begin
                ch_req[i]  <= 1'b0;
                ch_done[i] <= 1'b1;
            end
        end

        // 2. CPU register writes (out-of-range channel indices are ignored).
        if (do_write && wr_ch < PORTS) begin
            case (wr_reg)
                6'h00: if (!ch_req[wr_ch]) ch_src_addr[wr_ch] <= apply_wstrb(ch_src_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h04: if (!ch_req[wr_ch]) ch_dst_addr[wr_ch] <= apply_wstrb(ch_dst_addr[wr_ch], s_axil_wdata, s_axil_wstrb);
                6'h08: if (!ch_req[wr_ch]) ch_len[wr_ch]      <= apply_wstrb(ch_len[wr_ch],      s_axil_wdata, s_axil_wstrb);
                6'h0C: if (!ch_req[wr_ch]) ch_tag[wr_ch]      <= apply_wstrb(ch_tag[wr_ch],      s_axil_wdata, s_axil_wstrb);
                6'h10: begin
                    // Ignore CTRL writes while this channel is queued or running.
                    if (!ch_req[wr_ch]) begin
                        ch_ctrl[wr_ch] <= apply_wstrb(ch_ctrl[wr_ch], s_axil_wdata, s_axil_wstrb);
                        // Trigger: bit 0 written as 1 with its byte lane enabled.
                        if (s_axil_wstrb[0] && s_axil_wdata[0]) begin
                            ch_req[wr_ch] <= 1'b1;
                            // Drop a stale done flag on a new start, unless
                            // hardware is setting it in this same cycle.
                            if (!arb_set_done[wr_ch]) ch_done[wr_ch] <= 1'b0;
                        end
                    end
                end
                6'h14: begin
                    // Software W1C of the done flag (bit 1); hardware set in
                    // the same cycle takes precedence.
                    if (s_axil_wstrb[0] && s_axil_wdata[1]) begin
                        if (!arb_set_done[wr_ch]) ch_done[wr_ch] <= 1'b0;
                    end
                end
                default: ; // unmapped offsets: write is accepted and discarded
            endcase
        end
    end
end
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Round-Robin Arbiter & Engine Driver
|
||||||
|
// ==========================================
|
||||||
|
typedef enum logic [1:0] {IDLE, RUN} state_t;
|
||||||
|
state_t state;
|
||||||
|
|
||||||
|
logic [CH_BITS-1:0] cur_ch;
|
||||||
|
logic [CH_BITS-1:0] rr_ptr;
|
||||||
|
|
||||||
|
// Interfaces to Engine
|
||||||
|
logic engine_start;
|
||||||
|
logic [ADDR_WIDTH-1:0] engine_src;
|
||||||
|
logic [ADDR_WIDTH-1:0] engine_dst;
|
||||||
|
logic [31:0] engine_bytes;
|
||||||
|
logic [7:0] engine_len;
|
||||||
|
logic [2:0] engine_size;
|
||||||
|
logic engine_done;
|
||||||
|
|
||||||
|
// Dynamic routing to the engine based on current active channel
|
||||||
|
assign engine_src = ch_src_addr[cur_ch];
|
||||||
|
assign engine_dst = ch_dst_addr[cur_ch];
|
||||||
|
assign engine_bytes = ch_len[cur_ch];
|
||||||
|
assign engine_len = ch_ctrl[cur_ch][13:6];
|
||||||
|
assign engine_size = ch_ctrl[cur_ch][5:3];
|
||||||
|
|
||||||
|
// Round-robin arbiter and engine sequencer.
//
// IDLE: scan the channels starting at rr_ptr and wrapping at PORTS. The
//       first channel with a pending request that is not being retired this
//       cycle (arb_set_done) becomes cur_ch, engine_start pulses for one
//       cycle, rr_ptr moves to the channel after the winner, and the FSM
//       enters RUN.
// RUN:  wait for engine_done, then pulse arb_set_done for cur_ch and return
//       to IDLE.
//
// engine_start and arb_set_done default low every cycle, so both are
// single-cycle pulses.
always_ff @(posedge clk) begin
    if (!rst_n) begin
        state        <= IDLE;
        engine_start <= 1'b0;
        rr_ptr       <= '0;
        cur_ch       <= '0;
        arb_set_done <= '0;
    end else begin
        arb_set_done <= '0;
        engine_start <= 1'b0;

        if (state == IDLE) begin
            for (int step = 0; step < PORTS; step++) begin
                logic [CH_BITS:0]   probe_wide; // rr_ptr + step, one spare bit for the carry
                logic [CH_BITS-1:0] probe;      // the same index wrapped into 0..PORTS-1

                // Wrap by conditional subtraction rather than a modulo operator.
                probe_wide = {1'b0, rr_ptr} + step[CH_BITS:0];
                probe      = (probe_wide >= PORTS) ? (probe_wide - PORTS) : probe_wide[CH_BITS-1:0];

                if (ch_req[probe] && !arb_set_done[probe]) begin
                    cur_ch       <= probe;
                    rr_ptr       <= (probe == (PORTS - 1)) ? '0 : (probe + 1);
                    engine_start <= 1'b1;
                    state        <= RUN;
                    break; // first hit in scan order wins
                end
            end
        end else if (state == RUN) begin
            if (engine_done) begin
                arb_set_done[cur_ch] <= 1'b1;
                state                <= IDLE;
            end
            // Optional: Add a watchdog timeout counter here if dealing with untrusted PCIe/AXI endpoints
        end
    end
end
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Instantiate The Original Core Engine
|
||||||
|
// ==========================================
|
||||||
|
// Single shared memory-to-memory engine. Its control inputs come from the
// registers of the channel currently selected by the arbiter (cur_ch); its
// AXI4 master port is passed straight through to this module's mm2mm_* pins.
snix_axi_mm2mm #(
    .ADDR_WIDTH (ADDR_WIDTH),
    .DATA_WIDTH (DATA_WIDTH),
    .ID_WIDTH   (ID_WIDTH),
    .USER_WIDTH (USER_WIDTH),
    .FIFO_DEPTH (FIFO_DEPTH)
) u_core_engine (
    .clk               (clk),
    .rst_n             (rst_n),

    // Control / status
    .ctrl_start        (engine_start),
    .ctrl_stop         (1'b0),          // Tied off; can be wired if global abort is needed
    .ctrl_src_addr     (engine_src),
    .ctrl_dst_addr     (engine_dst),
    .ctrl_len          (engine_len),
    .ctrl_size         (engine_size),
    .ctrl_transfer_len (engine_bytes),
    .ctrl_done         (engine_done),

    // AXI4 write address channel
    .mm2mm_awid        (mm2mm_awid),
    .mm2mm_awaddr      (mm2mm_awaddr),
    .mm2mm_awlen       (mm2mm_awlen),
    .mm2mm_awsize      (mm2mm_awsize),
    .mm2mm_awburst     (mm2mm_awburst),
    .mm2mm_awlock      (mm2mm_awlock),
    .mm2mm_awcache     (mm2mm_awcache),
    .mm2mm_awprot      (mm2mm_awprot),
    .mm2mm_awqos       (mm2mm_awqos),
    .mm2mm_awuser      (mm2mm_awuser),
    .mm2mm_awvalid     (mm2mm_awvalid),
    .mm2mm_awready     (mm2mm_awready),

    // AXI4 write data channel
    .mm2mm_wdata       (mm2mm_wdata),
    .mm2mm_wstrb       (mm2mm_wstrb),
    .mm2mm_wlast       (mm2mm_wlast),
    .mm2mm_wuser       (mm2mm_wuser),
    .mm2mm_wvalid      (mm2mm_wvalid),
    .mm2mm_wready      (mm2mm_wready),

    // AXI4 write response channel
    .mm2mm_bid         (mm2mm_bid),
    .mm2mm_bresp       (mm2mm_bresp),
    .mm2mm_buser       (mm2mm_buser),
    .mm2mm_bvalid      (mm2mm_bvalid),
    .mm2mm_bready      (mm2mm_bready),

    // AXI4 read address channel
    .mm2mm_arid        (mm2mm_arid),
    .mm2mm_araddr      (mm2mm_araddr),
    .mm2mm_arlen       (mm2mm_arlen),
    .mm2mm_arsize      (mm2mm_arsize),
    .mm2mm_arburst     (mm2mm_arburst),
    .mm2mm_arlock      (mm2mm_arlock),
    .mm2mm_arcache     (mm2mm_arcache),
    .mm2mm_arprot      (mm2mm_arprot),
    .mm2mm_arqos       (mm2mm_arqos),
    .mm2mm_aruser      (mm2mm_aruser),
    .mm2mm_arvalid     (mm2mm_arvalid),
    .mm2mm_arready     (mm2mm_arready),

    // AXI4 read data channel
    .mm2mm_rid         (mm2mm_rid),
    .mm2mm_rdata       (mm2mm_rdata),
    .mm2mm_rresp       (mm2mm_rresp),
    .mm2mm_rlast       (mm2mm_rlast),
    .mm2mm_ruser       (mm2mm_ruser),
    .mm2mm_rvalid      (mm2mm_rvalid),
    .mm2mm_rready      (mm2mm_rready)
);
|
||||||
|
|
||||||
|
endmodule
|
||||||
180
rtl/soc_top.v
180
rtl/soc_top.v
@@ -30,6 +30,8 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
|||||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------*/
|
------------------------------------------------------------------------------*/
|
||||||
|
`timescale 1ns / 1ps
|
||||||
|
|
||||||
//1f00_0000 apb
|
//1f00_0000 apb
|
||||||
//1f10_0000 dvi
|
//1f10_0000 dvi
|
||||||
//1f20_0000 confreg
|
//1f20_0000 confreg
|
||||||
@@ -1475,43 +1477,6 @@ fft_controller u_fft_controller(
|
|||||||
.fft_finish (fft_finish )
|
.fft_finish (fft_finish )
|
||||||
);
|
);
|
||||||
|
|
||||||
// assign dma_m_arid = 4'b0 ;
|
|
||||||
// assign dma_m_araddr = 32'h0;
|
|
||||||
// assign dma_m_arlen = 8'b0 ;
|
|
||||||
// assign dma_m_arsize = 3'b0 ;
|
|
||||||
// assign dma_m_arburst = 2'b0;
|
|
||||||
// assign dma_m_arlock = 1'b0;
|
|
||||||
// assign dma_m_arcache = 4'b0;
|
|
||||||
// assign dma_m_arprot = 3'b0;
|
|
||||||
// assign dma_m_arvalid = 1'b0;
|
|
||||||
// assign dma_m_rready = 1'b1;
|
|
||||||
// assign dma_m_awid = 4'b0;
|
|
||||||
// assign dma_m_awaddr = 32'b0;
|
|
||||||
// assign dma_m_awlen = 8'b0;
|
|
||||||
// assign dma_m_awsize = 3'b0;
|
|
||||||
// assign dma_m_awburst = 2'b0;
|
|
||||||
// assign dma_m_awlock = 1'b0;
|
|
||||||
// assign dma_m_awcache = 4'b0;
|
|
||||||
// assign dma_m_awprot = 3'b0;
|
|
||||||
// assign dma_m_awvalid = 1'b0;
|
|
||||||
// assign dma_m_wid = 4'b0;
|
|
||||||
// assign dma_m_wdata = 32'b0;
|
|
||||||
// assign dma_m_wstrb = 4'b0;
|
|
||||||
// assign dma_m_wlast = 1'b0;
|
|
||||||
// assign dma_m_wvalid = 1'b0;
|
|
||||||
// assign dma_m_bready = 1'b1;
|
|
||||||
// assign dma_s_arready = 1'b1;
|
|
||||||
// assign dma_s_rid = 5'b0;
|
|
||||||
// assign dma_s_rdata = 32'b0;
|
|
||||||
// assign dma_s_rresp = 2'b0;
|
|
||||||
// assign dma_s_rlast = 1'b0;
|
|
||||||
// assign dma_s_rvalid = 1'b0;
|
|
||||||
// assign dma_s_awready = 1'b1;
|
|
||||||
// assign dma_s_wready = 1'b1;
|
|
||||||
// assign dma_s_bid = 5'b0;
|
|
||||||
// assign dma_s_bresp = 2'b0;
|
|
||||||
// assign dma_s_bvalid = 1'b0;
|
|
||||||
|
|
||||||
wire [31:0] cdma_s_awaddr;
|
wire [31:0] cdma_s_awaddr;
|
||||||
wire [2:0] cdma_s_awprot;
|
wire [2:0] cdma_s_awprot;
|
||||||
wire cdma_s_awvalid;
|
wire cdma_s_awvalid;
|
||||||
@@ -1600,77 +1565,84 @@ u_axi_axil_adapter(
|
|||||||
.m_axil_rready(cdma_s_rready)
|
.m_axil_rready(cdma_s_rready)
|
||||||
);
|
);
|
||||||
|
|
||||||
snix_axi_cdma #(
|
// 8 通道 DMA
|
||||||
.ADDR_WIDTH (32 ),
|
snix_axil_cdma_mux #(
|
||||||
.DATA_WIDTH (32 ),
|
.ADDR_WIDTH (32),
|
||||||
.AXIL_ADDR_WIDTH (32 ),
|
.DATA_WIDTH (32), // 适配你的 32-bit 总线
|
||||||
.AXIL_DATA_WIDTH (32 ),
|
.AXIL_ADDR_WIDTH (32),
|
||||||
.ID_WIDTH (4 ),
|
.AXIL_DATA_WIDTH (32),
|
||||||
.USER_WIDTH (1 ))
|
.ID_WIDTH (4), // 匹配 Crossbar 的 Master ID 宽度
|
||||||
u_snix_axi_cdma(
|
.USER_WIDTH (1),
|
||||||
.clk (sys_clk ),
|
.PORTS (4), // 8 个通道
|
||||||
.rst_n (sys_resetn ),
|
.FIFO_DEPTH (64)
|
||||||
.s_axil_awaddr (cdma_s_awaddr ),
|
) u_snix_axil_cdma_mux_8ch (
|
||||||
.s_axil_awvalid (cdma_s_awvalid ),
|
.clk (sys_clk),
|
||||||
.s_axil_awready (cdma_s_awready ),
|
.rst_n (sys_resetn), // 低电平复位
|
||||||
.s_axil_wdata (cdma_s_wdata ),
|
|
||||||
.s_axil_wstrb (cdma_s_wstrb ),
|
|
||||||
.s_axil_wvalid (cdma_s_wvalid ),
|
|
||||||
.s_axil_wready (cdma_s_wready ),
|
|
||||||
.s_axil_bresp (cdma_s_bresp ),
|
|
||||||
.s_axil_bvalid (cdma_s_bvalid ),
|
|
||||||
.s_axil_bready (cdma_s_bready ),
|
|
||||||
.s_axil_araddr (cdma_s_araddr ),
|
|
||||||
.s_axil_arvalid (cdma_s_arvalid ),
|
|
||||||
.s_axil_arready (cdma_s_arready ),
|
|
||||||
.s_axil_rdata (cdma_s_rdata ),
|
|
||||||
.s_axil_rresp (cdma_s_rresp ),
|
|
||||||
.s_axil_rvalid (cdma_s_rvalid ),
|
|
||||||
.s_axil_rready (cdma_s_rready ),
|
|
||||||
|
|
||||||
.mm2mm_awid (dma_m_awid ),
|
// AXI-Lite 从机接口 (接 CPU 发来的配置请求)
|
||||||
.mm2mm_awaddr (dma_m_awaddr ),
|
.s_axil_awaddr (cdma_s_awaddr),
|
||||||
.mm2mm_awlen (dma_m_awlen ),
|
.s_axil_awvalid (cdma_s_awvalid),
|
||||||
.mm2mm_awsize (dma_m_awsize ),
|
.s_axil_awready (cdma_s_awready),
|
||||||
.mm2mm_awburst (dma_m_awburst ),
|
.s_axil_wdata (cdma_s_wdata),
|
||||||
.mm2mm_awlock (dma_m_awlock ),
|
.s_axil_wstrb (cdma_s_wstrb),
|
||||||
.mm2mm_awcache (dma_m_awcache ),
|
.s_axil_wvalid (cdma_s_wvalid),
|
||||||
.mm2mm_awprot (dma_m_awprot ),
|
.s_axil_wready (cdma_s_wready),
|
||||||
.mm2mm_awqos ( ),
|
.s_axil_bresp (cdma_s_bresp),
|
||||||
.mm2mm_awuser ( ),
|
.s_axil_bvalid (cdma_s_bvalid),
|
||||||
.mm2mm_awvalid (dma_m_awvalid ),
|
.s_axil_bready (cdma_s_bready),
|
||||||
.mm2mm_awready (dma_m_awready ),
|
.s_axil_araddr (cdma_s_araddr),
|
||||||
.mm2mm_wdata (dma_m_wdata ),
|
.s_axil_arvalid (cdma_s_arvalid),
|
||||||
.mm2mm_wstrb (dma_m_wstrb ),
|
.s_axil_arready (cdma_s_arready),
|
||||||
.mm2mm_wlast (dma_m_wlast ),
|
.s_axil_rdata (cdma_s_rdata),
|
||||||
.mm2mm_wuser ( ),
|
.s_axil_rresp (cdma_s_rresp),
|
||||||
.mm2mm_wvalid (dma_m_wvalid ),
|
.s_axil_rvalid (cdma_s_rvalid),
|
||||||
.mm2mm_wready (dma_m_wready ),
|
.s_axil_rready (cdma_s_rready),
|
||||||
.mm2mm_bid (dma_m_bid ),
|
|
||||||
.mm2mm_bresp (dma_m_bresp ),
|
// AXI4 主机接口 (接 Crossbar 去搬运数据)
|
||||||
|
.mm2mm_awid (dma_m_awid),
|
||||||
|
.mm2mm_awaddr (dma_m_awaddr),
|
||||||
|
.mm2mm_awlen (dma_m_awlen),
|
||||||
|
.mm2mm_awsize (dma_m_awsize),
|
||||||
|
.mm2mm_awburst (dma_m_awburst),
|
||||||
|
.mm2mm_awlock (dma_m_awlock),
|
||||||
|
.mm2mm_awcache (dma_m_awcache),
|
||||||
|
.mm2mm_awprot (dma_m_awprot),
|
||||||
|
.mm2mm_awqos (), // 悬空即可
|
||||||
|
.mm2mm_awuser (), // 悬空即可
|
||||||
|
.mm2mm_awvalid (dma_m_awvalid),
|
||||||
|
.mm2mm_awready (dma_m_awready),
|
||||||
|
.mm2mm_wdata (dma_m_wdata),
|
||||||
|
.mm2mm_wstrb (dma_m_wstrb),
|
||||||
|
.mm2mm_wlast (dma_m_wlast),
|
||||||
|
.mm2mm_wuser (), // 悬空即可
|
||||||
|
.mm2mm_wvalid (dma_m_wvalid),
|
||||||
|
.mm2mm_wready (dma_m_wready),
|
||||||
|
.mm2mm_bid (dma_m_bid),
|
||||||
|
.mm2mm_bresp (dma_m_bresp),
|
||||||
.mm2mm_buser (1'b0),
|
.mm2mm_buser (1'b0),
|
||||||
.mm2mm_bvalid (dma_m_bvalid ),
|
.mm2mm_bvalid (dma_m_bvalid),
|
||||||
.mm2mm_bready (dma_m_bready ),
|
.mm2mm_bready (dma_m_bready),
|
||||||
.mm2mm_arid (dma_m_arid ),
|
.mm2mm_arid (dma_m_arid),
|
||||||
.mm2mm_araddr (dma_m_araddr ),
|
.mm2mm_araddr (dma_m_araddr),
|
||||||
.mm2mm_arlen (dma_m_arlen ),
|
.mm2mm_arlen (dma_m_arlen),
|
||||||
.mm2mm_arsize (dma_m_arsize ),
|
.mm2mm_arsize (dma_m_arsize),
|
||||||
.mm2mm_arburst (dma_m_arburst ),
|
.mm2mm_arburst (dma_m_arburst),
|
||||||
.mm2mm_arlock (dma_m_arlock ),
|
.mm2mm_arlock (dma_m_arlock),
|
||||||
.mm2mm_arcache (dma_m_arcache ),
|
.mm2mm_arcache (dma_m_arcache),
|
||||||
.mm2mm_arprot (dma_m_arprot ),
|
.mm2mm_arprot (dma_m_arprot),
|
||||||
.mm2mm_arqos ( ),
|
.mm2mm_arqos (), // 悬空即可
|
||||||
.mm2mm_aruser ( ),
|
.mm2mm_aruser (), // 悬空即可
|
||||||
.mm2mm_arvalid (dma_m_arvalid ),
|
.mm2mm_arvalid (dma_m_arvalid),
|
||||||
.mm2mm_arready (dma_m_arready ),
|
.mm2mm_arready (dma_m_arready),
|
||||||
.mm2mm_rid (dma_m_rid ),
|
.mm2mm_rid (dma_m_rid),
|
||||||
.mm2mm_rdata (dma_m_rdata ),
|
.mm2mm_rdata (dma_m_rdata),
|
||||||
.mm2mm_rresp (dma_m_rresp ),
|
.mm2mm_rresp (dma_m_rresp),
|
||||||
.mm2mm_rlast (dma_m_rlast ),
|
.mm2mm_rlast (dma_m_rlast),
|
||||||
.mm2mm_ruser (1'b0),
|
.mm2mm_ruser (1'b0),
|
||||||
.mm2mm_rvalid (dma_m_rvalid ),
|
.mm2mm_rvalid (dma_m_rvalid),
|
||||||
.mm2mm_rready (dma_m_rready ),
|
.mm2mm_rready (dma_m_rready),
|
||||||
|
|
||||||
|
// 全局中断输出
|
||||||
.dma_finish (dma_finish)
|
.dma_finish (dma_finish)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,9 @@ C_SRCS += $(COMMON_DIR)/drivers/core_time.c
|
|||||||
C_SRCS += $(COMMON_DIR)/drivers/common_func.c
|
C_SRCS += $(COMMON_DIR)/drivers/common_func.c
|
||||||
C_SRCS += $(COMMON_DIR)/drivers/dvi.c \
|
C_SRCS += $(COMMON_DIR)/drivers/dvi.c \
|
||||||
$(COMMON_DIR)/drivers/led.c \
|
$(COMMON_DIR)/drivers/led.c \
|
||||||
$(COMMON_DIR)/drivers/seg7.c
|
$(COMMON_DIR)/drivers/seg7.c \
|
||||||
|
$(COMMON_DIR)/drivers/dma.c \
|
||||||
|
$(COMMON_DIR)/drivers/fft.c
|
||||||
|
|
||||||
INCLUDES += -I./ \
|
INCLUDES += -I./ \
|
||||||
-I$(COMMON_DIR)/include \
|
-I$(COMMON_DIR)/include \
|
||||||
|
|||||||
37
sdk/software/bsp/drivers/dma.c
Normal file
37
sdk/software/bsp/drivers/dma.c
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#include "dma.h"
|
||||||
|
|
||||||
|
// Program one DMA channel and start a memory-to-memory transfer.
//   ch_id : channel index, passed to DMA_CH() to select the register block
//   src   : value written to SRC_ADDR
//   dst   : value written to DST_ADDR
//   bytes : value written to LENGTH
//   tag   : value written to TAG
// If STATUS has STATUS_BUSY_BIT set, the function returns without writing
// any register; the caller receives no indication that nothing was started.
// This function does not wait for completion.
void dma_start_transfer(int ch_id, uint32_t src, uint32_t dst, uint32_t bytes, uint32_t tag) {
|
||||||
|
volatile dma_ch_regs_t* ch = DMA_CH(ch_id);
|
||||||
|
|
||||||
|
// Check that the channel is idle so a running job is not overwritten
|
||||||
|
if (ch->STATUS & STATUS_BUSY_BIT) {
|
||||||
|
// printf("channel %d busy\n", ch_id);
|
||||||
|
return; // channel is busy; error handling / retry is left to the caller
|
||||||
|
}
|
||||||
|
|
||||||
|
// Program the source address, destination address and length
|
||||||
|
ch->SRC_ADDR = src;
|
||||||
|
ch->DST_ADDR = dst;
|
||||||
|
ch->LENGTH = bytes;
|
||||||
|
ch->TAG = tag; // optional: job ID
|
||||||
|
|
||||||
|
// Set the burst attributes and trigger the transfer
|
||||||
|
// AXI SIZE: 2 (2^2 = 4 bytes, matching the 32-bit data bus)
|
||||||
|
// AXI LEN: 15 (16-beat burst)
|
||||||
|
uint32_t ctrl_val = CTRL_BURST_SIZE(2) | CTRL_BURST_LEN(15);
|
||||||
|
|
||||||
|
// Write CTRL with bit 0 (start) set; this is the last register written
|
||||||
|
ch->CTRL = ctrl_val | CTRL_START_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Busy-wait until channel ch_id reports STATUS_DONE_BIT, then clear it.
// There is no timeout: the loop spins until the bit is read as set.
void dma_wait_polling(int ch_id) {
|
||||||
|
volatile dma_ch_regs_t* ch = DMA_CH(ch_id);
|
||||||
|
|
||||||
|
// Spin until the Done bit reads 1
|
||||||
|
while (!(ch->STATUS & STATUS_DONE_BIT)) {
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear the Done flag (write 1 to clear bit 1)
|
||||||
|
// Note: the register is designed as write-1-to-clear, so a 1 is written to bit 1
|
||||||
|
ch->STATUS = STATUS_DONE_BIT;
|
||||||
|
}
|
||||||
14
sdk/software/bsp/drivers/fft.c
Normal file
14
sdk/software/bsp/drivers/fft.c
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
#include "fft.h"
|
||||||
|
#include "common_func.h"
|
||||||
|
|
||||||
|
// Write FFT_CTRL_START to the FFT CSR register. Does not wait for completion.
void fft_start() {
|
||||||
|
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Busy-wait until the FFT CSR register reads back with FFT_STAT_DONE set.
// There is no timeout.
void fft_wait() {
|
||||||
|
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the current raw value read from the FFT CSR register.
unsigned int fft_get_csr() {
|
||||||
|
return RegRead(FFT_CSR_REG);
|
||||||
|
}
|
||||||
30
sdk/software/bsp/include/dma.h
Normal file
30
sdk/software/bsp/include/dma.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
|
// Register layout of one DMA channel: 6 registers plus 10 reserved words,
// 16 x 4 bytes = 0x40 bytes, matching the 0x40 stride used by DMA_CH().
typedef struct {
|
||||||
|
volatile uint32_t SRC_ADDR; // 0x00: source address
|
||||||
|
volatile uint32_t DST_ADDR; // 0x04: destination address
|
||||||
|
volatile uint32_t LENGTH; // 0x08: total number of bytes to transfer
|
||||||
|
volatile uint32_t TAG; // 0x0C: software tag (not processed by hardware; for software to label jobs)
|
||||||
|
volatile uint32_t CTRL; // 0x10: control register (burst attributes + start)
|
||||||
|
volatile uint32_t STATUS; // 0x14: status register (Busy, Done)
|
||||||
|
volatile uint32_t RESV[10]; // 0x18 ~ 0x3C: reserved, pads the block to 0x40 bytes
|
||||||
|
} dma_ch_regs_t;
|
||||||
|
|
||||||
|
// DMA controller base address
|
||||||
|
#define CDMA_MUX_BASE 0xbf300000
|
||||||
|
// Pointer to the register block of channel i (blocks are 0x40 bytes apart)
#define DMA_CH(i) ((volatile dma_ch_regs_t*)(CDMA_MUX_BASE + (i) * 0x40))
|
||||||
|
|
||||||
|
// CTRL register bit definitions
|
||||||
|
#define CTRL_START_BIT (1 << 0)
|
||||||
|
#define CTRL_BURST_SIZE(x) (((x) & 0x7) << 3) // Bits [5:3]: AXI AxSIZE (0=1B, 1=2B, 2=4B, 3=8B)
|
||||||
|
#define CTRL_BURST_LEN(x) (((x) & 0xFF) << 6) // Bits [13:6]: AXI AxLEN (0=1 beat, 15=16 beats)
|
||||||
|
|
||||||
|
// STATUS register bit definitions
|
||||||
|
#define STATUS_BUSY_BIT (1 << 0)
|
||||||
|
#define STATUS_DONE_BIT (1 << 1)
|
||||||
|
|
||||||
|
void dma_start_transfer(int ch_id, uint32_t src, uint32_t dst, uint32_t bytes, uint32_t tag);
|
||||||
|
void dma_wait_polling(int ch_id);
|
||||||
16
sdk/software/bsp/include/fft.h
Normal file
16
sdk/software/bsp/include/fft.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Base address of the FFT peripheral; all addresses below are offsets from it.
#define FFT_BASE 0xbf400000
|
||||||
|
// Data windows — presumably input/output real and imaginary sample buffers,
// judging by the names; TODO confirm against the RTL.
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
|
||||||
|
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
|
||||||
|
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
|
||||||
|
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
|
||||||
|
// Control/status register: fft_start() writes it, fft_wait() polls it.
#define FFT_CSR_REG (FFT_BASE + 0xF000)
|
||||||
|
// CSR bit written by fft_start()
#define FFT_CTRL_START (1 << 4)
|
||||||
|
// CSR bit polled by fft_wait()
#define FFT_STAT_DONE (1 << 1)
|
||||||
|
#define FFT_STAT_BUSY (1 << 0)
|
||||||
|
// Presumably the FFT length in points — TODO confirm against the RTL.
#define FFT_POINT_NUM 1024
|
||||||
|
|
||||||
|
void fft_start();
|
||||||
|
void fft_wait();
|
||||||
|
unsigned int fft_get_csr();
|
||||||
@@ -16,7 +16,7 @@ CFLAGS += -DSIMU=0
|
|||||||
CFLAGS += -DFLAGS_STR=\""$(CFLAGS)"\"
|
CFLAGS += -DFLAGS_STR=\""$(CFLAGS)"\"
|
||||||
CFLAGS += -g
|
CFLAGS += -g
|
||||||
#配置迭代次数
|
#配置迭代次数
|
||||||
CFLAGS += -DITERATIONS=1
|
CFLAGS += -DITERATIONS=1000
|
||||||
|
|
||||||
OBJDIR = obj
|
OBJDIR = obj
|
||||||
COMMON_DIR = ../../bsp
|
COMMON_DIR = ../../bsp
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include <common_func.h>
|
#include <common_func.h>
|
||||||
#include <confreg_time.h>
|
#include <confreg_time.h>
|
||||||
|
#include <dma.h>
|
||||||
|
|
||||||
// BSP板级支持包所需全局变量
|
// BSP板级支持包所需全局变量
|
||||||
unsigned long UART_BASE = 0xbf000000;
|
unsigned long UART_BASE = 0xbf000000;
|
||||||
@@ -11,35 +14,12 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
|
|||||||
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
||||||
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
||||||
|
|
||||||
#define FFT_BASE 0xbf400000
|
|
||||||
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
|
|
||||||
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
|
|
||||||
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
|
|
||||||
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
|
|
||||||
#define FFT_CSR_REG (FFT_BASE + 0xF000)
|
|
||||||
#define FFT_CTRL_START (1 << 4)
|
|
||||||
#define FFT_STAT_DONE (1 << 1)
|
|
||||||
#define FFT_STAT_BUSY (1 << 0)
|
|
||||||
#define FFT_POINT_NUM 1024
|
|
||||||
|
|
||||||
#define DMA_BASE 0xbf300000
|
|
||||||
#define DMA_CTRL (DMA_BASE + 0x0000)
|
|
||||||
#define DMA_LEN (DMA_BASE + 0x0004)
|
|
||||||
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
|
|
||||||
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
|
|
||||||
#define DMA_STATUS (DMA_BASE + 0x0010)
|
|
||||||
|
|
||||||
const float PI = 3.14159265358979323846;
|
|
||||||
|
|
||||||
// 加上 aligned(64) 是为了防止 DMA 突发传输时跨越缓存行或 AXI 非对齐边界
|
// 加上 aligned(64) 是为了防止 DMA 突发传输时跨越缓存行或 AXI 非对齐边界
|
||||||
uint32_t src_array[64] __attribute__((aligned(64)));
|
uint32_t src_array[64] __attribute__((aligned(64)));
|
||||||
uint32_t dst_array[64] __attribute__((aligned(64)));
|
uint32_t dst_array[64] __attribute__((aligned(64)));
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
unsigned int dma_status = RegRead(DMA_STATUS);
|
|
||||||
printf("dma_status init = %x\n", dma_status);
|
|
||||||
|
|
||||||
uint32_t data_len = 64;
|
uint32_t data_len = 64;
|
||||||
uint32_t byte_len = data_len * 4;
|
uint32_t byte_len = data_len * 4;
|
||||||
|
|
||||||
@@ -65,22 +45,9 @@ int main(int argc, char** argv)
|
|||||||
printf("uncached_src: %x\n", uncached_src);
|
printf("uncached_src: %x\n", uncached_src);
|
||||||
printf("uncached_dst: %x\n", uncached_dst);
|
printf("uncached_dst: %x\n", uncached_dst);
|
||||||
|
|
||||||
|
|
||||||
// 配置 DMA:DMA 只需要最纯粹的物理地址
|
// 配置 DMA:DMA 只需要最纯粹的物理地址
|
||||||
RegWrite(DMA_SRC_ADDR, phys_src_addr);
|
// Start the copy on channel 0. dma_start_transfer() takes five arguments; the
// trailing software tag was missing here, which is a compile error ("too few
// arguments"). The tag is only stored in the TAG register for software
// bookkeeping, so 0 is passed because this example does not use it.
dma_start_transfer(0, phys_src_addr, phys_dst_addr, byte_len, 0);
|
||||||
RegWrite(DMA_DST_ADDR, phys_dst_addr);
|
dma_wait_polling(0);
|
||||||
RegWrite(DMA_LEN, byte_len);
|
|
||||||
|
|
||||||
// burst_len = 15(16拍), burst_size = 2(4字节)
|
|
||||||
uint32_t burst_len = 15;
|
|
||||||
uint32_t burst_size = 2;
|
|
||||||
uint32_t ctrl_val = (burst_len << 6) | (burst_size << 3) | 0x01;
|
|
||||||
RegWrite(DMA_CTRL, ctrl_val);
|
|
||||||
|
|
||||||
// 等待 DMA 完成
|
|
||||||
while ((RegRead(DMA_STATUS) & 0x01) == 0) {
|
|
||||||
printf("polling...\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("dma passed!\n");
|
printf("dma passed!\n");
|
||||||
|
|
||||||
|
|||||||
75
sdk/software/examples/dma/result.txt
Normal file
75
sdk/software/examples/dma/result.txt
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
dma_status init = 0
|
||||||
|
src_array: 1c080d80
|
||||||
|
dst_array: 1c080c80
|
||||||
|
phys_src_array: 1c080d80
|
||||||
|
phys_dst_array: 1c080c80
|
||||||
|
uncached_src: bc080d80
|
||||||
|
uncached_dst: bc080c80
|
||||||
|
polling...
|
||||||
|
dma passed!
|
||||||
|
0: 0
|
||||||
|
1: 1
|
||||||
|
2: 4
|
||||||
|
3: 9
|
||||||
|
4: 16
|
||||||
|
5: 25
|
||||||
|
6: 36
|
||||||
|
7: 49
|
||||||
|
8: 64
|
||||||
|
9: 81
|
||||||
|
10: 100
|
||||||
|
11: 121
|
||||||
|
12: 144
|
||||||
|
13: 169
|
||||||
|
14: 196
|
||||||
|
15: 225
|
||||||
|
16: 256
|
||||||
|
17: 289
|
||||||
|
18: 324
|
||||||
|
19: 361
|
||||||
|
20: 400
|
||||||
|
21: 441
|
||||||
|
22: 484
|
||||||
|
23: 529
|
||||||
|
24: 576
|
||||||
|
25: 625
|
||||||
|
26: 676
|
||||||
|
27: 729
|
||||||
|
28: 784
|
||||||
|
29: 841
|
||||||
|
30: 900
|
||||||
|
31: 961
|
||||||
|
32: 1024
|
||||||
|
33: 1089
|
||||||
|
34: 1156
|
||||||
|
35: 1225
|
||||||
|
36: 1296
|
||||||
|
37: 1369
|
||||||
|
38: 1444
|
||||||
|
39: 1521
|
||||||
|
40: 1600
|
||||||
|
41: 1681
|
||||||
|
42: 1764
|
||||||
|
43: 1849
|
||||||
|
44: 1936
|
||||||
|
45: 2025
|
||||||
|
46: 2116
|
||||||
|
47: 2209
|
||||||
|
48: 2304
|
||||||
|
49: 2401
|
||||||
|
50: 2500
|
||||||
|
51: 2601
|
||||||
|
52: 2704
|
||||||
|
53: 2809
|
||||||
|
54: 2916
|
||||||
|
55: 3025
|
||||||
|
56: 3136
|
||||||
|
57: 3249
|
||||||
|
58: 3364
|
||||||
|
59: 3481
|
||||||
|
60: 3600
|
||||||
|
61: 3721
|
||||||
|
62: 3844
|
||||||
|
63: 3969
|
||||||
|
|
||||||
|
Success! Array to Array DMA transfer verified.
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include <common_func.h>
|
#include <common_func.h>
|
||||||
#include <confreg_time.h>
|
#include <confreg_time.h>
|
||||||
|
#include <fft.h>
|
||||||
|
|
||||||
// BSP板级支持包所需全局变量
|
// BSP板级支持包所需全局变量
|
||||||
unsigned long UART_BASE = 0xbf000000;
|
unsigned long UART_BASE = 0xbf000000;
|
||||||
@@ -10,24 +12,6 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
|
|||||||
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
||||||
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
||||||
|
|
||||||
#define FFT_BASE 0xbf400000
|
|
||||||
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
|
|
||||||
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
|
|
||||||
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
|
|
||||||
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
|
|
||||||
#define FFT_CSR_REG (FFT_BASE + 0xF000)
|
|
||||||
#define FFT_CTRL_START (1 << 4)
|
|
||||||
#define FFT_STAT_DONE (1 << 1)
|
|
||||||
#define FFT_STAT_BUSY (1 << 0)
|
|
||||||
#define FFT_POINT_NUM 1024
|
|
||||||
|
|
||||||
#define DMA_BASE 0xbf300000
|
|
||||||
#define DMA_CTRL (DMA_BASE + 0x0000)
|
|
||||||
#define DMA_LEN (DMA_BASE + 0x0004)
|
|
||||||
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
|
|
||||||
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
|
|
||||||
#define DMA_STATUS (DMA_BASE + 0x0010)
|
|
||||||
|
|
||||||
const float PI = 3.14159265358979323846;
|
const float PI = 3.14159265358979323846;
|
||||||
|
|
||||||
// 读取定时器的当前Tick
|
// 读取定时器的当前Tick
|
||||||
@@ -35,14 +19,12 @@ unsigned int get_timer_ticks() {
|
|||||||
return RegRead(CONFREG_TIMER_BASE);
|
return RegRead(CONFREG_TIMER_BASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------
|
|
||||||
// 软件FFT实现 (基2 DIT-FFT 算法)
|
// 软件FFT实现 (基2 DIT-FFT 算法)
|
||||||
// ---------------------------------------------------------
|
|
||||||
void sw_fft(float re[], float im[], int n) {
|
void sw_fft(float re[], float im[], int n) {
|
||||||
int i, j, k, l;
|
int i, j, k, l;
|
||||||
float tr, ti, ur, ui, wr, wi;
|
float tr, ti, ur, ui, wr, wi;
|
||||||
|
|
||||||
// 1. 比特翻转 (Bit Reversal)
|
// 比特翻转 (Bit Reversal)
|
||||||
j = 0;
|
j = 0;
|
||||||
for (i = 0; i < n - 1; i++) {
|
for (i = 0; i < n - 1; i++) {
|
||||||
if (i < j) {
|
if (i < j) {
|
||||||
@@ -58,7 +40,7 @@ void sw_fft(float re[], float im[], int n) {
|
|||||||
j += k;
|
j += k;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. 蝶形运算 (Butterfly Computation)
|
// 蝶形运算 (Butterfly Computation)
|
||||||
for (l = 1; l < n; l *= 2) {
|
for (l = 1; l < n; l *= 2) {
|
||||||
ur = 1.0;
|
ur = 1.0;
|
||||||
ui = 0.0;
|
ui = 0.0;
|
||||||
@@ -110,9 +92,7 @@ int main(int argc, char** argv)
|
|||||||
unsigned int tick_start, tick_end;
|
unsigned int tick_start, tick_end;
|
||||||
unsigned int hw_time, sw_time;
|
unsigned int hw_time, sw_time;
|
||||||
|
|
||||||
// ==========================================
|
// 硬件加速 FFT 测试
|
||||||
// 1. 硬件加速 FFT 测试
|
|
||||||
// ==========================================
|
|
||||||
printf("\n--- Starting Hardware FFT ---\n");
|
printf("\n--- Starting Hardware FFT ---\n");
|
||||||
tick_start = get_ns();
|
tick_start = get_ns();
|
||||||
|
|
||||||
@@ -122,10 +102,8 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
|
fft_start();
|
||||||
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
|
fft_wait();
|
||||||
// 等待硬件计算完成
|
|
||||||
}
|
|
||||||
|
|
||||||
tick_end = get_ns();
|
tick_end = get_ns();
|
||||||
|
|
||||||
|
|||||||
19
sdk/software/examples/fft/result.txt
Normal file
19
sdk/software/examples/fft/result.txt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
fft_csr = 0
|
||||||
|
|
||||||
|
--- Starting Hardware FFT ---
|
||||||
|
--- Starting Software FFT ---
|
||||||
|
|
||||||
|
--- Performance Comparison ---
|
||||||
|
Timer Clock Freq : 50000000 Hz
|
||||||
|
Hardware FFT Time: 1695360 ns (1.695 ms)
|
||||||
|
Software FFT Time: 274547190 ns (274.547 ms)
|
||||||
|
Speedup Ratio : 161.94x
|
||||||
|
|
||||||
|
--- Verification (Only showing Bins with energy > 10) ---
|
||||||
|
Bin [ 0] Hz: HW(Re: 4007, Im: 0) | SW(Re: 4000, Im: 0)
|
||||||
|
Bin [ 10] Hz: HW(Re: 4006, Im: -1) | SW(Re: 3999, Im: 0)
|
||||||
|
Bin [ 200] Hz: HW(Re: 2, Im: -2994) | SW(Re: 0, Im: -3000)
|
||||||
|
Bin [ 400] Hz: HW(Re: -5, Im: -1496) | SW(Re: 0, Im: -1499)
|
||||||
|
Bin [ 624] Hz: HW(Re: -5, Im: 1497) | SW(Re: 0, Im: 1500)
|
||||||
|
Bin [ 824] Hz: HW(Re: 3, Im: 2994) | SW(Re: 0, Im: 2999)
|
||||||
|
Bin [1014] Hz: HW(Re: 4007, Im: 0) | SW(Re: 3999, Im: 0)
|
||||||
@@ -1,9 +1,13 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include <common_func.h>
|
#include <common_func.h>
|
||||||
#include <confreg_time.h>
|
#include <confreg_time.h>
|
||||||
|
#include <dma.h>
|
||||||
|
#include <fft.h>
|
||||||
|
|
||||||
// BSP板级支持包所需全局变量
|
// BSP板级支持包所需全局变量
|
||||||
unsigned long UART_BASE = 0xbf000000;
|
unsigned long UART_BASE = 0xbf000000;
|
||||||
@@ -11,42 +15,8 @@ unsigned long CONFREG_TIMER_BASE = 0xbf20f100;
|
|||||||
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
unsigned long CONFREG_CLOCKS_PER_SEC = 50000000L;
|
||||||
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
unsigned long CORE_CLOCKS_PER_SEC = 33000000L;
|
||||||
|
|
||||||
#define FFT_BASE 0xbf400000
|
|
||||||
#define FFT_IN_RE_BASE (FFT_BASE + 0x1000)
|
|
||||||
#define FFT_IN_IM_BASE (FFT_BASE + 0x2000)
|
|
||||||
#define FFT_OUT_RE_BASE (FFT_BASE + 0x3000)
|
|
||||||
#define FFT_OUT_IM_BASE (FFT_BASE + 0x4000)
|
|
||||||
#define FFT_CSR_REG (FFT_BASE + 0xF000)
|
|
||||||
#define FFT_CTRL_START (1 << 4)
|
|
||||||
#define FFT_STAT_DONE (1 << 1)
|
|
||||||
#define FFT_STAT_BUSY (1 << 0)
|
|
||||||
#define FFT_POINT_NUM 1024
|
|
||||||
|
|
||||||
#define DMA_BASE 0xbf300000
|
|
||||||
#define DMA_CTRL (DMA_BASE + 0x0000)
|
|
||||||
#define DMA_LEN (DMA_BASE + 0x0004)
|
|
||||||
#define DMA_SRC_ADDR (DMA_BASE + 0x0008)
|
|
||||||
#define DMA_DST_ADDR (DMA_BASE + 0x000c)
|
|
||||||
#define DMA_STATUS (DMA_BASE + 0x0010)
|
|
||||||
|
|
||||||
const float PI = 3.14159265358979323846;
|
const float PI = 3.14159265358979323846;
|
||||||
|
|
||||||
// DMA 传输通用封装函数 (阻塞等待模式)
|
|
||||||
void dma_transfer(uint32_t phys_src, uint32_t phys_dst, uint32_t byte_len) {
|
|
||||||
RegWrite(DMA_SRC_ADDR, phys_src);
|
|
||||||
RegWrite(DMA_DST_ADDR, phys_dst);
|
|
||||||
RegWrite(DMA_LEN, byte_len);
|
|
||||||
|
|
||||||
// burst_len = 15(16拍), burst_size = 2(4字节), start = 1
|
|
||||||
uint32_t ctrl_val = (15 << 6) | (2 << 3) | 0x01;
|
|
||||||
RegWrite(DMA_CTRL, ctrl_val);
|
|
||||||
|
|
||||||
// 轮询等待 DMA 搬运完成
|
|
||||||
while ((RegRead(DMA_STATUS) & 0x01) == 0) {
|
|
||||||
// CPU 空转等待
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 软件FFT实现 (基2 DIT-FFT 算法)
|
// 软件FFT实现 (基2 DIT-FFT 算法)
|
||||||
void sw_fft(float re[], float im[], int n) {
|
void sw_fft(float re[], float im[], int n) {
|
||||||
int i, j, k, l;
|
int i, j, k, l;
|
||||||
@@ -152,18 +122,22 @@ int main(int argc, char** argv)
|
|||||||
uint32_t transfer_bytes = FFT_POINT_NUM * 4; // 1024个点 * 4字节
|
uint32_t transfer_bytes = FFT_POINT_NUM * 4; // 1024个点 * 4字节
|
||||||
|
|
||||||
// DMA 将数据从内存搬运到 FFT 输入外设
|
// DMA 将数据从内存搬运到 FFT 输入外设
|
||||||
dma_transfer(((uint32_t)hw_in_re_arr & 0x1FFFFFFF), phys_fft_in_re, transfer_bytes);
|
dma_start_transfer(0, ((uint32_t)hw_in_re_arr & 0x1FFFFFFF), phys_fft_in_re, transfer_bytes, 100);
|
||||||
dma_transfer(((uint32_t)hw_in_im_arr & 0x1FFFFFFF), phys_fft_in_im, transfer_bytes);
|
dma_start_transfer(1, ((uint32_t)hw_in_im_arr & 0x1FFFFFFF), phys_fft_in_im, transfer_bytes, 200);
|
||||||
|
dma_wait_polling(0);
|
||||||
|
dma_wait_polling(1);
|
||||||
|
|
||||||
|
|
||||||
// 启动 FFT 并等待计算完成
|
// 启动 FFT 并等待计算完成
|
||||||
RegWrite(FFT_CSR_REG, FFT_CTRL_START);
|
fft_start();
|
||||||
while ((RegRead(FFT_CSR_REG) & FFT_STAT_DONE) == 0) {
|
fft_wait();
|
||||||
// poll
|
|
||||||
}
|
|
||||||
|
|
||||||
// DMA 将结果从 FFT 输出外设搬回内存
|
// DMA 将结果从 FFT 输出外设搬回内存
|
||||||
dma_transfer(phys_fft_out_re, ((uint32_t)hw_out_re_arr & 0x1FFFFFFF), transfer_bytes);
|
dma_start_transfer(0, phys_fft_out_re, ((uint32_t)hw_out_re_arr & 0x1FFFFFFF), transfer_bytes, 10);
|
||||||
dma_transfer(phys_fft_out_im, ((uint32_t)hw_out_im_arr & 0x1FFFFFFF), transfer_bytes);
|
dma_start_transfer(1, phys_fft_out_im, ((uint32_t)hw_out_im_arr & 0x1FFFFFFF), transfer_bytes, 20);
|
||||||
|
dma_wait_polling(0);
|
||||||
|
dma_wait_polling(1);
|
||||||
|
|
||||||
|
|
||||||
tick_end = get_ns(); // 结束计时
|
tick_end = get_ns(); // 结束计时
|
||||||
hw_time = tick_end - tick_start;
|
hw_time = tick_end - tick_start;
|
||||||
|
|||||||
19
sdk/software/examples/fft_dma/result.txt
Normal file
19
sdk/software/examples/fft_dma/result.txt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
fft_csr init = 0
|
||||||
|
|
||||||
|
--- Starting Hardware FFT with DMA ---
|
||||||
|
--- Starting Software FFT ---
|
||||||
|
|
||||||
|
--- Performance Comparison ---
|
||||||
|
Timer Clock Freq : 50000000 Hz
|
||||||
|
Hardware FFT Time: 407310 ns (0.407 ms)
|
||||||
|
Software FFT Time: 274786920 ns (274.787 ms)
|
||||||
|
Speedup Ratio : 674.64x
|
||||||
|
|
||||||
|
--- Verification (Only showing Bins with energy > 10) ---
|
||||||
|
Bin [ 0] Hz: HW(Re: 4007, Im: 0) | SW(Re: 4000, Im: 0)
|
||||||
|
Bin [ 10] Hz: HW(Re: 4006, Im: -1) | SW(Re: 3999, Im: 0)
|
||||||
|
Bin [ 200] Hz: HW(Re: 2, Im: -2994) | SW(Re: 0, Im: -3000)
|
||||||
|
Bin [ 400] Hz: HW(Re: -5, Im: -1496) | SW(Re: 0, Im: -1499)
|
||||||
|
Bin [ 624] Hz: HW(Re: -5, Im: 1497) | SW(Re: 0, Im: 1500)
|
||||||
|
Bin [ 824] Hz: HW(Re: 3, Im: 2994) | SW(Re: 0, Im: 2999)
|
||||||
|
Bin [1014] Hz: HW(Re: 4007, Im: 0) | SW(Re: 3999, Im: 0)
|
||||||
@@ -3,7 +3,8 @@
|
|||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "common_func.h"
|
#include <common_func.h>
|
||||||
|
#include <dvi.h>
|
||||||
|
|
||||||
//BSP板级支持包所需全局变量
|
//BSP板级支持包所需全局变量
|
||||||
unsigned long UART_BASE = 0xbf000000; //UART16550的虚地址
|
unsigned long UART_BASE = 0xbf000000; //UART16550的虚地址
|
||||||
|
|||||||
@@ -184,12 +184,7 @@ always @(posedge clk)
|
|||||||
begin
|
begin
|
||||||
if(uart_display)
|
if(uart_display)
|
||||||
begin
|
begin
|
||||||
if(uart_data==8'hff)
|
if(uart_data !=8'hff) begin
|
||||||
begin
|
|
||||||
;//$finish;
|
|
||||||
end
|
|
||||||
else
|
|
||||||
begin
|
|
||||||
$write("%c",uart_data);
|
$write("%c",uart_data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
37
sim/sram.v
37
sim/sram.v
@@ -27,23 +27,28 @@ module sram_sp #(
|
|||||||
|
|
||||||
assign write_enable[3:0] = (~ram_be_n) & {4{(~ram_ce_n) & (~ram_we_n)}};
|
assign write_enable[3:0] = (~ram_be_n) & {4{(~ram_ce_n) & (~ram_we_n)}};
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
always@(posedge write_enable[0]) begin
|
if (write_enable[0]) BRAM[ram_addr][7:0] = ram_data[7:0];
|
||||||
#10;
|
if (write_enable[1]) BRAM[ram_addr][15:8] = ram_data[15:8];
|
||||||
if(~ram_be_n[0]) BRAM[ram_addr][7:0] <= ram_data[7:0];
|
if (write_enable[2]) BRAM[ram_addr][23:16] = ram_data[23:16];
|
||||||
end
|
if (write_enable[3]) BRAM[ram_addr][31:24] = ram_data[31:24];
|
||||||
always@(posedge write_enable[1]) begin
|
|
||||||
#10;
|
|
||||||
if(~ram_be_n[1]) BRAM[ram_addr][15:8] <= ram_data[15:8];
|
|
||||||
end
|
|
||||||
always@(posedge write_enable[2]) begin
|
|
||||||
#10;
|
|
||||||
if(~ram_be_n[2]) BRAM[ram_addr][23:16] <= ram_data[23:16];
|
|
||||||
end
|
|
||||||
always@(posedge write_enable[3]) begin
|
|
||||||
#10;
|
|
||||||
if(~ram_be_n[3]) BRAM[ram_addr][31:24] <= ram_data[31:24];
|
|
||||||
end
|
end
|
||||||
|
// always@(posedge write_enable[0]) begin
|
||||||
|
// #10;
|
||||||
|
// if(~ram_be_n[0]) BRAM[ram_addr][7:0] <= ram_data[7:0];
|
||||||
|
// end
|
||||||
|
// always@(posedge write_enable[1]) begin
|
||||||
|
// #10;
|
||||||
|
// if(~ram_be_n[1]) BRAM[ram_addr][15:8] <= ram_data[15:8];
|
||||||
|
// end
|
||||||
|
// always@(posedge write_enable[2]) begin
|
||||||
|
// #10;
|
||||||
|
// if(~ram_be_n[2]) BRAM[ram_addr][23:16] <= ram_data[23:16];
|
||||||
|
// end
|
||||||
|
// always@(posedge write_enable[3]) begin
|
||||||
|
// #10;
|
||||||
|
// if(~ram_be_n[3]) BRAM[ram_addr][31:24] <= ram_data[31:24];
|
||||||
|
// end
|
||||||
|
|
||||||
wire [31:0] RDATA = BRAM[ram_addr];
|
wire [31:0] RDATA = BRAM[ram_addr];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user