- 若该文为原创文章,未经允许不得转载
- 风释雪
- QQ:627833006
- WX:Cheng18375816918
- CSDN博客: 风释雪FPGA
- 知乎:风释雪FPGA
1.版本说明
日期 | 作者 | 版本说明 |
---|---|---|
2024xxxx | 风释雪 | 初始版本 |
2.概述
ZYNQ 7010/7020 HR/HP Bank LVDS Rx/TX;
3.目标
ZYNQ 7010 LVDS 1:7 接收代码实现;
ZYNQ 7010 LVDS 1:7 仿真环境下发送与接收实现;
4.时序
使用标准BT1120 内同步,将图像按照图中时序发送,其中,时钟比数据等于 1:7, 该图为标准1:7视频传输接口;
以1080P60为例: Pixel Clk = 148.5 MHz, 共4个lane, 每个lane线速率等于: 148.5 MHz x 7 bit = 1.0395 Gbps;
5.分析
- 从TX的角度,理解数据是怎么发送的;
- 从RX的角度,知道数据应该怎么接收;
- TX端GearBox 实现速度的变化(7bit数据变8bit数据),让发送数据满足OSERDES要求;
- RX端GearBox 实现速度的变化(8bit数据变7bit数据),让数据恢复原始YUV422-8BIT值;
- LVDS 1:7 在大多数情况下无法直接发送,需要转为8bit(ZYNQ支持,但是MPSOC不支持,本文为了统一方法和讲清楚核心技术,使用8bit);
- ISERDES/OSERDES性能要求 LineRate >= 1.0395Gbps;
- DATA_WIDTH=8 (后续MPSOC/UltraScale FPGA不再支持DataWidth = 7/14);
- BITSLIP不使用 (后续MPSOC/UltraScale FPGA不再支持,且软件控制逻辑简单)。
6.XILINX原语
- IDELAY (调整数据延时,影响采集位置)
- ISERDES / OSERDES (重点理解时序图)
- IBUFDS
- MMCM (输出时钟相位相同,频率两倍)
重点参考UG471
- OSERDES 数据发送 D1(A) -> D2(B) -> D3(C) -> D4(D) -> D5(E) -> D6(F) -> D7(G) -> D8(H)
- ISERDES 数据接收 Q8(A) -> Q7(B) -> Q6(C) -> Q5(D) -> Q4(E) -> Q3(F) -> Q2(G) -> Q1(H)
// -----------------------------------------------------------------------------
// Per-lane SelectIO primitives (fragment).
// `i` is the lane index; presumably a genvar of an enclosing generate loop that
// is not part of this listing -- TODO confirm.
// -----------------------------------------------------------------------------

// Input delay on the received serial bit. The tap value is loaded from
// idly_cntvalue_curr when idly_load is asserted (IDELAY_TYPE = "VAR_LOAD");
// CE/INC are tied low, so the tap is never stepped incrementally.
// NOTE(review): IDELAYE2 normally needs an IDELAYCTRL in the same design; none is
// visible in this fragment.
IDELAYE2 #(
    .IDELAY_TYPE           ( "VAR_LOAD" ),  // FIXED, VARIABLE, VAR_LOAD, VAR_LOAD_PIPE
    .IDELAY_VALUE          ( 0          ),  // Input delay tap setting (0-31)
    .DELAY_SRC             ( "IDATAIN"  ),  // Delay input (IDATAIN, DATAIN)
    .SIGNAL_PATTERN        ( "DATA"     ),  // DATA, CLOCK input signal
    .REFCLK_FREQUENCY      ( 200.0      ),  // IDELAYCTRL clock input frequency in MHz (190.0-210.0, 290.0-310.0)
    .HIGH_PERFORMANCE_MODE ( "TRUE"     ),  // Reduced jitter ("TRUE"), reduced power ("FALSE")
    .CINVCTRL_SEL          ( "FALSE"    ),  // Enable dynamic clock inversion (FALSE, TRUE)
    .PIPE_SEL              ( "FALSE"    )   // Select pipelined mode (FALSE, TRUE)
) IDELAYE2_DAT (
    // Data path
    .IDATAIN     ( dat_serial[i]      ),    // 1-bit input: data input from the I/O
    .DATAIN      ( 1'b0               ),    // 1-bit input: internal delay data input (unused)
    .DATAOUT     ( dat_serial_dly[i]  ),    // 1-bit output: delayed data output
    // Tap control
    .C           ( lvds_rx_clk        ),    // 1-bit input: clock input
    .LD          ( idly_load          ),    // 1-bit input: load the tap value
    .CNTVALUEIN  ( idly_cntvalue_curr ),    // 5-bit input: counter value input
    .CNTVALUEOUT (                    ),    // 5-bit output: counter value output (unused)
    .CE          ( 1'b0               ),    // 1-bit input: enable increment/decrement (tied off)
    .INC         ( 1'b0               ),    // 1-bit input: increment/decrement select (tied off)
    .CINVCTRL    ( 1'b0               ),    // 1-bit input: dynamic clock inversion (tied off)
    .LDPIPEEN    ( 1'b1               ),    // 1-bit input: enable PIPELINE register load
    .REGRST      ( lvds_serdes_rst    )     // 1-bit input: active-high reset of the tap-delay register
);

// 1:8 DDR deserialiser fed from the delayed data (DDLY). Bit-slip is not used.
// Q8 is mapped to bit 0 and Q1 to bit 7 of the lane byte, i.e. the oldest received
// bit lands in the LSB (the article lists the receive order as Q8 ... Q1).
ISERDESE2 #(
    .DATA_RATE         ( "DDR"        ),    // DDR, SDR
    .DATA_WIDTH        ( 8            ),    // Parallel data width (2-8,10,14)
    .INTERFACE_TYPE    ( "NETWORKING" ),    // MEMORY, MEMORY_DDR3, MEMORY_QDR, NETWORKING, OVERSAMPLE
    .SERDES_MODE       ( "MASTER"     ),    // MASTER, SLAVE
    .IOBDELAY          ( "BOTH"       ),    // NONE, BOTH, IBUF, IFD
    .NUM_CE            ( 2            ),    // Number of clock enables (1,2)
    .OFB_USED          ( "FALSE"      ),    // Select OFB path (FALSE, TRUE)
    .DYN_CLKDIV_INV_EN ( "FALSE"      ),    // Enable DYNCLKDIVINVSEL inversion (FALSE, TRUE)
    .DYN_CLK_INV_EN    ( "FALSE"      ),    // Enable DYNCLKINVSEL inversion (FALSE, TRUE)
    .INIT_Q1           ( 1'b0         ),
    .INIT_Q2           ( 1'b0         ),
    .INIT_Q3           ( 1'b0         ),
    .INIT_Q4           ( 1'b0         ),
    .SRVAL_Q1          ( 1'b0         ),
    .SRVAL_Q2          ( 1'b0         ),
    .SRVAL_Q3          ( 1'b0         ),
    .SRVAL_Q4          ( 1'b0         )
) ISERDESE2_DAT (
    // Clocks and reset
    .CLK          ( lvds_rx_clk_x4    ),    // 1-bit input: high-speed clock
    .CLKB         ( ~lvds_rx_clk_x4   ),    // 1-bit input: high-speed secondary (inverted) clock
    .CLKDIV       ( lvds_rx_clk       ),    // 1-bit input: divided clock
    .CLKDIVP      ( 1'b0              ),
    .OCLK         ( 1'b0              ),    // 1-bit input: high-speed output clock, used when INTERFACE_TYPE="MEMORY"
    .OCLKB        ( 1'b0              ),    // 1-bit input: high-speed negative-edge output clock
    .RST          ( lvds_serdes_rst   ),    // 1-bit input: active-high asynchronous reset
    // Serial inputs
    .DDLY         ( dat_serial_dly[i] ),    // 1-bit input: serial data from IDELAYE2
    .D            ( 1'b0              ),    // 1-bit input: direct data input (unused)
    .OFB          ( 1'b0              ),    // 1-bit input: data feedback from OSERDESE2 (unused)
    // Control
    .BITSLIP      ( 1'b0              ),
    .CE1          ( 1'b1              ),
    .CE2          ( 1'b1              ),
    .DYNCLKDIVSEL ( 1'b0              ),    // 1-bit input: dynamic CLKDIV inversion
    .DYNCLKSEL    ( 1'b0              ),    // 1-bit input: dynamic CLK/CLKB inversion
    .SHIFTIN1     ( 1'b0              ),
    .SHIFTIN2     ( 1'b0              ),
    // Parallel outputs: Q8 -> bit 0 ... Q1 -> bit 7 of this lane's byte
    .Q8           ( dat_paralle[8*i+0] ),
    .Q7           ( dat_paralle[8*i+1] ),
    .Q6           ( dat_paralle[8*i+2] ),
    .Q5           ( dat_paralle[8*i+3] ),
    .Q4           ( dat_paralle[8*i+4] ),
    .Q3           ( dat_paralle[8*i+5] ),
    .Q2           ( dat_paralle[8*i+6] ),
    .Q1           ( dat_paralle[8*i+7] ),
    // Unused outputs
    .O            (                   ),
    .SHIFTOUT1    (                   ),
    .SHIFTOUT2    (                   )
);

// 8:1 DDR serialiser. D1 is taken from bit 0 and D8 from bit 7 of the lane byte
// (the article lists the transmit order as D1 ... D8). The 3-state path is unused.
// The instance name is kept exactly as in the original listing.
OSERDESE2 #(
    .DATA_RATE_OQ   ( "DDR"    ),           // DDR, SDR
    .DATA_RATE_TQ   ( "SDR"    ),           // DDR, BUF, SDR
    .DATA_WIDTH     ( 8        ),           // Parallel data width (2-8,10,14)
    .SERDES_MODE    ( "MASTER" ),           // MASTER, SLAVE
    .TRISTATE_WIDTH ( 1        ),           // 3-state converter width (1,4)
    .TBYTE_CTL      ( "FALSE"  ),           // Enable tristate byte operation (FALSE, TRUE)
    .TBYTE_SRC      ( "FALSE"  ),           // Tristate byte source (FALSE, TRUE)
    .INIT_OQ        ( 1'b0     ),           // Initial value of OQ output (1'b0,1'b1)
    .INIT_TQ        ( 1'b0     ),           // Initial value of TQ output (1'b0,1'b1)
    .SRVAL_OQ       ( 1'b0     ),           // OQ output value when SR is used (1'b0,1'b1)
    .SRVAL_TQ       ( 1'b0     )            // TQ output value when SR is used (1'b0,1'b1)
) OSERDESE3_D (
    // Clocks and reset
    .CLK       ( lvds_tx_clk_x4      ),
    .CLKDIV    ( lvds_tx_clk         ),
    .RST       ( ~lvds_tx_clk_locked ),     // held in reset while lvds_tx_clk_locked is low
    // Parallel inputs: D1 <- bit 0 ... D8 <- bit 7 of this lane's byte
    .D1        ( dat_paralle[8*i+0]  ),
    .D2        ( dat_paralle[8*i+1]  ),
    .D3        ( dat_paralle[8*i+2]  ),
    .D4        ( dat_paralle[8*i+3]  ),
    .D5        ( dat_paralle[8*i+4]  ),
    .D6        ( dat_paralle[8*i+5]  ),
    .D7        ( dat_paralle[8*i+6]  ),
    .D8        ( dat_paralle[8*i+7]  ),
    .OCE       ( 1'b1                ),
    // Serial output
    .OQ        ( dat_serial[i]       ),
    // 3-state and cascade inputs (tied off)
    .T1        ( 1'b0                ),
    .T2        ( 1'b0                ),
    .T3        ( 1'b0                ),
    .T4        ( 1'b0                ),
    .TCE       ( 1'b0                ),
    .TBYTEIN   ( 1'b0                ),
    .SHIFTIN1  ( 1'b0                ),
    .SHIFTIN2  ( 1'b0                ),
    // Unused outputs
    .OFB       (                     ),
    .TFB       (                     ),
    .TQ        (                     ),
    .TBYTEOUT  (                     ),
    .SHIFTOUT1 (                     ),
    .SHIFTOUT2 (                     )
);
7.GearBox原理
// -----------------------------------------------------------------------------
// RX gearbox (fragment): 8-bit words in, 7-bit words out, for four lanes.
// Each lane has its own buffer (temp0..temp3). The write side fills it with
// 14 x 8-bit slices, the read side drains it as 16 x 7-bit slices
// (14*8 = 16*7 = 112 bits).
// Assumes temp0..temp3 are at least 112 bits wide and waddr/raddr are 4-bit
// registers declared elsewhere -- TODO confirm.
// -----------------------------------------------------------------------------

// One byte per lane taken from the parallel bus.
assign wdata0 = dat_paralle[ 7: 0];
assign wdata1 = dat_paralle[15: 8];
assign wdata2 = dat_paralle[23:16];
assign wdata3 = dat_paralle[31:24];

// Write side: store the current byte of every lane into slot `waddr`.
// Only slots 0..13 exist; any other waddr value writes nothing.
always @ (posedge lvds_rx_clk) begin : gearbox_wr
    integer wslot;
    for (wslot = 0; wslot < 14; wslot = wslot + 1) begin
        if (waddr == wslot) begin
            temp0[8*wslot +: 8] <= wdata0;
            temp1[8*wslot +: 8] <= wdata1;
            temp2[8*wslot +: 8] <= wdata2;
            temp3[8*wslot +: 8] <= wdata3;
        end
    end
end

// Write pointer: cleared asynchronously while `locked` is low, then counts
// 0, 1, ..., 13, 0, ...
always @ (posedge lvds_rx_clk or negedge locked) begin
    if (~locked) begin
        waddr <= 4'd0;
    end else if (waddr == 4'd13) begin
        waddr <= 4'd0;
    end else begin
        waddr <= waddr + 1'd1;
    end
end

// Read side: fetch the 7-bit slice `raddr` of every lane.
always @ (posedge vid_clk) begin : gearbox_rd
    integer rslot;
    for (rslot = 0; rslot < 16; rslot = rslot + 1) begin
        if (raddr == rslot) begin
            rdata0 <= temp0[7*rslot +: 7];
            rdata1 <= temp1[7*rslot +: 7];
            rdata2 <= temp2[7*rslot +: 7];
            rdata3 <= temp3[7*rslot +: 7];
        end
    end
end

// Read pointer: preset asynchronously to 8 while `locked` is low, so it starts
// half a buffer (8 of 16 slots) away from the write pointer; then counts
// 8, 9, ..., 15, 0, 1, ...
always @ (posedge vid_clk or negedge locked) begin
    if (~locked) begin
        raddr <= 4'd8;
    end else if (raddr == 4'd15) begin
        raddr <= 4'd0;
    end else begin
        raddr <= raddr + 1'd1;
    end
end
8.软件架构
- LVDS接收的重点、难点,在于如何训练得到正确的数据采集时钟相位,最终稳定工作。在整个软件架构中,通过状态机的形式,对IDELAY的延时参数反复修改、自动测试,最终得到最优延时点,并精准采集数据;
- 如何判断是否数据锁定:BT1120内同步有完整的VS/HS/DE,通过对VS/HS/DE 进行解析并判断图像行场完整性,就可以确认输入是否正常。
- 如何判断是否采样点最优:通过不断调整IDELAY的值,可以找到能锁定图像输入的最小和最大delay,取平均值,即为眼图的正中间,也就是最优采样点!
9.软件实现(部分)
详见代码
如何寻找最优位置:
/********************************************************************************
*
* LVDS CLK Domain: Auto Training Function
*
********************************************************************************/
// Window tracker for the delay sweep.
//   temp_min / temp_max : first and latest delay value of the lock window that
//                         is currently being followed.
//   calc_min / calc_max : bounds of the most recently closed window;
//   calc_sub / calc_add : its width (max - min) and the sum (max + min).
// temp_code / temp_locked hold the status sampled at the previous
// lvds_serdes_result_fsm_vld, so comparing them with lvds_check_status_* detects
// transitions between consecutive results.
always @ (posedge lvds_rx_clk) begin
    if (lvds_serdes_rst) begin
        lvds_align_train_temp_code <= 3'd0;
        lvds_align_train_temp_locked <= 1'd0;
        lvds_align_train_temp_min <= {(DRP_BITS){1'd0}};
        lvds_align_train_temp_max <= {(DRP_BITS){1'd0}};
        lvds_align_train_calc_min <= {(DRP_BITS){1'd0}};
        lvds_align_train_calc_max <= {(DRP_BITS){1'd0}};
        lvds_align_train_calc_sub <= {(DRP_BITS){1'd0}};
        lvds_align_train_calc_add <= {(DRP_BITS+1){1'd0}};
    end else if (lvds_serdes_result_fsm_vld) begin
        lvds_align_train_temp_code <= lvds_check_status_code; // input from another module; pay close attention
        lvds_align_train_temp_locked <= lvds_check_status_locked; // input from another module; pay close attention
        if (lvds_align_train_temp_locked == 1'd1) begin
            if (lvds_check_status_locked == 1'd1) begin
                if (lvds_align_train_temp_code == lvds_check_status_code) begin
                    // Still locked with the same code: keep searching and extend the window.
                    lvds_align_train_temp_max <= lvds_serdes_drp_data_curr;
                    // Last step of the sweep: close the window at DRP_MAX-1.
                    if (lvds_serdes_drp_data_curr >= (DRP_MAX-1)) begin
                        lvds_align_train_calc_min <= lvds_align_train_temp_min;
                        lvds_align_train_calc_max <= (DRP_MAX-1);
                        lvds_align_train_calc_sub <= (DRP_MAX-1) - lvds_align_train_temp_min;
                        lvds_align_train_calc_add <= (DRP_MAX-1) + lvds_align_train_temp_min;
                    end
                end else begin
                    // Lock moved from state A to state B (code changed): save window A
                    // and start a new window at the current delay value.
                    lvds_align_train_temp_min <= lvds_serdes_drp_data_curr;
                    lvds_align_train_temp_max <= lvds_serdes_drp_data_curr;
                    lvds_align_train_calc_min <= lvds_align_train_temp_min;
                    lvds_align_train_calc_max <= lvds_align_train_temp_max;
                    lvds_align_train_calc_sub <= lvds_align_train_temp_max - lvds_align_train_temp_min;
                    lvds_align_train_calc_add <= lvds_align_train_temp_max + lvds_align_train_temp_min;
                end
            end else if (lvds_check_status_locked == 1'd0) begin
                // Locked -> unlocked: save the window that just ended.
                lvds_align_train_calc_min <= lvds_align_train_temp_min;
                lvds_align_train_calc_max <= lvds_align_train_temp_max;
                lvds_align_train_calc_sub <= lvds_align_train_temp_max - lvds_align_train_temp_min;
                lvds_align_train_calc_add <= lvds_align_train_temp_max + lvds_align_train_temp_min;
            end
        end
    // NOTE(review): as written, this branch is the `else` of the
    // lvds_serdes_result_fsm_vld test above, not of the temp_locked == 1 test, so
    // it runs on cycles where fsm_vld is LOW. If it was meant to be evaluated
    // together with the result-valid strobe, one `end` is misplaced -- confirm
    // against the full source before changing.
    end else if (lvds_align_train_temp_locked == 1'd0) begin
        if (lvds_check_status_locked == 1'd1) begin
            // Unlocked -> locked: open a new window at the current delay value and continue searching.
            lvds_align_train_temp_min <= lvds_serdes_drp_data_curr;
            lvds_align_train_temp_max <= lvds_serdes_drp_data_curr;
        end else begin
            // Ambiguous state, nothing to do.
        end
    end
end

// Keep the widest window seen since reset: whenever the latest closed window is
// wider than the saved one (calc_sub > save_sub), its min/max/sub/add replace it.
always @ (posedge lvds_rx_clk) begin
    if (lvds_serdes_rst) begin
        lvds_align_train_save_sub <= {(DRP_BITS){1'd0}};
        lvds_align_train_save_add <= {(DRP_BITS+1){1'd0}};
        lvds_align_train_save_min <= {(DRP_BITS){1'd0}};
        lvds_align_train_save_max <= {(DRP_BITS){1'd0}};
    end else if (lvds_align_train_calc_sub > lvds_align_train_save_sub) begin
        lvds_align_train_save_sub <= lvds_align_train_calc_sub;
        lvds_align_train_save_add <= lvds_align_train_calc_add;
        lvds_align_train_save_min <= lvds_align_train_calc_min;
        lvds_align_train_save_max <= lvds_align_train_calc_max;
    end
    // Updated on every clock, outside the if/else above (also during reset):
    // dropping bit 0 of (max + min) gives the midpoint of the saved window.
    lvds_align_train_save_ave <= lvds_align_train_save_add[DRP_BITS+1-1:1]; // final result, output for use by other modules
end