將vdma的基址映射到虛擬地址空間上,在linux系統下就可以直接通過指針訪問vdma的各個寄存器
/* Map 65535 bytes starting at offset handle->baseAddr of the descriptor
 * handle->vdmaHandler into this process, readable and writable, as a shared
 * mapping. The result is stored as an unsigned int pointer.
 * NOTE(review): 65535 is not a page multiple; presumably 65536 (64 KiB) was
 * intended -- confirm against the VDMA address range. */
handle->vdmaVirtualAddress = (unsigned int*)mmap(NULL, 65535, PROT_READ | PROT_WRITE, MAP_SHARED, handle->vdmaHandler, (off_t)handle->baseAddr);
這個函數的返回值就是申請到的vdma基址的虛擬地址,這個地址加上寄存器offset就可以用來配置各個寄存器了,在使用之前要校驗下申請的地址是不是有效的
/* Bail out with -1 if the mapping failed. perror() itself appends
 * ": <errno description>" and a newline, so the message must not end in
 * '\n' -- otherwise the errno text lands on a separate line. */
if (handle->vdmaVirtualAddress == MAP_FAILED) {
    perror("vdmaVirtualAddress mapping for absolute memory access failed");
    return -1;
}
然后還要申請圖像存放的地址,這里根據vdma的運行方式可以配置多個地址存放多幅圖像數據。
下面是我的主程序,僅供參考:
/*
 * Test driver for the hardware filter fed through the VDMA.
 *
 * Steps:
 *   1. Read INPUT_IMAGE and convert it to 2-byte-per-pixel YUV422.
 *   2. (SW_GENERATE) Run the OpenCV software Sobel as a golden reference,
 *      time it, and write OUTPUT_IMAGE_GOLDEN.
 *   3. Initialise the filter, set up the VDMA, copy the frame into frame
 *      buffer 1, start the VDMA, wait for the filter, and copy the result
 *      back from frame buffer 2.
 *   4. Print timings, write OUTPUT_IMAGE, and compare it with the golden image.
 *
 * Returns the result of image_compare() on the normal path, or -1 when the
 * input image cannot be used, the filter fails to initialise, or the
 * MEMCPY_CHECK verification fails.
 */
int main() {
    // Frame geometry passed to vdma_setup(); every buffer copy below is
    // sized from these values, so the input image must match them exactly.
    const int frame_width = 640;
    const int frame_height = 480;
    const int bytes_per_pixel = 2;
    const size_t frame_bytes = (size_t)frame_width * frame_height * bytes_per_pixel;

    struct timeval tstart, tend, hls_start, hls_end;
    float timeuse;

    Mat src_rgb = imread(INPUT_IMAGE, 1);
    if (src_rgb.empty()) {
        fprintf(stderr, "failed to read input image\n");
        return -1;
    }
    // The memcpy calls below move exactly frame_bytes; any other image size
    // would read or write past the end of the Mat buffers.
    if (src_rgb.cols != frame_width || src_rgb.rows != frame_height) {
        fprintf(stderr, "input image is %dx%d, expected %dx%d\n",
                src_rgb.cols, src_rgb.rows, frame_width, frame_height);
        return -1;
    }

    Mat src_yuv(src_rgb.rows, src_rgb.cols, CV_8UC2);
    Mat dst_yuv(src_rgb.rows, src_rgb.cols, CV_8UC2);
    Mat dst_rgb(src_rgb.rows, src_rgb.cols, CV_8UC3);

    // Convert to YUV422; the IplImage headers share data with the Mats.
    cvtcolor_rgb2yuv422(src_rgb, src_yuv);
    IplImage src = src_yuv;
    IplImage dst = dst_yuv;

#if SW_GENERATE
    printf("opencv software processing\n");
    // Time the software reference implementation.
    gettimeofday(&tstart, NULL);
    opencv_sobel_init();
    opencv_sobel(&src, &dst);
    gettimeofday(&tend, NULL);
    timeuse = 1000000 * (tend.tv_sec - tstart.tv_sec) + (tend.tv_usec - tstart.tv_usec);
    timeuse /= 1000000;
    printf("soft used time is %f\n", timeuse);
    cvtColor(dst_yuv, dst_rgb, CV_YUV2BGR_YUYV);
    imwrite(OUTPUT_IMAGE_GOLDEN, dst_rgb);
#endif

    // Running the hardware path with an uninitialised filter is pointless,
    // so stop here instead of only logging the failure.
    if (init_filter() != XST_SUCCESS) {
        printf("filter init faild!");
        return -1;
    }
    set_reg_filter();

    // Setup VDMA handle and memory-mapped ranges.
    // NOTE(review): arguments are presumably register base, width, height,
    // bytes per pixel and three frame-buffer addresses -- confirm against
    // vdma_setup()'s declaration, and check its return value if it has one.
    vdma_setup(&handle, 0x43000000, frame_width, frame_height, bytes_per_pixel,
               0x1f400000, 0x1f800000, 0x1fc00000);

    gettimeofday(&tstart, NULL);
    memcpy(handle.fb1VirtualAddress, (uchar *)src.imageData, frame_bytes);

#if MEMCPY_CHECK
    printf("memcpy checking \n");
    bool memcpy_error = false;
    const int row_stride = frame_width * bytes_per_pixel;
    for (int i = 0; i < src_yuv.rows; i++) {
        for (int j = 0; j < src_yuv.cols; j++) {
            // Each pixel holds two bytes; compare both against frame buffer 1.
            const Vec2b pix = src_yuv.at<Vec2b>(i, j);
            const int offset = j * bytes_per_pixel + i * row_stride;
            if (handle.fb1VirtualAddress[offset] != pix.val[0] ||
                handle.fb1VirtualAddress[offset + 1] != pix.val[1]) {
                memcpy_error = true;
            }
        }
    }
    if (memcpy_error) {
        printf("img copy error");
        // Release what vdma_setup() mapped and report failure with a
        // non-zero exit status.
        vdma_halt(&handle);
        return -1;
    }
    printf("memcpy check result FB2:(ORI)\n");
    for (int j = 512; j < 512 + 20; j++) {
        printf(" %02x", handle.fb2VirtualAddress[j]);
    }
    printf("\n");
#endif

    // Time only the hardware run: VDMA start until the filter reports done.
    gettimeofday(&hls_start, NULL);
    vdma_start_triple_buffering(&handle);
    wait_done_filter();
    gettimeofday(&hls_end, NULL);

#if RESULT_CHECK
    printf("RESULT CHECK FB2:(NOW)\n");
    for (int j = 635 * 2; j < 635 * 2 + 20; j++) {
        printf(" %02x", handle.fb2VirtualAddress[j]);
    }
    printf("\n");
#endif

    memcpy((uchar *)dst.imageData, handle.fb2VirtualAddress, frame_bytes);
    gettimeofday(&tend, NULL);

    // Total time covers both buffer copies plus the hardware run.
    timeuse = 1000000 * (tend.tv_sec - tstart.tv_sec) + (tend.tv_usec - tstart.tv_usec);
    timeuse /= 1000000;
    printf("hard total used time is %f\n", timeuse);
    timeuse = 1000000 * (hls_end.tv_sec - hls_start.tv_sec) + (hls_end.tv_usec - hls_start.tv_usec);
    timeuse /= 1000000;
    printf("hard hls used time is %f\n", timeuse);

    print_vdma_register_status();
    cvtColor(dst_yuv, dst_rgb, CV_YUV2BGR_YUYV);
    imwrite(OUTPUT_IMAGE, dst_rgb);
    stop_filter();

    // Halt VDMA and unmap memory ranges
    vdma_halt(&handle);

    // NOTE(review): OUTPUT_IMAGE_GOLDEN is only written in this run when
    // SW_GENERATE is enabled; otherwise it must already exist.
    return image_compare(OUTPUT_IMAGE, OUTPUT_IMAGE_GOLDEN);
}
vdma寄存器配置參考pg020_axi_vdma文檔
軟件編譯選項
這里單獨列出來是因為感覺在這種處理器性能不是很好的硬件平臺下進行大計算量算法的實施的情況下,一定要讓軟件以最高效率運行(盡力最高效率吧)。下面是辛辛苦苦寫的Makefile,用的通配符@梅神,稍微改改就可以用在新工程上
# Builds vdma_test from every .c and .cpp file in this directory.
# All sources, including .c files, are compiled with g++.
CC=g++
# Optimisation flags for Cortex-A9 with NEON auto-vectorisation.
CFLAGS= -g -O2 -mcpu=cortex-a9 -mfpu=neon -ftree-vectorize -mvectorize-with-neon-quad #-mfloat-abi=softfp -ffast-math
# OpenCV compile and link flags come from pkg-config.
CFLAGS+=`pkg-config --cflags opencv`
LDFLAGS+=`pkg-config --libs opencv`

# One object file per source file found by the wildcards.
OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
OBJS += $(patsubst %.cpp,%.o,$(wildcard *.cpp))

# all and clean are commands, not files; keep same-named files from shadowing them.
.PHONY: all clean

all: vdma_test

# Recipe lines must begin with a TAB character.
%.o: %.cpp
	$(CC) -c $(CFLAGS) -o $@ $<

%.o: %.c
	$(CC) -c $(CFLAGS) -o $@ $<

vdma_test: $(OBJS)
	$(CC) -o $@ $(OBJS) $(LDFLAGS)

# -f keeps clean from failing when there is nothing to remove.
clean:
	rm -f vdma_test $(OBJS)
cflag的優化配置參考的xapp1206-boost-sw-performance-zynq7soc-w-neon文檔。
評論
查看更多