diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 06:03:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 06:03:18 +0000 |
commit | af5e8f5366b05c3d4f8521f318c143a5c5dc3ea9 (patch) | |
tree | c5691445908b1beca9facf0e5e3c5d7f35f74228 /lib/mesa/src/gallium/drivers/radeon | |
parent | 27c93456b58343162f7c4ad20ca6bea0c9a91646 (diff) |
Merge Mesa 20.1.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/radeon')
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/.editorconfig | 2 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c | 2467 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h | 656 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vce.c | 783 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 706 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c | 384 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c | 1103 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c | 2775 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h | 1419 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_video.c | 210 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_video.h | 25 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h | 1465 |
12 files changed, 5899 insertions, 6096 deletions
diff --git a/lib/mesa/src/gallium/drivers/radeon/.editorconfig b/lib/mesa/src/gallium/drivers/radeon/.editorconfig deleted file mode 100644 index cc8e11ffd..000000000 --- a/lib/mesa/src/gallium/drivers/radeon/.editorconfig +++ /dev/null @@ -1,2 +0,0 @@ -[*.{c,h}] -indent_style = tab diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c index 59ff5a88d..41f900076 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.c @@ -25,808 +25,808 @@ * **************************************************************************/ -#include <sys/types.h> -#include <assert.h> -#include <errno.h> -#include <unistd.h> -#include <stdio.h> +#include "radeon_uvd.h" #include "pipe/p_video_codec.h" - +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" +#include <sys/types.h> -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_uvd.h" +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> #define NUM_BUFFERS 4 #define NUM_MPEG2_REFS 6 -#define NUM_H264_REFS 17 -#define NUM_VC1_REFS 5 +#define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 -#define FB_BUFFER_OFFSET 0x1000 -#define FB_BUFFER_SIZE 2048 -#define FB_BUFFER_SIZE_TONGA (2048 * 64) -#define IT_SCALING_TABLE_SIZE 992 +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define IT_SCALING_TABLE_SIZE 992 #define UVD_SESSION_CONTEXT_SIZE (128 * 1024) /* UVD decoder representation */ struct ruvd_decoder { - struct pipe_video_codec base; - - ruvd_set_dtb set_dtb; - - unsigned stream_handle; - unsigned stream_type; - unsigned frame_number; - - struct pipe_screen *screen; - struct radeon_winsys* ws; - struct radeon_cmdbuf* cs; - - unsigned cur_buffer; - - struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; - struct ruvd_msg *msg; - uint32_t *fb; - unsigned fb_size; - uint8_t *it; - - struct rvid_buffer bs_buffers[NUM_BUFFERS]; - void* bs_ptr; - unsigned bs_size; - - struct rvid_buffer dpb; - bool use_legacy; - struct rvid_buffer ctx; - struct rvid_buffer sessionctx; - struct { - unsigned data0; - unsigned data1; - unsigned cmd; - unsigned cntl; - } reg; - - void *render_pic_list[16]; + struct pipe_video_codec base; + + ruvd_set_dtb set_dtb; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + unsigned cur_buffer; + + struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; + struct ruvd_msg *msg; + uint32_t *fb; + unsigned fb_size; + uint8_t *it; + + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + void *bs_ptr; + unsigned bs_size; + + struct rvid_buffer dpb; + bool use_legacy; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + struct { + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned cntl; + } reg; + + void *render_pic_list[16]; }; /* flush IB to the hardware */ static int flush(struct ruvd_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ -static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - int reloc_idx; - - reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - if (!dec->use_legacy) { - uint64_t addr; - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; - set_reg(dec, dec->reg.data0, addr); - set_reg(dec, dec->reg.data1, addr >> 32); - } else { - off += dec->ws->buffer_get_reloc_offset(buf); - set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); - set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); - } - set_reg(dec, dec->reg.cmd, cmd << 1); + int reloc_idx; + + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + if (!dec->use_legacy) { + uint64_t addr; + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; + set_reg(dec, dec->reg.data0, addr); + set_reg(dec, dec->reg.data1, addr >> 32); + } else { + off += dec->ws->buffer_get_reloc_offset(buf); + set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); + set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); + } + set_reg(dec, dec->reg.cmd, cmd << 1); } /* do the codec needs an IT buffer ?*/ static bool have_it(struct ruvd_decoder *dec) { - return dec->stream_type == RUVD_CODEC_H264_PERF || - dec->stream_type == RUVD_CODEC_H265; + return dec->stream_type == RUVD_CODEC_H264_PERF || dec->stream_type == RUVD_CODEC_H265; } /* map the next available message/feedback/itscaling buffer */ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) { - struct rvid_buffer* buf; - uint8_t *ptr; + struct rvid_buffer *buf; + uint8_t *ptr; - /* grab the current message/feedback buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + /* and map it for CPU access */ + ptr = + dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - /* calc buffer offsets */ - dec->msg = (struct ruvd_msg *)ptr; - memset(dec->msg, 0, sizeof(*dec->msg)); + /* calc buffer offsets */ + dec->msg = (struct ruvd_msg *)ptr; + memset(dec->msg, 0, sizeof(*dec->msg)); - dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); - if (have_it(dec)) - dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); } /* unmap and send a message command to the VCPU */ static void send_msg_buf(struct ruvd_decoder *dec) { - struct rvid_buffer* buf; - - /* ignore the request if message/feedback buffer isn't mapped */ - if (!dec->msg || !dec->fb) - return; + struct rvid_buffer *buf; - /* grab the current message buffer */ - buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; - /* unmap the buffer */ - dec->ws->buffer_unmap(buf->res->buf); - dec->msg = NULL; - dec->fb = NULL; - dec->it = NULL; + /* grab the current message buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; - if (dec->sessionctx.res) - send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, - dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, - RADEON_DOMAIN_VRAM); + if (dec->sessionctx.res) + send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - /* and send it to the hardware */ - send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + /* and send it to the hardware */ + send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); } /* cycle to the next set of buffers */ static void next_buffer(struct ruvd_decoder *dec) { - ++dec->cur_buffer; - dec->cur_buffer %= NUM_BUFFERS; + ++dec->cur_buffer; + dec->cur_buffer %= NUM_BUFFERS; } /* convert the profile into something UVD understands */ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) { - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - return (family >= CHIP_TONGA) ? - RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + return (family >= CHIP_TONGA) ? RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; - case PIPE_VIDEO_FORMAT_VC1: - return RUVD_CODEC_VC1; + case PIPE_VIDEO_FORMAT_VC1: + return RUVD_CODEC_VC1; - case PIPE_VIDEO_FORMAT_MPEG12: - return RUVD_CODEC_MPEG2; + case PIPE_VIDEO_FORMAT_MPEG12: + return RUVD_CODEC_MPEG2; - case PIPE_VIDEO_FORMAT_MPEG4: - return RUVD_CODEC_MPEG4; + case PIPE_VIDEO_FORMAT_MPEG4: + return RUVD_CODEC_MPEG4; - case PIPE_VIDEO_FORMAT_HEVC: - return RUVD_CODEC_H265; + case PIPE_VIDEO_FORMAT_HEVC: + return RUVD_CODEC_H265; - case PIPE_VIDEO_FORMAT_JPEG: - return RUVD_CODEC_MJPEG; + case PIPE_VIDEO_FORMAT_JPEG: + return RUVD_CODEC_MJPEG; - default: - assert(0); - return 0; - } + default: + assert(0); + return 0; + } } static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec) { - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - unsigned max_references = dec->base.max_references + 1; - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - if (!dec->use_legacy) { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); - } else { - // the firmware seems to always assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - // macroblock context buffer - ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); - } - - return ctx_size; + unsigned width_in_mb, height_in_mb, ctx_size; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); + } else { + // the firmware seems to always assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // macroblock context buffer + ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); + } + + return ctx_size; } static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) { - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - width = align (width, 16); - height = align (height, 16); - return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; + width = align(width, 16); + height = align(height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } -static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) +static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, + struct pipe_h265_picture_desc *pic) { - unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; - unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; - unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = + (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); - context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); - max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned)ceil(height * 8 / 2048.0); - cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; - db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); - return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; } static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) { - if (((struct si_screen*)dec->screen)->info.family < CHIP_VEGA10) - return 16; - else - return 32; + if (((struct si_screen *)dec->screen)->info.family < CHIP_VEGA10) + return 16; + else + return 32; } /* calculate size of reference picture buffer */ static unsigned calc_dpb_size(struct ruvd_decoder *dec) { - unsigned width_in_mb, height_in_mb, image_size, dpb_size; - - // always align them to MB size for dpb calculation - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - // always one more for currently decoded picture - unsigned max_references = dec->base.max_references + 1; - - // aligned size of a single frame - image_size = align(width, get_db_pitch_alignment(dec)) * height; - image_size += image_size / 2; - image_size = align(image_size, 1024); - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - if (!dec->use_legacy) { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned alignment = 64, num_dpb_buffer; - - if (dec->stream_type == RUVD_CODEC_H264_PERF) - alignment = 256; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { - dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); - dpb_size += align(width_in_mb * height_in_mb * 32, alignment); - } - } else { - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - // reference picture buffer - dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { - // macroblock context buffer - dpb_size += width_in_mb * height_in_mb * max_references * 192; - // IT surface buffer - dpb_size += width_in_mb * height_in_mb * 32; - } - } - break; - } - - case PIPE_VIDEO_FORMAT_HEVC: - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); - - width = align (width, 16); - height = align (height, 16); - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references; - else - dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references; - break; - - case PIPE_VIDEO_FORMAT_VC1: - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_VC1_REFS, max_references); - - // reference picture buffer - dpb_size = image_size * max_references; - - // CONTEXT_BUFFER - dpb_size += width_in_mb * height_in_mb * 128; - - // IT surface buffer - dpb_size += width_in_mb * 64; - - // DB surface buffer - dpb_size += width_in_mb * 128; - - // BP - dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - // reference picture buffer, must be big enough for all frames - dpb_size = image_size * NUM_MPEG2_REFS; - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - // reference picture buffer - dpb_size = image_size * max_references; - - // CM - dpb_size += width_in_mb * height_in_mb * 64; - - // IT surface buffer - dpb_size += align(width_in_mb * height_in_mb * 32, 64); - - dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - dpb_size = 0; - break; - - default: - // something is missing here - assert(0); - - // at least use a sane default value - dpb_size = 32 * 1024 * 1024; - break; - } - return dpb_size; + unsigned width_in_mb, height_in_mb, image_size, dpb_size; + + // always align them to MB size for dpb calculation + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + // always one more for currently decoded picture + unsigned max_references = dec->base.max_references + 1; + + // aligned size of a single frame + image_size = align(width, get_db_pitch_alignment(dec)) * height; + image_size += image_size / 2; + image_size = align(image_size, 1024); + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned alignment = 64, num_dpb_buffer; + + if (dec->stream_type == RUVD_CODEC_H264_PERF) + alignment = 256; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) { + dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); + dpb_size += align(width_in_mb * height_in_mb * 32, alignment); + } + } else { + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // reference picture buffer + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF) || + (((struct si_screen *)dec->screen)->info.family < CHIP_POLARIS10)) { + // macroblock context buffer + dpb_size += width_in_mb * height_in_mb * max_references * 192; + // IT surface buffer + dpb_size += width_in_mb * height_in_mb * 32; + } + } + break; + } + + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align(width, 16); + height = align(height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * + max_references; + else + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * + max_references; + break; + + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + + // reference picture buffer + dpb_size = image_size * max_references; + + // CONTEXT_BUFFER + dpb_size += width_in_mb * height_in_mb * 128; + + // IT surface buffer + dpb_size += width_in_mb * 64; + + // DB surface buffer + dpb_size += width_in_mb * 128; + + // BP + dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + // reference picture buffer, must be big enough for all frames + dpb_size = image_size * NUM_MPEG2_REFS; + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + // reference picture buffer + dpb_size = image_size * max_references; + + // CM + dpb_size += width_in_mb * height_in_mb * 64; + + // IT surface buffer + dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + + default: + // something is missing here + assert(0); + + // at least use a sane default value + dpb_size = 32 * 1024 * 1024; + break; + } + return dpb_size; } /* free associated data in the video buffer callback */ static void ruvd_destroy_associated_data(void *data) { - /* NOOP, since we only use an intptr */ + /* NOOP, since we only use an intptr */ } /* get h264 specific message bits */ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) { - struct ruvd_h264 result; - - memset(&result, 0, sizeof(result)); - switch (pic->base.profile) { - case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: - case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: - result.profile = RUVD_H264_PROFILE_BASELINE; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: - result.profile = RUVD_H264_PROFILE_MAIN; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - result.profile = RUVD_H264_PROFILE_HIGH; - break; - - default: - assert(0); - break; - } - - result.level = dec->base.level; - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; - result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; - result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; - result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; - - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; - result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - - switch (dec->base.chroma_format) { - case PIPE_VIDEO_CHROMA_FORMAT_NONE: - /* TODO: assert? */ - break; - case PIPE_VIDEO_CHROMA_FORMAT_400: - result.chroma_format = 0; - break; - case PIPE_VIDEO_CHROMA_FORMAT_420: - result.chroma_format = 1; - break; - case PIPE_VIDEO_CHROMA_FORMAT_422: - result.chroma_format = 2; - break; - case PIPE_VIDEO_CHROMA_FORMAT_444: - result.chroma_format = 3; - break; - } - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; - result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; - result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; - result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; - result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; - result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; - - result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; - result.slice_group_map_type = pic->pps->slice_group_map_type; - result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; - result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; - result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; - result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; - - memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); - memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); - - if (dec->stream_type == RUVD_CODEC_H264_PERF) { - memcpy(dec->it, result.scaling_list_4x4, 6*16); - memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); - } - - result.num_ref_frames = pic->num_ref_frames; - - result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; - result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; - - result.frame_num = pic->frame_num; - memcpy(result.frame_num_list, pic->frame_num_list, 4*16); - result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; - result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; - memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); - - result.decoded_pic_idx = pic->frame_num; - - return result; + struct ruvd_h264 result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + result.profile = RUVD_H264_PROFILE_BASELINE; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + result.profile = RUVD_H264_PROFILE_MAIN; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + result.profile = RUVD_H264_PROFILE_HIGH; + break; + + default: + assert(0); + break; + } + + result.level = dec->base.level; + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? */ + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64); + + if (dec->stream_type == RUVD_CODEC_H264_PERF) { + memcpy(dec->it, result.scaling_list_4x4, 6 * 16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64); + } + + result.num_ref_frames = pic->num_ref_frames; + + result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + result.frame_num = pic->frame_num; + memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16); + result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; + result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; + memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2); + + result.decoded_pic_idx = pic->frame_num; + + return result; } /* get h265 specific message bits */ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, - struct pipe_h265_picture_desc *pic) + struct pipe_h265_picture_desc *pic) { - struct ruvd_h265 result; - unsigned i, j; - - memset(&result, 0, sizeof(result)); - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; - result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; - result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; - result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; - result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; - result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; - result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; - result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; - result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) - result.sps_info_flags |= 1 << 9; - if (pic->UseRefPicList == true) - result.sps_info_flags |= 1 << 10; - - result.chroma_format = pic->pps->sps->chroma_format_idc; - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; - result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; - result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; - result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; - result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; - result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; - result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; - result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; - result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; - result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; - result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; - result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; - result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; - result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; - result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; - result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; - result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; - result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; - result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; - result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; - result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; - result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; - result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; - result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; - //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? - - result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; - result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; - result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; - result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; - result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; - result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; - result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; - result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; - result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; - result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; - result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; - result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; - result.init_qp_minus26 = pic->pps->init_qp_minus26; - - for (i = 0; i < 19; ++i) - result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; - - for (i = 0; i < 21; ++i) - result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; - - result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; - result.curr_poc = pic->CurrPicOrderCntVal; - - for (i = 0 ; i < 16 ; i++) { - for (j = 0; (pic->ref[j] != NULL) && (j < 16) ; j++) { - if (dec->render_pic_list[i] == pic->ref[j]) - break; - if (j == 15) - dec->render_pic_list[i] = NULL; - else if (pic->ref[j+1] == NULL) - dec->render_pic_list[i] = NULL; - } - } - for (i = 0 ; i < 16 ; i++) { - if (dec->render_pic_list[i] == NULL) { - dec->render_pic_list[i] = target; - result.curr_idx = i; - break; - } - } - - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)result.curr_idx, - &ruvd_destroy_associated_data); - - for (i = 0; i < 16; ++i) { - struct pipe_video_buffer *ref = pic->ref[i]; - uintptr_t ref_pic = 0; - - result.poc_list[i] = pic->PicOrderCntVal[i]; - - if (ref) - ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - else - ref_pic = 0x7F; - result.ref_pic_list[i] = ref_pic; - } - - for (i = 0; i < 8; ++i) { - result.ref_pic_set_st_curr_before[i] = 0xFF; - result.ref_pic_set_st_curr_after[i] = 0xFF; - result.ref_pic_set_lt_curr[i] = 0xFF; - } - - for (i = 0; i < pic->NumPocStCurrBefore; ++i) - result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; - - for (i = 0; i < pic->NumPocStCurrAfter; ++i) - result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; - - for (i = 0; i < pic->NumPocLtCurr; ++i) - result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; - - for (i = 0; i < 6; ++i) - result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; - - for (i = 0; i < 2; ++i) - result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; - - memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); - memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); - memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); - memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); - - for (i = 0 ; i < 2 ; i++) { - for (j = 0 ; j < 15 ; j++) - result.direct_reflist[i][j] = pic->RefPicList[i][j]; - } - - if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - if (target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.luma_10to8 = 5; - result.chroma_10to8 = 5; - result.sclr_luma10to8 = 4; - result.sclr_chroma10to8 = 4; - } - } - - /* TODO - result.highestTid; - result.isNonRef; - - IDRPicFlag; - RAPPicFlag; - NumPocTotalCurr; - NumShortTermPictureSliceHeaderBits; - NumLongTermPictureSliceHeaderBits; - - IsLongTerm[16]; - */ - - return result; + struct ruvd_h265 result; + unsigned i, j; + + memset(&result, 0, sizeof(result)); + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + // result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_poc = pic->CurrPicOrderCntVal; + + for (i = 0; i < 16; i++) { + for (j = 0; (pic->ref[j] != NULL) && (j < 16); j++) { + if (dec->render_pic_list[i] == pic->ref[j]) + break; + if (j == 15) + dec->render_pic_list[i] = NULL; + else if (pic->ref[j + 1] == NULL) + dec->render_pic_list[i] = NULL; + } + } + for (i = 0; i < 16; i++) { + if (dec->render_pic_list[i] == NULL) { + dec->render_pic_list[i] = target; + result.curr_idx = i; + break; + } + } + + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx, + &ruvd_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 15; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.sclr_luma10to8 = 4; + result.sclr_chroma10to8 = 4; + } + } + + /* TODO + result.highestTid; + result.isNonRef; + + IDRPicFlag; + RAPPicFlag; + NumPocTotalCurr; + NumShortTermPictureSliceHeaderBits; + NumLongTermPictureSliceHeaderBits; + + IsLongTerm[16]; + */ + + return result; } /* get vc1 specific message bits */ static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) { - struct ruvd_vc1 result; - - memset(&result, 0, sizeof(result)); - - switch(pic->base.profile) { - case PIPE_VIDEO_PROFILE_VC1_SIMPLE: - result.profile = RUVD_VC1_PROFILE_SIMPLE; - result.level = 1; - break; - - case PIPE_VIDEO_PROFILE_VC1_MAIN: - result.profile = RUVD_VC1_PROFILE_MAIN; - result.level = 2; - break; - - case PIPE_VIDEO_PROFILE_VC1_ADVANCED: - result.profile = RUVD_VC1_PROFILE_ADVANCED; - result.level = 4; - break; - - default: - assert(0); - } - - /* fields common for all profiles */ - result.sps_info_flags |= pic->postprocflag << 7; - result.sps_info_flags |= pic->pulldown << 6; - result.sps_info_flags |= pic->interlace << 5; - result.sps_info_flags |= pic->tfcntrflag << 4; - result.sps_info_flags |= pic->finterpflag << 3; - result.sps_info_flags |= pic->psf << 1; - - result.pps_info_flags |= pic->range_mapy_flag << 31; - result.pps_info_flags |= pic->range_mapy << 28; - result.pps_info_flags |= pic->range_mapuv_flag << 27; - result.pps_info_flags |= pic->range_mapuv << 24; - result.pps_info_flags |= pic->multires << 21; - result.pps_info_flags |= pic->maxbframes << 16; - result.pps_info_flags |= pic->overlap << 11; - result.pps_info_flags |= pic->quantizer << 9; - result.pps_info_flags |= pic->panscan_flag << 7; - result.pps_info_flags |= pic->refdist_flag << 6; - result.pps_info_flags |= pic->vstransform << 0; - - /* some fields only apply to main/advanced profile */ - if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { - result.pps_info_flags |= pic->syncmarker << 20; - result.pps_info_flags |= pic->rangered << 19; - result.pps_info_flags |= pic->loopfilter << 5; - result.pps_info_flags |= pic->fastuvmc << 4; - result.pps_info_flags |= pic->extended_mv << 3; - result.pps_info_flags |= pic->extended_dmv << 8; - result.pps_info_flags |= pic->dquant << 1; - } - - result.chroma_format = 1; + struct ruvd_vc1 result; + + memset(&result, 0, sizeof(result)); + + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + result.profile = RUVD_VC1_PROFILE_SIMPLE; + result.level = 1; + break; + + case PIPE_VIDEO_PROFILE_VC1_MAIN: + result.profile = RUVD_VC1_PROFILE_MAIN; + result.level = 2; + break; + + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + result.profile = RUVD_VC1_PROFILE_ADVANCED; + result.level = 4; + break; + + default: + assert(0); + } + + /* fields common for all profiles */ + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; + + /* some fields only apply to main/advanced profile */ + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { + result.pps_info_flags |= pic->syncmarker << 20; + result.pps_info_flags |= pic->rangered << 19; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; + } + + result.chroma_format = 1; #if 0 //(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) @@ -837,132 +837,132 @@ uint8_t deblockEnable uint8_t pquant #endif - return result; + return result; } /* extract the frame number from a referenced video buffer */ static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) { - uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; - uint32_t max = MAX2(dec->frame_number, 1) - 1; - uintptr_t frame; + uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; + uintptr_t frame; - /* seems to be the most sane fallback */ - if (!ref) - return max; + /* seems to be the most sane fallback */ + if (!ref) + return max; - /* get the frame number from the associated data */ - frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + /* get the frame number from the associated data */ + frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - /* limit the frame number to a valid range */ - return MAX2(MIN2(frame, max), min); + /* limit the frame number to a valid range */ + return MAX2(MIN2(frame, max), min); } /* get mpeg2 specific msg bits */ static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, - struct pipe_mpeg12_picture_desc *pic) + struct pipe_mpeg12_picture_desc *pic) { - const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; - struct ruvd_mpeg2 result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - for (i = 0; i < 2; ++i) - result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); - - if(pic->intra_matrix) { - result.load_intra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; - } - } - if(pic->non_intra_matrix) { - result.load_nonintra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; - } - } - - result.profile_and_level_indication = 0; - result.chroma_format = 0x1; - - result.picture_coding_type = pic->picture_coding_type; - result.f_code[0][0] = pic->f_code[0][0] + 1; - result.f_code[0][1] = pic->f_code[0][1] + 1; - result.f_code[1][0] = pic->f_code[1][0] + 1; - result.f_code[1][1] = pic->f_code[1][1] + 1; - result.intra_dc_precision = pic->intra_dc_precision; - result.pic_structure = pic->picture_structure; - result.top_field_first = pic->top_field_first; - result.frame_pred_frame_dct = pic->frame_pred_frame_dct; - result.concealment_motion_vectors = pic->concealment_motion_vectors; - result.q_scale_type = pic->q_scale_type; - result.intra_vlc_format = pic->intra_vlc_format; - result.alternate_scan = pic->alternate_scan; - - return result; + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + struct ruvd_mpeg2 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + if (pic->intra_matrix) { + result.load_intra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + } + } + if (pic->non_intra_matrix) { + result.load_nonintra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } + } + + result.profile_and_level_indication = 0; + result.chroma_format = 0x1; + + result.picture_coding_type = pic->picture_coding_type; + result.f_code[0][0] = pic->f_code[0][0] + 1; + result.f_code[0][1] = pic->f_code[0][1] + 1; + result.f_code[1][0] = pic->f_code[1][0] + 1; + result.f_code[1][1] = pic->f_code[1][1] + 1; + result.intra_dc_precision = pic->intra_dc_precision; + result.pic_structure = pic->picture_structure; + result.top_field_first = pic->top_field_first; + result.frame_pred_frame_dct = pic->frame_pred_frame_dct; + result.concealment_motion_vectors = pic->concealment_motion_vectors; + result.q_scale_type = pic->q_scale_type; + result.intra_vlc_format = pic->intra_vlc_format; + result.alternate_scan = pic->alternate_scan; + + return result; } /* get mpeg4 specific msg bits */ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, - struct pipe_mpeg4_picture_desc *pic) + struct pipe_mpeg4_picture_desc *pic) { - struct ruvd_mpeg4 result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - for (i = 0; i < 2; ++i) - result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); - - result.variant_type = 0; - result.profile_and_level_indication = 0xF0; // ASP Level0 - - result.video_object_layer_verid = 0x5; // advanced simple - result.video_object_layer_shape = 0x0; // rectangular - - result.video_object_layer_width = dec->base.width; - result.video_object_layer_height = dec->base.height; - - result.vop_time_increment_resolution = pic->vop_time_increment_resolution; - - result.flags |= pic->short_video_header << 0; - //result.flags |= obmc_disable << 1; - result.flags |= pic->interlaced << 2; - result.flags |= 1 << 3; // load_intra_quant_mat - result.flags |= 1 << 4; // load_nonintra_quant_mat - result.flags |= pic->quarter_sample << 5; - result.flags |= 1 << 6; // complexity_estimation_disable - result.flags |= pic->resync_marker_disable << 7; - //result.flags |= data_partitioned << 8; - //result.flags |= reversible_vlc << 9; - result.flags |= 0 << 10; // newpred_enable - result.flags |= 0 << 11; // reduced_resolution_vop_enable - //result.flags |= scalability << 12; - //result.flags |= is_object_layer_identifier << 13; - //result.flags |= fixed_vop_rate << 14; - //result.flags |= newpred_segment_type << 15; - - result.quant_type = pic->quant_type; - - for (i = 0; i < 64; ++i) { - result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; - result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; - } - - /* - int32_t trd [2] - int32_t trb [2] - uint8_t vop_coding_type - uint8_t vop_fcode_forward - uint8_t vop_fcode_backward - uint8_t rounding_control - uint8_t alternate_vertical_scan_flag - uint8_t top_field_first - */ - - return result; + struct ruvd_mpeg4 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + result.variant_type = 0; + result.profile_and_level_indication = 0xF0; // ASP Level0 + + result.video_object_layer_verid = 0x5; // advanced simple + result.video_object_layer_shape = 0x0; // rectangular + + result.video_object_layer_width = dec->base.width; + result.video_object_layer_height = dec->base.height; + + result.vop_time_increment_resolution = pic->vop_time_increment_resolution; + + result.flags |= pic->short_video_header << 0; + // result.flags |= obmc_disable << 1; + result.flags |= pic->interlaced << 2; + result.flags |= 1 << 3; // load_intra_quant_mat + result.flags |= 1 << 4; // load_nonintra_quant_mat + result.flags |= pic->quarter_sample << 5; + result.flags |= 1 << 6; // complexity_estimation_disable + result.flags |= pic->resync_marker_disable << 7; + // result.flags |= data_partitioned << 8; + // result.flags |= reversible_vlc << 9; + result.flags |= 0 << 10; // newpred_enable + result.flags |= 0 << 11; // reduced_resolution_vop_enable + // result.flags |= scalability << 12; + // result.flags |= is_object_layer_identifier << 13; + // result.flags |= fixed_vop_rate << 14; + // result.flags |= newpred_segment_type << 15; + + result.quant_type = pic->quant_type; + + for (i = 0; i < 64; ++i) { + result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; + result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; + } + + /* + int32_t trd [2] + int32_t trb [2] + uint8_t vop_coding_type + uint8_t vop_fcode_forward + uint8_t vop_fcode_backward + uint8_t rounding_control + uint8_t alternate_vertical_scan_flag + uint8_t top_field_first + */ + + return result; } /** @@ -970,237 +970,236 @@ static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, */ static void ruvd_destroy(struct pipe_video_codec *decoder) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - unsigned i; + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + unsigned i; - assert(decoder); + assert(decoder); - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_DESTROY; - dec->msg->stream_handle = dec->stream_handle; - send_msg_buf(dec); + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DESTROY; + dec->msg->stream_handle = dec->stream_handle; + send_msg_buf(dec); - flush(dec, 0); + flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); } /** * start decoding of a new frame */ -static void ruvd_begin_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void ruvd_begin_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - uintptr_t frame; + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + uintptr_t frame; - assert(decoder); + assert(decoder); - frame = ++dec->frame_number; - vl_video_buffer_set_associated_data(target, decoder, (void *)frame, - &ruvd_destroy_associated_data); + frame = ++dec->frame_number; + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + &ruvd_destroy_associated_data); - dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map( - dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + dec->bs_size = 0; + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** * decode a macroblock */ static void ruvd_decode_macroblock(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - const struct pipe_macroblock *macroblocks, - unsigned num_macroblocks) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) { - /* not supported (yet) */ - assert(0); + /* not supported (yet) */ + assert(0); } /** * decode a bitstream */ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - unsigned num_buffers, - const void * const *buffers, - const unsigned *sizes) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, unsigned num_buffers, + const void *const *buffers, const unsigned *sizes) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - unsigned i; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - for (i = 0; i < num_buffers; ++i) { - struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; - unsigned new_size = dec->bs_size + sizes[i]; - - if (new_size > buf->res->buf->size) { - dec->ws->buffer_unmap(buf->res->buf); - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { - RVID_ERR("Can't resize bitstream buffer!"); - return; - } - - dec->bs_ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dec->bs_ptr) - return; - - dec->bs_ptr += dec->bs_size; - } - - memcpy(dec->bs_ptr, buffers[i], sizes[i]); - dec->bs_size += sizes[i]; - dec->bs_ptr += sizes[i]; - } + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + unsigned i; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + for (i = 0; i < num_buffers; ++i) { + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + unsigned new_size = dec->bs_size + sizes[i]; + + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); + return; + } + + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dec->bs_ptr) + return; + + dec->bs_ptr += dec->bs_size; + } + + memcpy(dec->bs_ptr, buffers[i], sizes[i]); + dec->bs_size += sizes[i]; + dec->bs_ptr += sizes[i]; + } } /** * end decoding of the current frame */ -static void ruvd_end_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void ruvd_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; - struct pb_buffer *dt; - struct rvid_buffer *msg_fb_it_buf, *bs_buf; - unsigned bs_size; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; - bs_buf = &dec->bs_buffers[dec->cur_buffer]; - - bs_size = align(dec->bs_size, 128); - memset(dec->bs_ptr, 0, bs_size - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->res->buf); - - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_DECODE; - dec->msg->stream_handle = dec->stream_handle; - dec->msg->status_report_feedback_number = dec->frame_number; - - dec->msg->body.decode.stream_type = dec->stream_type; - dec->msg->body.decode.decode_flags = 0x1; - dec->msg->body.decode.width_in_samples = dec->base.width; - dec->msg->body.decode.height_in_samples = dec->base.height; - - if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || - (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { - dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16; - dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16; - } - - if (dec->dpb.res) - dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; - dec->msg->body.decode.bsd_size = bs_size; - dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); - - if (dec->stream_type == RUVD_CODEC_H264_PERF && - ((struct si_screen*)dec->screen)->info.family >= CHIP_POLARIS10) - dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - - dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); - if (((struct si_screen*)dec->screen)->info.family >= CHIP_STONEY) - dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; - - switch (u_reduce_video_profile(picture->profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_HEVC: - dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); - if (dec->ctx.res == NULL) { - unsigned ctx_size; - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture); - else - ctx_size = calc_ctx_size_h265_main(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - } - si_vid_clear_buffer(decoder->context, &dec->ctx); - } - - if (dec->ctx.res) - dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - break; - - case PIPE_VIDEO_FORMAT_VC1: - dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - break; - - default: - assert(0); - return; - } - - dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; - dec->msg->body.decode.extension_support = 0x1; - - /* set at least the feedback buffer size */ - dec->fb[0] = dec->fb_size; - - send_msg_buf(dec); - - if (dec->dpb.res) - send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - - if (dec->ctx.res) - send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, - FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); - if (have_it(dec)) - send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, - FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - set_reg(dec, dec->reg.cntl, 1); - - flush(dec, PIPE_FLUSH_ASYNC); - next_buffer(dec); + struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder; + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_buf, *bs_buf; + unsigned bs_size; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + bs_size = align(dec->bs_size, 128); + memset(dec->bs_ptr, 0, bs_size - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DECODE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->status_report_feedback_number = dec->frame_number; + + dec->msg->body.decode.stream_type = dec->stream_type; + dec->msg->body.decode.decode_flags = 0x1; + dec->msg->body.decode.width_in_samples = dec->base.width; + dec->msg->body.decode.height_in_samples = dec->base.height; + + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + dec->msg->body.decode.width_in_samples = + align(dec->msg->body.decode.width_in_samples, 16) / 16; + dec->msg->body.decode.height_in_samples = + align(dec->msg->body.decode.height_in_samples, 16) / 16; + } + + if (dec->dpb.res) + dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; + dec->msg->body.decode.bsd_size = bs_size; + dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); + + if (dec->stream_type == RUVD_CODEC_H264_PERF && + ((struct si_screen *)dec->screen)->info.family >= CHIP_POLARIS10) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; + + dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); + if (((struct si_screen *)dec->screen)->info.family >= CHIP_STONEY) + dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; + + switch (u_reduce_video_profile(picture->profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + dec->msg->body.decode.codec.h264 = + get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_HEVC: + dec->msg->body.decode.codec.h265 = + get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture); + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + } + si_vid_clear_buffer(decoder->context, &dec->ctx); + } + + if (dec->ctx.res) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; + break; + + case PIPE_VIDEO_FORMAT_VC1: + dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + dec->msg->body.decode.codec.mpeg2 = + get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + dec->msg->body.decode.codec.mpeg4 = + get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + break; + + default: + assert(0); + return; + } + + dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; + dec->msg->body.decode.extension_support = 0x1; + + /* set at least the feedback buffer size */ + dec->fb[0] = dec->fb_size; + + send_msg_buf(dec); + + if (dec->dpb.res) + send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + + if (dec->ctx.res) + send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ, + RADEON_DOMAIN_GTT); + send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, FB_BUFFER_OFFSET, + RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + set_reg(dec, dec->reg.cntl, 1); + + flush(dec, PIPE_FLUSH_ASYNC); + next_buffer(dec); } /** @@ -1214,276 +1213,290 @@ static void ruvd_flush(struct pipe_video_codec *decoder) * create and UVD decoder */ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - ruvd_set_dtb set_dtb) + const struct pipe_video_codec *templ, + ruvd_set_dtb set_dtb) { - struct si_context *sctx = (struct si_context*)context; - struct radeon_winsys *ws = sctx->ws; - unsigned dpb_size; - unsigned width = templ->width, height = templ->height; - unsigned bs_buf_size; - struct ruvd_decoder *dec; - int r, i; - - switch(u_reduce_video_profile(templ->profile)) { - case PIPE_VIDEO_FORMAT_MPEG12: - if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - return vl_create_mpeg12_decoder(context, templ); - - /* fall through */ - case PIPE_VIDEO_FORMAT_MPEG4: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - break; - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - break; - - default: - break; - } - - - dec = CALLOC_STRUCT(ruvd_decoder); - - if (!dec) - return NULL; - - if (!sctx->screen->info.is_amdgpu) - dec->use_legacy = true; - - dec->base = *templ; - dec->base.context = context; - dec->base.width = width; - dec->base.height = height; - - dec->base.destroy = ruvd_destroy; - dec->base.begin_frame = ruvd_begin_frame; - dec->base.decode_macroblock = ruvd_decode_macroblock; - dec->base.decode_bitstream = ruvd_decode_bitstream; - dec->base.end_frame = ruvd_end_frame; - dec->base.flush = ruvd_flush; - - dec->stream_type = profile2stream_type(dec, sctx->family); - dec->set_dtb = set_dtb; - dec->stream_handle = si_vid_alloc_stream_handle(); - dec->screen = context->screen; - dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); - if (!dec->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - for (i = 0; i < 16; i++) - dec->render_pic_list[i] = NULL; - dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : - FB_BUFFER_SIZE; - bs_buf_size = width * height * (512 / (16 * 16)); - for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; - STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); - if (have_it(dec)) - msg_fb_it_size += IT_SCALING_TABLE_SIZE; - if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], - msg_fb_it_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated message buffers.\n"); - goto error; - } - - if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], - bs_buf_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated bitstream buffers.\n"); - goto error; - } - - si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); - si_vid_clear_buffer(context, &dec->bs_buffers[i]); - } - - dpb_size = calc_dpb_size(dec); - if (dpb_size) { - if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated dpb.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->dpb); - } - - if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) { - unsigned ctx_size = calc_ctx_size_h264_perf(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->ctx); - } - - if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) { - if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, - UVD_SESSION_CONTEXT_SIZE, - PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated session ctx.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->sessionctx); - } - - if (sctx->family >= CHIP_VEGA10) { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; - dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15; - } else { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; - dec->reg.cntl = RUVD_ENGINE_CNTL; - } - - map_msg_fb_it_buf(dec); - dec->msg->size = sizeof(*dec->msg); - dec->msg->msg_type = RUVD_MSG_CREATE; - dec->msg->stream_handle = dec->stream_handle; - dec->msg->body.create.stream_type = dec->stream_type; - dec->msg->body.create.width_in_samples = dec->base.width; - dec->msg->body.create.height_in_samples = dec->base.height; - dec->msg->body.create.dpb_size = dpb_size; - send_msg_buf(dec); - r = flush(dec, 0); - if (r) - goto error; - - next_buffer(dec); - - return &dec->base; + struct si_context *sctx = (struct si_context *)context; + struct radeon_winsys *ws = sctx->ws; + unsigned dpb_size; + unsigned width = templ->width, height = templ->height; + unsigned bs_buf_size; + struct ruvd_decoder *dec; + int r, i; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + return vl_create_mpeg12_decoder(context, templ); + + /* fall through */ + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + + default: + break; + } + + dec = CALLOC_STRUCT(ruvd_decoder); + + if (!dec) + return NULL; + + if (!sctx->screen->info.is_amdgpu) + dec->use_legacy = true; + + dec->base = *templ; + dec->base.context = context; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = ruvd_destroy; + dec->base.begin_frame = ruvd_begin_frame; + dec->base.decode_macroblock = ruvd_decode_macroblock; + dec->base.decode_bitstream = ruvd_decode_bitstream; + dec->base.end_frame = ruvd_end_frame; + dec->base.flush = ruvd_flush; + + dec->stream_type = profile2stream_type(dec, sctx->family); + dec->set_dtb = set_dtb; + dec->stream_handle = si_vid_alloc_stream_handle(); + dec->screen = context->screen; + dec->ws = ws; + dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); + if (!dec->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + for (i = 0; i < 16; i++) + dec->render_pic_list[i] = NULL; + dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; + bs_buf_size = width * height * (512 / (16 * 16)); + for (i = 0; i < NUM_BUFFERS; ++i) { + unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; + STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); + if (have_it(dec)) + msg_fb_it_size += IT_SCALING_TABLE_SIZE; + if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], msg_fb_it_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated message buffers.\n"); + goto error; + } + + if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); + goto error; + } + + si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); + si_vid_clear_buffer(context, &dec->bs_buffers[i]); + } + + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->dpb); + } + + if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) { + unsigned ctx_size = calc_ctx_size_h264_perf(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->ctx); + } + + if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) { + if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, UVD_SESSION_CONTEXT_SIZE, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated session ctx.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->sessionctx); + } + + if (sctx->family >= CHIP_VEGA10) { + dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; + dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15; + } else { + dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; + dec->reg.cntl = RUVD_ENGINE_CNTL; + } + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_CREATE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->body.create.stream_type = dec->stream_type; + dec->msg->body.create.width_in_samples = dec->base.width; + dec->msg->body.create.height_in_samples = dec->base.height; + dec->msg->body.create.dpb_size = dpb_size; + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + + next_buffer(dec); + + return &dec->base; error: - if (dec->cs) dec->ws->cs_destroy(dec->cs); + if (dec->cs) + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); - return NULL; + return NULL; } /* calculate top/bottom offset */ static unsigned texture_offset(struct radeon_surf *surface, unsigned layer, - enum ruvd_surface_type type) + enum ruvd_surface_type type) { - switch (type) { - default: - case RUVD_SURFACE_TYPE_LEGACY: - return surface->u.legacy.level[0].offset + - layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; - break; - case RUVD_SURFACE_TYPE_GFX9: - return surface->u.gfx9.surf_offset + - layer * surface->u.gfx9.surf_slice_size; - break; - } + switch (type) { + default: + case RUVD_SURFACE_TYPE_LEGACY: + return surface->u.legacy.level[0].offset + + layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4; + break; + case RUVD_SURFACE_TYPE_GFX9: + return surface->u.gfx9.surf_offset + layer * surface->u.gfx9.surf_slice_size; + break; + } } /* hw encode the aspect of macro tiles */ static unsigned macro_tile_aspect(unsigned macro_tile_aspect) { - switch (macro_tile_aspect) { - default: - case 1: macro_tile_aspect = 0; break; - case 2: macro_tile_aspect = 1; break; - case 4: macro_tile_aspect = 2; break; - case 8: macro_tile_aspect = 3; break; - } - return macro_tile_aspect; + switch (macro_tile_aspect) { + default: + case 1: + macro_tile_aspect = 0; + break; + case 2: + macro_tile_aspect = 1; + break; + case 4: + macro_tile_aspect = 2; + break; + case 8: + macro_tile_aspect = 3; + break; + } + return macro_tile_aspect; } /* hw encode the bank width and height */ static unsigned bank_wh(unsigned bankwh) { - switch (bankwh) { - default: - case 1: bankwh = 0; break; - case 2: bankwh = 1; break; - case 4: bankwh = 2; break; - case 8: bankwh = 3; break; - } - return bankwh; + switch (bankwh) { + default: + case 1: + bankwh = 0; + break; + case 2: + bankwh = 1; + break; + case 4: + bankwh = 2; + break; + case 8: + bankwh = 3; + break; + } + return bankwh; } /** * fill decoding target field from the luma and chroma surfaces */ void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type) + struct radeon_surf *chroma, enum ruvd_surface_type type) { - switch (type) { - default: - case RUVD_SURFACE_TYPE_LEGACY: - msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; - switch (luma->u.legacy.level[0].mode) { - case RADEON_SURF_MODE_LINEAR_ALIGNED: - msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - break; - case RADEON_SURF_MODE_1D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; - break; - case RADEON_SURF_MODE_2D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; - break; - default: - assert(0); - break; - } - - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); - if (chroma) - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - if (chroma) - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - - if (chroma) { - assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); - assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); - assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); - } - - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); - msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); - break; - case RUVD_SURFACE_TYPE_GFX9: - msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w; - /* SWIZZLE LINEAR MODE */ - msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - msg->body.decode.dt_surf_tile_config = 0; - break; - } + switch (type) { + default: + case RUVD_SURFACE_TYPE_LEGACY: + msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; + switch (luma->u.legacy.level[0].mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + break; + case RADEON_SURF_MODE_1D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; + break; + case RADEON_SURF_MODE_2D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; + break; + default: + assert(0); + break; + } + + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); + if (chroma) + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); + if (chroma) + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } + + if (chroma) { + assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); + assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); + assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); + } + + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); + msg->body.decode.dt_surf_tile_config |= + RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); + break; + case RUVD_SURFACE_TYPE_GFX9: + msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w; + /* SWIZZLE LINEAR MODE */ + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } + msg->body.decode.dt_surf_tile_config = 0; + break; + } } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h index 583b4d5e4..c1d221930 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_uvd.h @@ -32,410 +32,410 @@ #include "vl/vl_video_buffer.h" /* UVD uses PM4 packet type 0 and 2 */ -#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) -#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define RUVD_PKT_TYPE_C 0x3FFFFFFF -#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) -#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define RUVD_PKT_COUNT_C 0xC000FFFF -#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) -#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000 -#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) -#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) +#define RUVD_PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) +#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define RUVD_PKT_TYPE_C 0x3FFFFFFF +#define RUVD_PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) +#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define RUVD_PKT_COUNT_C 0xC000FFFF +#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0) +#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000 +#define RUVD_PKT0(index, count) \ + (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) +#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) /* registers involved with UVD */ -#define RUVD_GPCOM_VCPU_CMD 0xEF0C -#define RUVD_GPCOM_VCPU_DATA0 0xEF10 -#define RUVD_GPCOM_VCPU_DATA1 0xEF14 -#define RUVD_ENGINE_CNTL 0xEF18 +#define RUVD_GPCOM_VCPU_CMD 0xEF0C +#define RUVD_GPCOM_VCPU_DATA0 0xEF10 +#define RUVD_GPCOM_VCPU_DATA1 0xEF14 +#define RUVD_ENGINE_CNTL 0xEF18 -#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c -#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710 -#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714 -#define RUVD_ENGINE_CNTL_SOC15 0x20718 +#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c +#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710 +#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714 +#define RUVD_ENGINE_CNTL_SOC15 0x20718 /* UVD commands to VCPU */ -#define RUVD_CMD_MSG_BUFFER 0x00000000 -#define RUVD_CMD_DPB_BUFFER 0x00000001 -#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002 -#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003 -#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005 -#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100 -#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204 -#define RUVD_CMD_CONTEXT_BUFFER 0x00000206 +#define RUVD_CMD_MSG_BUFFER 0x00000000 +#define RUVD_CMD_DPB_BUFFER 0x00000001 +#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003 +#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100 +#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204 +#define RUVD_CMD_CONTEXT_BUFFER 0x00000206 /* UVD message types */ -#define RUVD_MSG_CREATE 0 -#define RUVD_MSG_DECODE 1 -#define RUVD_MSG_DESTROY 2 +#define RUVD_MSG_CREATE 0 +#define RUVD_MSG_DECODE 1 +#define RUVD_MSG_DESTROY 2 /* UVD stream types */ -#define RUVD_CODEC_H264 0x00000000 -#define RUVD_CODEC_VC1 0x00000001 -#define RUVD_CODEC_MPEG2 0x00000003 -#define RUVD_CODEC_MPEG4 0x00000004 -#define RUVD_CODEC_H264_PERF 0x00000007 -#define RUVD_CODEC_MJPEG 0x00000008 -#define RUVD_CODEC_H265 0x00000010 +#define RUVD_CODEC_H264 0x00000000 +#define RUVD_CODEC_VC1 0x00000001 +#define RUVD_CODEC_MPEG2 0x00000003 +#define RUVD_CODEC_MPEG4 0x00000004 +#define RUVD_CODEC_H264_PERF 0x00000007 +#define RUVD_CODEC_MJPEG 0x00000008 +#define RUVD_CODEC_H265 0x00000010 /* UVD decode target buffer tiling mode */ -#define RUVD_TILE_LINEAR 0x00000000 -#define RUVD_TILE_8X4 0x00000001 -#define RUVD_TILE_8X8 0x00000002 -#define RUVD_TILE_32AS8 0x00000003 +#define RUVD_TILE_LINEAR 0x00000000 +#define RUVD_TILE_8X4 0x00000001 +#define RUVD_TILE_8X8 0x00000002 +#define RUVD_TILE_32AS8 0x00000003 /* UVD decode target buffer array mode */ -#define RUVD_ARRAY_MODE_LINEAR 0x00000000 -#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 -#define RUVD_ARRAY_MODE_1D_THIN 0x00000002 -#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 -#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 -#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 +#define RUVD_ARRAY_MODE_LINEAR 0x00000000 +#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 +#define RUVD_ARRAY_MODE_1D_THIN 0x00000002 +#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 /* UVD tile config */ -#define RUVD_BANK_WIDTH(x) ((x) << 0) -#define RUVD_BANK_HEIGHT(x) ((x) << 3) -#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6) -#define RUVD_NUM_BANKS(x) ((x) << 9) +#define RUVD_BANK_WIDTH(x) ((x) << 0) +#define RUVD_BANK_HEIGHT(x) ((x) << 3) +#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6) +#define RUVD_NUM_BANKS(x) ((x) << 9) /* H.264 profile definitions */ -#define RUVD_H264_PROFILE_BASELINE 0x00000000 -#define RUVD_H264_PROFILE_MAIN 0x00000001 -#define RUVD_H264_PROFILE_HIGH 0x00000002 -#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003 -#define RUVD_H264_PROFILE_MVC 0x00000004 +#define RUVD_H264_PROFILE_BASELINE 0x00000000 +#define RUVD_H264_PROFILE_MAIN 0x00000001 +#define RUVD_H264_PROFILE_HIGH 0x00000002 +#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003 +#define RUVD_H264_PROFILE_MVC 0x00000004 /* VC-1 profile definitions */ -#define RUVD_VC1_PROFILE_SIMPLE 0x00000000 -#define RUVD_VC1_PROFILE_MAIN 0x00000001 -#define RUVD_VC1_PROFILE_ADVANCED 0x00000002 +#define RUVD_VC1_PROFILE_SIMPLE 0x00000000 +#define RUVD_VC1_PROFILE_MAIN 0x00000001 +#define RUVD_VC1_PROFILE_ADVANCED 0x00000002 -enum ruvd_surface_type { - RUVD_SURFACE_TYPE_LEGACY = 0, - RUVD_SURFACE_TYPE_GFX9 +enum ruvd_surface_type +{ + RUVD_SURFACE_TYPE_LEGACY = 0, + RUVD_SURFACE_TYPE_GFX9 }; struct ruvd_mvc_element { - uint16_t viewOrderIndex; - uint16_t viewId; - uint16_t numOfAnchorRefsInL0; - uint16_t viewIdOfAnchorRefsInL0[15]; - uint16_t numOfAnchorRefsInL1; - uint16_t viewIdOfAnchorRefsInL1[15]; - uint16_t numOfNonAnchorRefsInL0; - uint16_t viewIdOfNonAnchorRefsInL0[15]; - uint16_t numOfNonAnchorRefsInL1; - uint16_t viewIdOfNonAnchorRefsInL1[15]; + uint16_t viewOrderIndex; + uint16_t viewId; + uint16_t numOfAnchorRefsInL0; + uint16_t viewIdOfAnchorRefsInL0[15]; + uint16_t numOfAnchorRefsInL1; + uint16_t viewIdOfAnchorRefsInL1[15]; + uint16_t numOfNonAnchorRefsInL0; + uint16_t viewIdOfNonAnchorRefsInL0[15]; + uint16_t numOfNonAnchorRefsInL1; + uint16_t viewIdOfNonAnchorRefsInL1[15]; }; struct ruvd_h264 { - uint32_t profile; - uint32_t level; + uint32_t profile; + uint32_t level; - uint32_t sps_info_flags; - uint32_t pps_info_flags; - uint8_t chroma_format; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_frame_num_minus4; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_frame_num_minus4; - uint8_t pic_order_cnt_type; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - uint8_t num_ref_frames; - uint8_t reserved_8bit; + uint8_t pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t num_ref_frames; + uint8_t reserved_8bit; - int8_t pic_init_qp_minus26; - int8_t pic_init_qs_minus26; - int8_t chroma_qp_index_offset; - int8_t second_chroma_qp_index_offset; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; - uint8_t num_slice_groups_minus1; - uint8_t slice_group_map_type; - uint8_t num_ref_idx_l0_active_minus1; - uint8_t num_ref_idx_l1_active_minus1; + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; - uint16_t slice_group_change_rate_minus1; - uint16_t reserved_16bit_1; + uint16_t slice_group_change_rate_minus1; + uint16_t reserved_16bit_1; - uint8_t scaling_list_4x4[6][16]; - uint8_t scaling_list_8x8[2][64]; + uint8_t scaling_list_4x4[6][16]; + uint8_t scaling_list_8x8[2][64]; - uint32_t frame_num; - uint32_t frame_num_list[16]; - int32_t curr_field_order_cnt_list[2]; - int32_t field_order_cnt_list[16][2]; + uint32_t frame_num; + uint32_t frame_num_list[16]; + int32_t curr_field_order_cnt_list[2]; + int32_t field_order_cnt_list[16][2]; - uint32_t decoded_pic_idx; + uint32_t decoded_pic_idx; - uint32_t curr_pic_ref_frame_num; + uint32_t curr_pic_ref_frame_num; - uint8_t ref_frame_list[16]; + uint8_t ref_frame_list[16]; - uint32_t reserved[122]; + uint32_t reserved[122]; - struct { - uint32_t numViews; - uint32_t viewId0; - struct ruvd_mvc_element mvcElements[1]; - } mvc; + struct { + uint32_t numViews; + uint32_t viewId0; + struct ruvd_mvc_element mvcElements[1]; + } mvc; }; struct ruvd_h265 { - uint32_t sps_info_flags; - uint32_t pps_info_flags; - - uint8_t chroma_format; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - - uint8_t sps_max_dec_pic_buffering_minus1; - uint8_t log2_min_luma_coding_block_size_minus3; - uint8_t log2_diff_max_min_luma_coding_block_size; - uint8_t log2_min_transform_block_size_minus2; - - uint8_t log2_diff_max_min_transform_block_size; - uint8_t max_transform_hierarchy_depth_inter; - uint8_t max_transform_hierarchy_depth_intra; - uint8_t pcm_sample_bit_depth_luma_minus1; - - uint8_t pcm_sample_bit_depth_chroma_minus1; - uint8_t log2_min_pcm_luma_coding_block_size_minus3; - uint8_t log2_diff_max_min_pcm_luma_coding_block_size; - uint8_t num_extra_slice_header_bits; - - uint8_t num_short_term_ref_pic_sets; - uint8_t num_long_term_ref_pic_sps; - uint8_t num_ref_idx_l0_default_active_minus1; - uint8_t num_ref_idx_l1_default_active_minus1; - - int8_t pps_cb_qp_offset; - int8_t pps_cr_qp_offset; - int8_t pps_beta_offset_div2; - int8_t pps_tc_offset_div2; - - uint8_t diff_cu_qp_delta_depth; - uint8_t num_tile_columns_minus1; - uint8_t num_tile_rows_minus1; - uint8_t log2_parallel_merge_level_minus2; - - uint16_t column_width_minus1[19]; - uint16_t row_height_minus1[21]; - - int8_t init_qp_minus26; - uint8_t num_delta_pocs_ref_rps_idx; - uint8_t curr_idx; - uint8_t reserved1; - int32_t curr_poc; - uint8_t ref_pic_list[16]; - int32_t poc_list[16]; - uint8_t ref_pic_set_st_curr_before[8]; - uint8_t ref_pic_set_st_curr_after[8]; - uint8_t ref_pic_set_lt_curr[8]; - - uint8_t ucScalingListDCCoefSizeID2[6]; - uint8_t ucScalingListDCCoefSizeID3[2]; - - uint8_t highestTid; - uint8_t isNonRef; - - uint8_t p010_mode; - uint8_t msb_mode; - uint8_t luma_10to8; - uint8_t chroma_10to8; - uint8_t sclr_luma10to8; - uint8_t sclr_chroma10to8; - - uint8_t direct_reflist[2][15]; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + + uint8_t sps_max_dec_pic_buffering_minus1; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_transform_block_size_minus2; + + uint8_t log2_diff_max_min_transform_block_size; + uint8_t max_transform_hierarchy_depth_inter; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t pcm_sample_bit_depth_luma_minus1; + + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint8_t num_extra_slice_header_bits; + + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pic_sps; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + + uint8_t diff_cu_qp_delta_depth; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + uint8_t log2_parallel_merge_level_minus2; + + uint16_t column_width_minus1[19]; + uint16_t row_height_minus1[21]; + + int8_t init_qp_minus26; + uint8_t num_delta_pocs_ref_rps_idx; + uint8_t curr_idx; + uint8_t reserved1; + int32_t curr_poc; + uint8_t ref_pic_list[16]; + int32_t poc_list[16]; + uint8_t ref_pic_set_st_curr_before[8]; + uint8_t ref_pic_set_st_curr_after[8]; + uint8_t ref_pic_set_lt_curr[8]; + + uint8_t ucScalingListDCCoefSizeID2[6]; + uint8_t ucScalingListDCCoefSizeID3[2]; + + uint8_t highestTid; + uint8_t isNonRef; + + uint8_t p010_mode; + uint8_t msb_mode; + uint8_t luma_10to8; + uint8_t chroma_10to8; + uint8_t sclr_luma10to8; + uint8_t sclr_chroma10to8; + + uint8_t direct_reflist[2][15]; }; struct ruvd_vc1 { - uint32_t profile; - uint32_t level; - uint32_t sps_info_flags; - uint32_t pps_info_flags; - uint32_t pic_structure; - uint32_t chroma_format; + uint32_t profile; + uint32_t level; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint32_t pic_structure; + uint32_t chroma_format; }; struct ruvd_mpeg2 { - uint32_t decoded_pic_idx; - uint32_t ref_pic_idx[2]; - - uint8_t load_intra_quantiser_matrix; - uint8_t load_nonintra_quantiser_matrix; - uint8_t reserved_quantiser_alignement[2]; - uint8_t intra_quantiser_matrix[64]; - uint8_t nonintra_quantiser_matrix[64]; - - uint8_t profile_and_level_indication; - uint8_t chroma_format; - - uint8_t picture_coding_type; - - uint8_t reserved_1; - - uint8_t f_code[2][2]; - uint8_t intra_dc_precision; - uint8_t pic_structure; - uint8_t top_field_first; - uint8_t frame_pred_frame_dct; - uint8_t concealment_motion_vectors; - uint8_t q_scale_type; - uint8_t intra_vlc_format; - uint8_t alternate_scan; + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; + + uint8_t load_intra_quantiser_matrix; + uint8_t load_nonintra_quantiser_matrix; + uint8_t reserved_quantiser_alignement[2]; + uint8_t intra_quantiser_matrix[64]; + uint8_t nonintra_quantiser_matrix[64]; + + uint8_t profile_and_level_indication; + uint8_t chroma_format; + + uint8_t picture_coding_type; + + uint8_t reserved_1; + + uint8_t f_code[2][2]; + uint8_t intra_dc_precision; + uint8_t pic_structure; + uint8_t top_field_first; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t q_scale_type; + uint8_t intra_vlc_format; + uint8_t alternate_scan; }; -struct ruvd_mpeg4 -{ - uint32_t decoded_pic_idx; - uint32_t ref_pic_idx[2]; +struct ruvd_mpeg4 { + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; - uint32_t variant_type; - uint8_t profile_and_level_indication; + uint32_t variant_type; + uint8_t profile_and_level_indication; - uint8_t video_object_layer_verid; - uint8_t video_object_layer_shape; + uint8_t video_object_layer_verid; + uint8_t video_object_layer_shape; - uint8_t reserved_1; + uint8_t reserved_1; - uint16_t video_object_layer_width; - uint16_t video_object_layer_height; + uint16_t video_object_layer_width; + uint16_t video_object_layer_height; - uint16_t vop_time_increment_resolution; + uint16_t vop_time_increment_resolution; - uint16_t reserved_2; + uint16_t reserved_2; - uint32_t flags; + uint32_t flags; - uint8_t quant_type; + uint8_t quant_type; - uint8_t reserved_3[3]; + uint8_t reserved_3[3]; - uint8_t intra_quant_mat[64]; - uint8_t nonintra_quant_mat[64]; + uint8_t intra_quant_mat[64]; + uint8_t nonintra_quant_mat[64]; - struct { - uint8_t sprite_enable; + struct { + uint8_t sprite_enable; - uint8_t reserved_4[3]; + uint8_t reserved_4[3]; - uint16_t sprite_width; - uint16_t sprite_height; - int16_t sprite_left_coordinate; - int16_t sprite_top_coordinate; + uint16_t sprite_width; + uint16_t sprite_height; + int16_t sprite_left_coordinate; + int16_t sprite_top_coordinate; - uint8_t no_of_sprite_warping_points; - uint8_t sprite_warping_accuracy; - uint8_t sprite_brightness_change; - uint8_t low_latency_sprite_enable; - } sprite_config; + uint8_t no_of_sprite_warping_points; + uint8_t sprite_warping_accuracy; + uint8_t sprite_brightness_change; + uint8_t low_latency_sprite_enable; + } sprite_config; - struct { - uint32_t flags; - uint8_t vol_mode; - uint8_t reserved_5[3]; - } divx_311_config; + struct { + uint32_t flags; + uint8_t vol_mode; + uint8_t reserved_5[3]; + } divx_311_config; }; /* message between driver and hardware */ struct ruvd_msg { - uint32_t size; - uint32_t msg_type; - uint32_t stream_handle; - uint32_t status_report_feedback_number; - - union { - struct { - uint32_t stream_type; - uint32_t session_flags; - uint32_t asic_id; - uint32_t width_in_samples; - uint32_t height_in_samples; - uint32_t dpb_buffer; - uint32_t dpb_size; - uint32_t dpb_model; - uint32_t version_info; - } create; - - struct { - uint32_t stream_type; - uint32_t decode_flags; - uint32_t width_in_samples; - uint32_t height_in_samples; - - uint32_t dpb_buffer; - uint32_t dpb_size; - uint32_t dpb_model; - uint32_t dpb_reserved; - - uint32_t db_offset_alignment; - uint32_t db_pitch; - uint32_t db_tiling_mode; - uint32_t db_array_mode; - uint32_t db_field_mode; - uint32_t db_surf_tile_config; - uint32_t db_aligned_height; - uint32_t db_reserved; - - uint32_t use_addr_macro; - - uint32_t bsd_buffer; - uint32_t bsd_size; - - uint32_t pic_param_buffer; - uint32_t pic_param_size; - uint32_t mb_cntl_buffer; - uint32_t mb_cntl_size; - - uint32_t dt_buffer; - uint32_t dt_pitch; - uint32_t dt_tiling_mode; - uint32_t dt_array_mode; - uint32_t dt_field_mode; - uint32_t dt_luma_top_offset; - uint32_t dt_luma_bottom_offset; - uint32_t dt_chroma_top_offset; - uint32_t dt_chroma_bottom_offset; - uint32_t dt_surf_tile_config; - uint32_t dt_uv_surf_tile_config; - // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney - uint32_t dt_wa_chroma_top_offset; - uint32_t dt_wa_chroma_bottom_offset; - - uint32_t reserved[16]; - - union { - struct ruvd_h264 h264; - struct ruvd_h265 h265; - struct ruvd_vc1 vc1; - struct ruvd_mpeg2 mpeg2; - struct ruvd_mpeg4 mpeg4; - - uint32_t info[768]; - } codec; - - uint8_t extension_support; - uint8_t reserved_8bit_1; - uint8_t reserved_8bit_2; - uint8_t reserved_8bit_3; - uint32_t extension_reserved[64]; - } decode; - } body; + uint32_t size; + uint32_t msg_type; + uint32_t stream_handle; + uint32_t status_report_feedback_number; + + union { + struct { + uint32_t stream_type; + uint32_t session_flags; + uint32_t asic_id; + uint32_t width_in_samples; + uint32_t height_in_samples; + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t version_info; + } create; + + struct { + uint32_t stream_type; + uint32_t decode_flags; + uint32_t width_in_samples; + uint32_t height_in_samples; + + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t dpb_reserved; + + uint32_t db_offset_alignment; + uint32_t db_pitch; + uint32_t db_tiling_mode; + uint32_t db_array_mode; + uint32_t db_field_mode; + uint32_t db_surf_tile_config; + uint32_t db_aligned_height; + uint32_t db_reserved; + + uint32_t use_addr_macro; + + uint32_t bsd_buffer; + uint32_t bsd_size; + + uint32_t pic_param_buffer; + uint32_t pic_param_size; + uint32_t mb_cntl_buffer; + uint32_t mb_cntl_size; + + uint32_t dt_buffer; + uint32_t dt_pitch; + uint32_t dt_tiling_mode; + uint32_t dt_array_mode; + uint32_t dt_field_mode; + uint32_t dt_luma_top_offset; + uint32_t dt_luma_bottom_offset; + uint32_t dt_chroma_top_offset; + uint32_t dt_chroma_bottom_offset; + uint32_t dt_surf_tile_config; + uint32_t dt_uv_surf_tile_config; + // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney + uint32_t dt_wa_chroma_top_offset; + uint32_t dt_wa_chroma_bottom_offset; + + uint32_t reserved[16]; + + union { + struct ruvd_h264 h264; + struct ruvd_h265 h265; + struct ruvd_vc1 vc1; + struct ruvd_mpeg2 mpeg2; + struct ruvd_mpeg4 mpeg4; + + uint32_t info[768]; + } codec; + + uint8_t extension_support; + uint8_t reserved_8bit_1; + uint8_t reserved_8bit_2; + uint8_t reserved_8bit_3; + uint32_t extension_reserved[64]; + } decode; + } body; }; /* driver dependent callback */ -typedef struct pb_buffer* (*ruvd_set_dtb) -(struct ruvd_msg* msg, struct vl_video_buffer *vb); +typedef struct pb_buffer *(*ruvd_set_dtb)(struct ruvd_msg *msg, struct vl_video_buffer *vb); /* create an UVD decode */ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templat, - ruvd_set_dtb set_dtb); + const struct pipe_video_codec *templat, + ruvd_set_dtb set_dtb); /* fill decoding target field from the luma and chroma surfaces */ void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type); + struct radeon_surf *chroma, enum ruvd_surface_type type); #endif diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c index 264b96b83..da831015e 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.c @@ -25,37 +25,35 @@ * **************************************************************************/ -#include <stdio.h> +#include "radeon_vce.h" #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> -#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) -#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) -#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) +#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) +#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) +#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) #define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) #define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) -#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) -#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) -#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) -#define FW_53 (53 << 24) +#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) +#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) +#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) +#define FW_53 (53 << 24) /** * flush commands to the hardware */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); - enc->task_info_idx = 0; - enc->bs_idx = 0; + enc->ws->cs_flush(enc->cs, PIPE_FLUSH_ASYNC, NULL); + enc->task_info_idx = 0; + enc->bs_idx = 0; } #if 0 @@ -89,17 +87,17 @@ static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) */ static void reset_cpb(struct rvce_encoder *enc) { - unsigned i; - - LIST_INITHEAD(&enc->cpb_slots); - for (i = 0; i < enc->cpb_num; ++i) { - struct rvce_cpb_slot *slot = &enc->cpb_array[i]; - slot->index = i; - slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; - slot->frame_num = 0; - slot->pic_order_cnt = 0; - LIST_ADDTAIL(&slot->list, &enc->cpb_slots); - } + unsigned i; + + list_inithead(&enc->cpb_slots); + for (i = 0; i < enc->cpb_num; ++i) { + struct rvce_cpb_slot *slot = &enc->cpb_array[i]; + slot->index = i; + slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; + slot->frame_num = 0; + slot->pic_order_cnt = 0; + list_addtail(&slot->list, &enc->cpb_slots); + } } /** @@ -107,32 +105,31 @@ static void reset_cpb(struct rvce_encoder *enc) */ static void sort_cpb(struct rvce_encoder *enc) { - struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; + struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; - LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) { - if (i->frame_num == enc->pic.ref_idx_l0) - l0 = i; + LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) { + if (i->frame_num == enc->pic.ref_idx_l0) + l0 = i; - if (i->frame_num == enc->pic.ref_idx_l1) - l1 = i; + if (i->frame_num == enc->pic.ref_idx_l1) + l1 = i; - if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) - break; + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) + break; - if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && - l0 && l1) - break; - } + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && l0 && l1) + break; + } - if (l1) { - LIST_DEL(&l1->list); - LIST_ADD(&l1->list, &enc->cpb_slots); - } + if (l1) { + list_del(&l1->list); + list_add(&l1->list, &enc->cpb_slots); + } - if (l0) { - LIST_DEL(&l0->list); - LIST_ADD(&l0->list, &enc->cpb_slots); - } + if (l0) { + list_del(&l0->list); + list_add(&l0->list, &enc->cpb_slots); + } } /** @@ -140,53 +137,53 @@ static void sort_cpb(struct rvce_encoder *enc) */ static unsigned get_cpb_num(struct rvce_encoder *enc) { - unsigned w = align(enc->base.width, 16) / 16; - unsigned h = align(enc->base.height, 16) / 16; - unsigned dpb; - - switch (enc->base.level) { - case 10: - dpb = 396; - break; - case 11: - dpb = 900; - break; - case 12: - case 13: - case 20: - dpb = 2376; - break; - case 21: - dpb = 4752; - break; - case 22: - case 30: - dpb = 8100; - break; - case 31: - dpb = 18000; - break; - case 32: - dpb = 20480; - break; - case 40: - case 41: - dpb = 32768; - break; - case 42: - dpb = 34816; - break; - case 50: - dpb = 110400; - break; - default: - case 51: - case 52: - dpb = 184320; - break; - } - - return MIN2(dpb / (w * h), 16); + unsigned w = align(enc->base.width, 16) / 16; + unsigned h = align(enc->base.height, 16) / 16; + unsigned dpb; + + switch (enc->base.level) { + case 10: + dpb = 396; + break; + case 11: + dpb = 900; + break; + case 12: + case 13: + case 20: + dpb = 2376; + break; + case 21: + dpb = 4752; + break; + case 22: + case 30: + dpb = 8100; + break; + case 31: + dpb = 18000; + break; + case 32: + dpb = 20480; + break; + case 40: + case 41: + dpb = 32768; + break; + case 42: + dpb = 34816; + break; + case 50: + dpb = 110400; + break; + default: + case 51: + case 52: + dpb = 184320; + break; + } + + return MIN2(dpb / (w * h), 16); } /** @@ -194,7 +191,7 @@ static unsigned get_cpb_num(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); } /** @@ -202,7 +199,7 @@ struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); } /** @@ -210,29 +207,29 @@ struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc) */ struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc) { - return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); } /** * Calculate the offsets into the CPB */ -void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, - signed *luma_offset, signed *chroma_offset) +void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, + signed *chroma_offset) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - unsigned pitch, vpitch, fsize; - - if (sscreen->info.chip_class < GFX9) { - pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); - vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); - } else { - pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); - vpitch = align(enc->luma->u.gfx9.surf_height, 16); - } - fsize = pitch * (vpitch + vpitch / 2); - - *luma_offset = slot->index * fsize; - *chroma_offset = *luma_offset + pitch * vpitch; + struct si_screen *sscreen = (struct si_screen *)enc->screen; + unsigned pitch, vpitch, fsize; + + if (sscreen->info.chip_class < GFX9) { + pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); + vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); + } else { + pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); + vpitch = align(enc->luma->u.gfx9.surf_height, 16); + } + fsize = pitch * (vpitch + vpitch / 2); + + *luma_offset = slot->index * fsize; + *chroma_offset = *luma_offset + pitch * vpitch; } /** @@ -240,133 +237,128 @@ void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, */ static void rvce_destroy(struct pipe_video_codec *encoder) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - if (enc->stream_handle) { - struct rvid_buffer fb; - si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->session(enc); - enc->destroy(enc); - flush(enc); - si_vid_destroy_buffer(&fb); - } - si_vid_destroy_buffer(&enc->cpb); - enc->ws->cs_destroy(enc->cs); - FREE(enc->cpb_array); - FREE(enc); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + if (enc->stream_handle) { + struct rvid_buffer fb; + si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->destroy(enc); + flush(enc); + si_vid_destroy_buffer(&fb); + } + si_vid_destroy_buffer(&enc->cpb); + enc->ws->cs_destroy(enc->cs); + FREE(enc->cpb_array); + FREE(enc); } -static void rvce_begin_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; - struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; - - bool need_rate_control = - enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method || - enc->pic.quant_i_frames != pic->quant_i_frames || - enc->pic.quant_p_frames != pic->quant_p_frames || - enc->pic.quant_b_frames != pic->quant_b_frames; - - enc->pic = *pic; - si_get_pic_param(enc, pic); - - enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); - enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); - - if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - reset_cpb(enc); - else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) - sort_cpb(enc); - - if (!enc->stream_handle) { - struct rvid_buffer fb; - enc->stream_handle = si_vid_alloc_stream_handle(); - si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); - enc->fb = &fb; - enc->session(enc); - enc->create(enc); - enc->config(enc); - enc->feedback(enc); - flush(enc); - //dump_feedback(enc, &fb); - si_vid_destroy_buffer(&fb); - need_rate_control = false; - } - - if (need_rate_control) { - enc->session(enc); - enc->config(enc); - flush(enc); - } + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; + struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; + + bool need_rate_control = + enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method || + enc->pic.quant_i_frames != pic->quant_i_frames || + enc->pic.quant_p_frames != pic->quant_p_frames || + enc->pic.quant_b_frames != pic->quant_b_frames || + enc->pic.rate_ctrl.target_bitrate != pic->rate_ctrl.target_bitrate; + + enc->pic = *pic; + enc->si_get_pic_param(enc, pic); + + enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); + enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); + + if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + reset_cpb(enc); + else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) + sort_cpb(enc); + + if (!enc->stream_handle) { + struct rvid_buffer fb; + enc->stream_handle = si_vid_alloc_stream_handle(); + si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->create(enc); + enc->config(enc); + enc->feedback(enc); + flush(enc); + // dump_feedback(enc, &fb); + si_vid_destroy_buffer(&fb); + need_rate_control = false; + } + + if (need_rate_control) { + enc->session(enc); + enc->config(enc); + flush(enc); + } } static void rvce_encode_bitstream(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_resource *destination, - void **fb) + struct pipe_video_buffer *source, + struct pipe_resource *destination, void **fb) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - enc->get_buffer(destination, &enc->bs_handle, NULL); - enc->bs_size = destination->width0; - - *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); - if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't create feedback buffer.\n"); - return; - } - if (!radeon_emitted(enc->cs, 0)) - enc->session(enc); - enc->encode(enc); - enc->feedback(enc); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + enc->get_buffer(destination, &enc->bs_handle, NULL); + enc->bs_size = destination->width0; + + *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); + if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't create feedback buffer.\n"); + return; + } + if (!radeon_emitted(enc->cs, 0)) + enc->session(enc); + enc->encode(enc); + enc->feedback(enc); } -static void rvce_end_frame(struct pipe_video_codec *encoder, - struct pipe_video_buffer *source, - struct pipe_picture_desc *picture) +static void rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct rvce_cpb_slot *slot = LIST_ENTRY( - struct rvce_cpb_slot, enc->cpb_slots.prev, list); - - if (!enc->dual_inst || enc->bs_idx > 1) - flush(enc); - - /* update the CPB backtrack with the just encoded frame */ - slot->picture_type = enc->pic.picture_type; - slot->frame_num = enc->pic.frame_num; - slot->pic_order_cnt = enc->pic.pic_order_cnt; - if (!enc->pic.not_referenced) { - LIST_DEL(&slot->list); - LIST_ADD(&slot->list, &enc->cpb_slots); - } + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct rvce_cpb_slot *slot = LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); + + if (!enc->dual_inst || enc->bs_idx > 1) + flush(enc); + + /* update the CPB backtrack with the just encoded frame */ + slot->picture_type = enc->pic.picture_type; + slot->frame_num = enc->pic.frame_num; + slot->pic_order_cnt = enc->pic.pic_order_cnt; + if (!enc->pic.not_referenced) { + list_del(&slot->list); + list_add(&slot->list, &enc->cpb_slots); + } } -static void rvce_get_feedback(struct pipe_video_codec *encoder, - void *feedback, unsigned *size) +static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; - struct rvid_buffer *fb = feedback; - - if (size) { - uint32_t *ptr = enc->ws->buffer_map( - fb->res->buf, enc->cs, - PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); - - if (ptr[1]) { - *size = ptr[4] - ptr[9]; - } else { - *size = 0; - } - - enc->ws->buffer_unmap(fb->res->buf); - } - //dump_feedback(enc, fb); - si_vid_destroy_buffer(fb); - FREE(fb); + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; + struct rvid_buffer *fb = feedback; + + if (size) { + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); + + if (ptr[1]) { + *size = ptr[4] - ptr[9]; + } else { + *size = 0; + } + + enc->ws->buffer_unmap(fb->res->buf); + } + // dump_feedback(enc, fb); + si_vid_destroy_buffer(fb); + FREE(fb); } /** @@ -374,158 +366,147 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, */ static void rvce_flush(struct pipe_video_codec *encoder) { - struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + struct rvce_encoder *enc = (struct rvce_encoder *)encoder; - flush(enc); + flush(enc); } -static void rvce_cs_flush(void *ctx, unsigned flags, - struct pipe_fence_handle **fence) +static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { - // just ignored + // just ignored } struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templ, - struct radeon_winsys* ws, - rvce_get_buffer get_buffer) + const struct pipe_video_codec *templ, + struct radeon_winsys *ws, rvce_get_buffer get_buffer) { - struct si_screen *sscreen = (struct si_screen *)context->screen; - struct si_context *sctx = (struct si_context*)context; - struct rvce_encoder *enc; - struct pipe_video_buffer *tmp_buf, templat = {}; - struct radeon_surf *tmp_surf; - unsigned cpb_size; - - if (!sscreen->info.vce_fw_version) { - RVID_ERR("Kernel doesn't supports VCE!\n"); - return NULL; - - } else if (!si_vce_is_fw_version_supported(sscreen)) { - RVID_ERR("Unsupported VCE fw version loaded!\n"); - return NULL; - } - - enc = CALLOC_STRUCT(rvce_encoder); - if (!enc) - return NULL; - - if (sscreen->info.is_amdgpu) - enc->use_vm = true; - if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || - sscreen->info.is_amdgpu) - enc->use_vui = true; - if (sscreen->info.family >= CHIP_TONGA && - sscreen->info.family != CHIP_STONEY && - sscreen->info.family != CHIP_POLARIS11 && - sscreen->info.family != CHIP_POLARIS12 && - sscreen->info.family != CHIP_VEGAM) - enc->dual_pipe = true; - /* TODO enable B frame with dual instance */ - if ((sscreen->info.family >= CHIP_TONGA) && - (templ->max_references == 1) && - (sscreen->info.vce_harvest_config == 0)) - enc->dual_inst = true; - - enc->base = *templ; - enc->base.context = context; - - enc->base.destroy = rvce_destroy; - enc->base.begin_frame = rvce_begin_frame; - enc->base.encode_bitstream = rvce_encode_bitstream; - enc->base.end_frame = rvce_end_frame; - enc->base.flush = rvce_flush; - enc->base.get_feedback = rvce_get_feedback; - enc->get_buffer = get_buffer; - - enc->screen = context->screen; - enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); - if (!enc->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - templat.buffer_format = PIPE_FORMAT_NV12; - templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; - templat.width = enc->base.width; - templat.height = enc->base.height; - templat.interlaced = false; - if (!(tmp_buf = context->create_video_buffer(context, &templat))) { - RVID_ERR("Can't create video buffer.\n"); - goto error; - } - - enc->cpb_num = get_cpb_num(enc); - if (!enc->cpb_num) - goto error; - - get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - - cpb_size = (sscreen->info.chip_class < GFX9) ? - align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * - align(tmp_surf->u.legacy.level[0].nblk_y, 32) : - - align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * - align(tmp_surf->u.gfx9.surf_height, 32); - - cpb_size = cpb_size * 3 / 2; - cpb_size = cpb_size * enc->cpb_num; - if (enc->dual_pipe) - cpb_size += RVCE_MAX_AUX_BUFFER_NUM * - RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - tmp_buf->destroy(tmp_buf); - if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't create CPB buffer.\n"); - goto error; - } - - enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); - if (!enc->cpb_array) - goto error; - - reset_cpb(enc); - - switch (sscreen->info.vce_fw_version) { - case FW_40_2_2: - si_vce_40_2_2_init(enc); - si_get_pic_param = si_vce_40_2_2_get_param; - break; - - case FW_50_0_1: - case FW_50_1_2: - case FW_50_10_2: - case FW_50_17_3: - si_vce_50_init(enc); - si_get_pic_param = si_vce_50_get_param; - break; - - case FW_52_0_3: - case FW_52_4_3: - case FW_52_8_3: - si_vce_52_init(enc); - si_get_pic_param = si_vce_52_get_param; - break; - - default: - if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { - si_vce_52_init(enc); - si_get_pic_param = si_vce_52_get_param; - } else - goto error; - } - - return &enc->base; + struct si_screen *sscreen = (struct si_screen *)context->screen; + struct si_context *sctx = (struct si_context *)context; + struct rvce_encoder *enc; + struct pipe_video_buffer *tmp_buf, templat = {}; + struct radeon_surf *tmp_surf; + unsigned cpb_size; + + if (!sscreen->info.vce_fw_version) { + RVID_ERR("Kernel doesn't supports VCE!\n"); + return NULL; + + } else if (!si_vce_is_fw_version_supported(sscreen)) { + RVID_ERR("Unsupported VCE fw version loaded!\n"); + return NULL; + } + + enc = CALLOC_STRUCT(rvce_encoder); + if (!enc) + return NULL; + + if (sscreen->info.is_amdgpu) + enc->use_vm = true; + if ((!sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 42) || sscreen->info.is_amdgpu) + enc->use_vui = true; + if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY && + sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 && + sscreen->info.family != CHIP_VEGAM) + enc->dual_pipe = true; + /* TODO enable B frame with dual instance */ + if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) && + (sscreen->info.vce_harvest_config == 0)) + enc->dual_inst = true; + + enc->base = *templ; + enc->base.context = context; + + enc->base.destroy = rvce_destroy; + enc->base.begin_frame = rvce_begin_frame; + enc->base.encode_bitstream = rvce_encode_bitstream; + enc->base.end_frame = rvce_end_frame; + enc->base.flush = rvce_flush; + enc->base.get_feedback = rvce_get_feedback; + enc->get_buffer = get_buffer; + + enc->screen = context->screen; + enc->ws = ws; + enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); + if (!enc->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + templat.buffer_format = PIPE_FORMAT_NV12; + templat.width = enc->base.width; + templat.height = enc->base.height; + templat.interlaced = false; + if (!(tmp_buf = context->create_video_buffer(context, &templat))) { + RVID_ERR("Can't create video buffer.\n"); + goto error; + } + + enc->cpb_num = get_cpb_num(enc); + if (!enc->cpb_num) + goto error; + + get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); + + cpb_size = (sscreen->info.chip_class < GFX9) + ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32) + : + + align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * + align(tmp_surf->u.gfx9.surf_height, 32); + + cpb_size = cpb_size * 3 / 2; + cpb_size = cpb_size * enc->cpb_num; + if (enc->dual_pipe) + cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + tmp_buf->destroy(tmp_buf); + if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't create CPB buffer.\n"); + goto error; + } + + enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); + if (!enc->cpb_array) + goto error; + + reset_cpb(enc); + + switch (sscreen->info.vce_fw_version) { + case FW_40_2_2: + si_vce_40_2_2_init(enc); + break; + + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + si_vce_50_init(enc); + break; + + case FW_52_0_3: + case FW_52_4_3: + case FW_52_8_3: + si_vce_52_init(enc); + break; + + default: + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) { + si_vce_52_init(enc); + } else + goto error; + } + + return &enc->base; error: - if (enc->cs) - enc->ws->cs_destroy(enc->cs); + if (enc->cs) + enc->ws->cs_destroy(enc->cs); - si_vid_destroy_buffer(&enc->cpb); + si_vid_destroy_buffer(&enc->cpb); - FREE(enc->cpb_array); - FREE(enc); - return NULL; + FREE(enc->cpb_array); + FREE(enc); + return NULL; } /** @@ -533,44 +514,42 @@ error: */ bool si_vce_is_fw_version_supported(struct si_screen *sscreen) { - switch (sscreen->info.vce_fw_version) { - case FW_40_2_2: - case FW_50_0_1: - case FW_50_1_2: - case FW_50_10_2: - case FW_50_17_3: - case FW_52_0_3: - case FW_52_4_3: - case FW_52_8_3: - return true; - default: - if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) - return true; - else - return false; - } + switch (sscreen->info.vce_fw_version) { + case FW_40_2_2: + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + case FW_52_0_3: + case FW_52_4_3: + case FW_52_8_3: + return true; + default: + if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) + return true; + else + return false; + } } /** * Add the buffer as relocation to the current command submission */ -void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset) +void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, + enum radeon_bo_domain domain, signed offset) { - int reloc_idx; - - reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - if (enc->use_vm) { - uint64_t addr; - addr = enc->ws->buffer_get_virtual_address(buf); - addr = addr + offset; - RVCE_CS(addr >> 32); - RVCE_CS(addr); - } else { - offset += enc->ws->buffer_get_reloc_offset(buf); - RVCE_CS(reloc_idx * 4); - RVCE_CS(offset); - } + int reloc_idx; + + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + if (enc->use_vm) { + uint64_t addr; + addr = enc->ws->buffer_get_virtual_address(buf); + addr = addr + offset; + RVCE_CS(addr >> 32); + RVCE_CS(addr); + } else { + offset += enc->ws->buffer_get_reloc_offset(buf); + RVCE_CS(reloc_idx * 4); + RVCE_CS(offset); + } } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index e17468c90..3653540ea 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -25,403 +25,400 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void session(struct rvce_encoder *enc) { - RVCE_BEGIN(0x00000001); // session cmd - RVCE_CS(enc->stream_handle); - RVCE_END(); + RVCE_BEGIN(0x00000001); // session cmd + RVCE_CS(enc->stream_handle); + RVCE_END(); } -static void task_info(struct rvce_encoder *enc, uint32_t op, - uint32_t dep, uint32_t fb_idx, uint32_t ring_idx) +static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx) { - RVCE_BEGIN(0x00000002); // task info - if (op == 0x3) { - if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; - // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; - } - enc->task_info_idx = enc->cs->current.cdw; - } - RVCE_CS(0xffffffff); // offsetOfNextTaskInfo - RVCE_CS(op); // taskOperation - RVCE_CS(dep); // referencePictureDependency - RVCE_CS(0x00000000); // collocateFlagDependency - RVCE_CS(fb_idx); // feedbackIndex - RVCE_CS(ring_idx); // videoBitstreamRingIndex - RVCE_END(); + RVCE_BEGIN(0x00000002); // task info + if (op == 0x3) { + if (enc->task_info_idx) { + uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + // Update offsetOfNextTaskInfo + enc->cs->current.buf[enc->task_info_idx] = offs; + } + enc->task_info_idx = enc->cs->current.cdw; + } + RVCE_CS(0xffffffff); // offsetOfNextTaskInfo + RVCE_CS(op); // taskOperation + RVCE_CS(dep); // referencePictureDependency + RVCE_CS(0x00000000); // collocateFlagDependency + RVCE_CS(fb_idx); // feedbackIndex + RVCE_CS(ring_idx); // videoBitstreamRingIndex + RVCE_END(); } static void feedback(struct rvce_encoder *enc) { - RVCE_BEGIN(0x05000005); // feedback buffer - RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo - RVCE_CS(0x00000001); // feedbackRingSize - RVCE_END(); + RVCE_BEGIN(0x05000005); // feedback buffer + RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo + RVCE_CS(0x00000001); // feedbackRingSize + RVCE_END(); } static void create(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000000, 0, 0, 0); - - RVCE_BEGIN(0x01000001); // create cmd - RVCE_CS(0x00000000); // encUseCircularBuffer - RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile - RVCE_CS(enc->base.level); // encLevel - RVCE_CS(0x00000000); // encPicStructRestriction - RVCE_CS(enc->base.width); // encImageWidth - RVCE_CS(enc->base.height); // encImageHeight - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw - RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO - RVCE_END(); + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(0x00000000); // encUseCircularBuffer + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(0x00000000); // encPicStructRestriction + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw + RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO + RVCE_END(); } static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod - RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate - RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum - RVCE_CS(0x00000000); // encGOPSize - RVCE_CS(enc->pic.quant_i_frames); // encQP_I - RVCE_CS(enc->pic.quant_p_frames); // encQP_P - RVCE_CS(enc->pic.quant_b_frames); // encQP_B - RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen - RVCE_CS(0x00000000); // encVBVBufferLevel - RVCE_CS(0x00000000); // encMaxAUSize - RVCE_CS(0x00000000); // encQPInitialMode - RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional - RVCE_CS(0x00000000); // encMinQP - RVCE_CS(0x00000033); // encMaxQP - RVCE_CS(0x00000000); // encSkipFrameEnable - RVCE_CS(0x00000000); // encFillerDataEnable - RVCE_CS(0x00000000); // encEnforceHRD - RVCE_CS(0x00000000); // encBPicsDeltaQP - RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP - RVCE_CS(0x00000000); // encRateControlReInitDisable - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod + RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate + RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum + RVCE_CS(0x00000000); // encGOPSize + RVCE_CS(enc->pic.quant_i_frames); // encQP_I + RVCE_CS(enc->pic.quant_p_frames); // encQP_P + RVCE_CS(enc->pic.quant_b_frames); // encQP_B + RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen + RVCE_CS(0x00000000); // encVBVBufferLevel + RVCE_CS(0x00000000); // encMaxAUSize + RVCE_CS(0x00000000); // encQPInitialMode + RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional + RVCE_CS(0x00000000); // encMinQP + RVCE_CS(0x00000033); // encMaxQP + RVCE_CS(0x00000000); // encSkipFrameEnable + RVCE_CS(0x00000000); // encFillerDataEnable + RVCE_CS(0x00000000); // encEnforceHRD + RVCE_CS(0x00000000); // encBPicsDeltaQP + RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP + RVCE_CS(0x00000000); // encRateControlReInitDisable + RVCE_END(); } static void config_extension(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000001); // config extension - RVCE_CS(0x00000003); // encEnablePerfLogging - RVCE_END(); + RVCE_BEGIN(0x04000001); // config extension + RVCE_CS(0x00000003); // encEnablePerfLogging + RVCE_END(); } static void pic_control(struct rvce_encoder *enc) { - unsigned encNumMBsPerSlice; - - encNumMBsPerSlice = align(enc->base.width, 16) / 16; - encNumMBsPerSlice *= align(enc->base.height, 16) / 16; - - RVCE_BEGIN(0x04000002); // pic control - RVCE_CS(0x00000000); // encUseConstrainedIntraPred - RVCE_CS(0x00000000); // encCABACEnable - RVCE_CS(0x00000000); // encCABACIDC - RVCE_CS(0x00000000); // encLoopFilterDisable - RVCE_CS(0x00000000); // encLFBetaOffset - RVCE_CS(0x00000000); // encLFAlphaC0Offset - RVCE_CS(0x00000000); // encCropLeftOffset - RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset - RVCE_CS(0x00000000); // encCropTopOffset - RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset - RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice - RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot - RVCE_CS(0x00000000); // encForceIntraRefresh - RVCE_CS(0x00000000); // encForceIMBPeriod - RVCE_CS(0x00000000); // encPicOrderCntType - RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4 - RVCE_CS(0x00000000); // encSPSID - RVCE_CS(0x00000000); // encPPSID - RVCE_CS(0x00000040); // encConstraintSetFlags - RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern - RVCE_CS(0x00000000); // weightPredModeBPicture - RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames - RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames - RVCE_CS(0x00000001); // encNumDefaultActiveRefL0 - RVCE_CS(0x00000001); // encNumDefaultActiveRefL1 - RVCE_CS(0x00000000); // encSliceMode - RVCE_CS(0x00000000); // encMaxSliceSize - RVCE_END(); + unsigned encNumMBsPerSlice; + + encNumMBsPerSlice = align(enc->base.width, 16) / 16; + encNumMBsPerSlice *= align(enc->base.height, 16) / 16; + + RVCE_BEGIN(0x04000002); // pic control + RVCE_CS(0x00000000); // encUseConstrainedIntraPred + RVCE_CS(0x00000000); // encCABACEnable + RVCE_CS(0x00000000); // encCABACIDC + RVCE_CS(0x00000000); // encLoopFilterDisable + RVCE_CS(0x00000000); // encLFBetaOffset + RVCE_CS(0x00000000); // encLFAlphaC0Offset + RVCE_CS(0x00000000); // encCropLeftOffset + RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset + RVCE_CS(0x00000000); // encCropTopOffset + RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset + RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice + RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot + RVCE_CS(0x00000000); // encForceIntraRefresh + RVCE_CS(0x00000000); // encForceIMBPeriod + RVCE_CS(0x00000000); // encPicOrderCntType + RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4 + RVCE_CS(0x00000000); // encSPSID + RVCE_CS(0x00000000); // encPPSID + RVCE_CS(0x00000040); // encConstraintSetFlags + RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern + RVCE_CS(0x00000000); // weightPredModeBPicture + RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames + RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames + RVCE_CS(0x00000001); // encNumDefaultActiveRefL0 + RVCE_CS(0x00000001); // encNumDefaultActiveRefL1 + RVCE_CS(0x00000000); // encSliceMode + RVCE_CS(0x00000000); // encMaxSliceSize + RVCE_END(); } static void motion_estimation(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000007); // motion estimation - RVCE_CS(0x00000001); // encIMEDecimationSearch - RVCE_CS(0x00000001); // motionEstHalfPixel - RVCE_CS(0x00000000); // motionEstQuarterPixel - RVCE_CS(0x00000000); // disableFavorPMVPoint - RVCE_CS(0x00000000); // forceZeroPointCenter - RVCE_CS(0x00000000); // LSMVert - RVCE_CS(0x00000010); // encSearchRangeX - RVCE_CS(0x00000010); // encSearchRangeY - RVCE_CS(0x00000010); // encSearch1RangeX - RVCE_CS(0x00000010); // encSearch1RangeY - RVCE_CS(0x00000000); // disable16x16Frame1 - RVCE_CS(0x00000000); // disableSATD - RVCE_CS(0x00000000); // enableAMD - RVCE_CS(0x000000fe); // encDisableSubMode - RVCE_CS(0x00000000); // encIMESkipX - RVCE_CS(0x00000000); // encIMESkipY - RVCE_CS(0x00000000); // encEnImeOverwDisSubm - RVCE_CS(0x00000000); // encImeOverwDisSubmNo - RVCE_CS(0x00000001); // encIME2SearchRangeX - RVCE_CS(0x00000001); // encIME2SearchRangeY - RVCE_CS(0x00000000); // parallelModeSpeedupEnable - RVCE_CS(0x00000000); // fme0_encDisableSubMode - RVCE_CS(0x00000000); // fme1_encDisableSubMode - RVCE_CS(0x00000000); // imeSWSpeedupEnable - RVCE_END(); + RVCE_BEGIN(0x04000007); // motion estimation + RVCE_CS(0x00000001); // encIMEDecimationSearch + RVCE_CS(0x00000001); // motionEstHalfPixel + RVCE_CS(0x00000000); // motionEstQuarterPixel + RVCE_CS(0x00000000); // disableFavorPMVPoint + RVCE_CS(0x00000000); // forceZeroPointCenter + RVCE_CS(0x00000000); // LSMVert + RVCE_CS(0x00000010); // encSearchRangeX + RVCE_CS(0x00000010); // encSearchRangeY + RVCE_CS(0x00000010); // encSearch1RangeX + RVCE_CS(0x00000010); // encSearch1RangeY + RVCE_CS(0x00000000); // disable16x16Frame1 + RVCE_CS(0x00000000); // disableSATD + RVCE_CS(0x00000000); // enableAMD + RVCE_CS(0x000000fe); // encDisableSubMode + RVCE_CS(0x00000000); // encIMESkipX + RVCE_CS(0x00000000); // encIMESkipY + RVCE_CS(0x00000000); // encEnImeOverwDisSubm + RVCE_CS(0x00000000); // encImeOverwDisSubmNo + RVCE_CS(0x00000001); // encIME2SearchRangeX + RVCE_CS(0x00000001); // encIME2SearchRangeY + RVCE_CS(0x00000000); // parallelModeSpeedupEnable + RVCE_CS(0x00000000); // fme0_encDisableSubMode + RVCE_CS(0x00000000); // fme1_encDisableSubMode + RVCE_CS(0x00000000); // imeSWSpeedupEnable + RVCE_END(); } static void rdo(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000008); // rdo - RVCE_CS(0x00000000); // encDisableTbePredIFrame - RVCE_CS(0x00000000); // encDisableTbePredPFrame - RVCE_CS(0x00000000); // useFmeInterpolY - RVCE_CS(0x00000000); // useFmeInterpolUV - RVCE_CS(0x00000000); // useFmeIntrapolY - RVCE_CS(0x00000000); // useFmeIntrapolUV - RVCE_CS(0x00000000); // useFmeInterpolY_1 - RVCE_CS(0x00000000); // useFmeInterpolUV_1 - RVCE_CS(0x00000000); // useFmeIntrapolY_1 - RVCE_CS(0x00000000); // useFmeIntrapolUV_1 - RVCE_CS(0x00000000); // enc16x16CostAdj - RVCE_CS(0x00000000); // encSkipCostAdj - RVCE_CS(0x00000000); // encForce16x16skip - RVCE_CS(0x00000000); // encDisableThresholdCalcA - RVCE_CS(0x00000000); // encLumaCoeffCost - RVCE_CS(0x00000000); // encLumaMBCoeffCost - RVCE_CS(0x00000000); // encChromaCoeffCost - RVCE_END(); + RVCE_BEGIN(0x04000008); // rdo + RVCE_CS(0x00000000); // encDisableTbePredIFrame + RVCE_CS(0x00000000); // encDisableTbePredPFrame + RVCE_CS(0x00000000); // useFmeInterpolY + RVCE_CS(0x00000000); // useFmeInterpolUV + RVCE_CS(0x00000000); // useFmeIntrapolY + RVCE_CS(0x00000000); // useFmeIntrapolUV + RVCE_CS(0x00000000); // useFmeInterpolY_1 + RVCE_CS(0x00000000); // useFmeInterpolUV_1 + RVCE_CS(0x00000000); // useFmeIntrapolY_1 + RVCE_CS(0x00000000); // useFmeIntrapolUV_1 + RVCE_CS(0x00000000); // enc16x16CostAdj + RVCE_CS(0x00000000); // encSkipCostAdj + RVCE_CS(0x00000000); // encForce16x16skip + RVCE_CS(0x00000000); // encDisableThresholdCalcA + RVCE_CS(0x00000000); // encLumaCoeffCost + RVCE_CS(0x00000000); // encLumaMBCoeffCost + RVCE_CS(0x00000000); // encChromaCoeffCost + RVCE_END(); } static void vui(struct rvce_encoder *enc) { - int i; - - if (!enc->pic.rate_ctrl.frame_rate_num) - return; - - RVCE_BEGIN(0x04000009); // vui - RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag - RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc - RVCE_CS(0x00000000); //aspectRatioInfo.sarWidth - RVCE_CS(0x00000000); //aspectRatioInfo.sarHeight - RVCE_CS(0x00000000); //overscanInfoPresentFlag - RVCE_CS(0x00000000); //overScanInfo.overscanAppropFlag - RVCE_CS(0x00000000); //videoSignalTypePresentFlag - RVCE_CS(0x00000005); //videoSignalTypeInfo.videoFormat - RVCE_CS(0x00000000); //videoSignalTypeInfo.videoFullRangeFlag - RVCE_CS(0x00000000); //videoSignalTypeInfo.colorDescriptionPresentFlag - RVCE_CS(0x00000002); //videoSignalTypeInfo.colorPrim - RVCE_CS(0x00000002); //videoSignalTypeInfo.transferChar - RVCE_CS(0x00000002); //videoSignalTypeInfo.matrixCoef - RVCE_CS(0x00000000); //chromaLocInfoPresentFlag - RVCE_CS(0x00000000); //chromaLocInfo.chromaLocTop - RVCE_CS(0x00000000); //chromaLocInfo.chromaLocBottom - RVCE_CS(0x00000001); //timingInfoPresentFlag - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); //timingInfo.numUnitsInTick - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); //timingInfo.timeScale; - RVCE_CS(0x00000001); //timingInfo.fixedFrameRateFlag - RVCE_CS(0x00000000); //nalHRDParametersPresentFlag - RVCE_CS(0x00000000); //hrdParam.cpbCntMinus1 - RVCE_CS(0x00000004); //hrdParam.bitRateScale - RVCE_CS(0x00000006); //hrdParam.cpbSizeScale - for (i = 0; i < 32; i++) { - RVCE_CS(0x00000000); //hrdParam.bitRateValueMinus - RVCE_CS(0x00000000); //hrdParam.cpbSizeValueMinus - RVCE_CS(0x00000000); //hrdParam.cbrFlag - } - RVCE_CS(0x00000017); //hrdParam.initialCpbRemovalDelayLengthMinus1 - RVCE_CS(0x00000017); //hrdParam.cpbRemovalDelayLengthMinus1 - RVCE_CS(0x00000017); //hrdParam.dpbOutputDelayLengthMinus1 - RVCE_CS(0x00000018); //hrdParam.timeOffsetLength - RVCE_CS(0x00000000); //lowDelayHRDFlag - RVCE_CS(0x00000000); //picStructPresentFlag - RVCE_CS(0x00000000); //bitstreamRestrictionPresentFlag - RVCE_CS(0x00000001); //bitstreamRestrictions.motionVectorsOverPicBoundariesFlag - RVCE_CS(0x00000002); //bitstreamRestrictions.maxBytesPerPicDenom - RVCE_CS(0x00000001); //bitstreamRestrictions.maxBitsPerMbDenom - RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthHori - RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthVert - RVCE_CS(0x00000003); //bitstreamRestrictions.numReorderFrames - RVCE_CS(0x00000003); //bitstreamRestrictions.maxDecFrameBuffering - RVCE_END(); + int i; + + if (!enc->pic.rate_ctrl.frame_rate_num) + return; + + RVCE_BEGIN(0x04000009); // vui + RVCE_CS(0x00000000); // aspectRatioInfoPresentFlag + RVCE_CS(0x00000000); // aspectRatioInfo.aspectRatioIdc + RVCE_CS(0x00000000); // aspectRatioInfo.sarWidth + RVCE_CS(0x00000000); // aspectRatioInfo.sarHeight + RVCE_CS(0x00000000); // overscanInfoPresentFlag + RVCE_CS(0x00000000); // overScanInfo.overscanAppropFlag + RVCE_CS(0x00000000); // videoSignalTypePresentFlag + RVCE_CS(0x00000005); // videoSignalTypeInfo.videoFormat + RVCE_CS(0x00000000); // videoSignalTypeInfo.videoFullRangeFlag + RVCE_CS(0x00000000); // videoSignalTypeInfo.colorDescriptionPresentFlag + RVCE_CS(0x00000002); // videoSignalTypeInfo.colorPrim + RVCE_CS(0x00000002); // videoSignalTypeInfo.transferChar + RVCE_CS(0x00000002); // videoSignalTypeInfo.matrixCoef + RVCE_CS(0x00000000); // chromaLocInfoPresentFlag + RVCE_CS(0x00000000); // chromaLocInfo.chromaLocTop + RVCE_CS(0x00000000); // chromaLocInfo.chromaLocBottom + RVCE_CS(0x00000001); // timingInfoPresentFlag + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // timingInfo.numUnitsInTick + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); // timingInfo.timeScale; + RVCE_CS(0x00000001); // timingInfo.fixedFrameRateFlag + RVCE_CS(0x00000000); // nalHRDParametersPresentFlag + RVCE_CS(0x00000000); // hrdParam.cpbCntMinus1 + RVCE_CS(0x00000004); // hrdParam.bitRateScale + RVCE_CS(0x00000006); // hrdParam.cpbSizeScale + for (i = 0; i < 32; i++) { + RVCE_CS(0x00000000); // hrdParam.bitRateValueMinus + RVCE_CS(0x00000000); // hrdParam.cpbSizeValueMinus + RVCE_CS(0x00000000); // hrdParam.cbrFlag + } + RVCE_CS(0x00000017); // hrdParam.initialCpbRemovalDelayLengthMinus1 + RVCE_CS(0x00000017); // hrdParam.cpbRemovalDelayLengthMinus1 + RVCE_CS(0x00000017); // hrdParam.dpbOutputDelayLengthMinus1 + RVCE_CS(0x00000018); // hrdParam.timeOffsetLength + RVCE_CS(0x00000000); // lowDelayHRDFlag + RVCE_CS(0x00000000); // picStructPresentFlag + RVCE_CS(0x00000000); // bitstreamRestrictionPresentFlag + RVCE_CS(0x00000001); // bitstreamRestrictions.motionVectorsOverPicBoundariesFlag + RVCE_CS(0x00000002); // bitstreamRestrictions.maxBytesPerPicDenom + RVCE_CS(0x00000001); // bitstreamRestrictions.maxBitsPerMbDenom + RVCE_CS(0x00000010); // bitstreamRestrictions.log2MaxMvLengthHori + RVCE_CS(0x00000010); // bitstreamRestrictions.log2MaxMvLengthVert + RVCE_CS(0x00000003); // bitstreamRestrictions.numReorderFrames + RVCE_CS(0x00000003); // bitstreamRestrictions.maxDecFrameBuffering + RVCE_END(); } static void config(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); - enc->rate_control(enc); - enc->config_extension(enc); - enc->motion_estimation(enc); - enc->rdo(enc); - if (enc->use_vui) - enc->vui(enc); - enc->pic_control(enc); + enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); + enc->rate_control(enc); + enc->config_extension(enc); + enc->motion_estimation(enc); + enc->rdo(enc); + if (enc->use_vui) + enc->vui(enc); + enc->pic_control(enc); } static void encode(struct rvce_encoder *enc) { - signed luma_offset, chroma_offset; - int i; - - enc->task_info(enc, 0x00000003, 0, 0, 0); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo - RVCE_END(); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(0x00000000); // insertHeaders - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(0x00000000); // forceRefreshMap - RVCE_CS(0x00000000); // insertAUD - RVCE_CS(0x00000000); // endOfSequence - RVCE_CS(0x00000000); // endOfStream - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode - RVCE_CS(0x00000000); // encInputPicTileConfig - RVCE_CS(enc->pic.picture_type); // encPicType - RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - RVCE_CS(0x00000000); // encIdrPicId - RVCE_CS(0x00000000); // encMGSKeyPic - RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag - RVCE_CS(0x00000000); // encTemporalLayerIndex - RVCE_CS(0x00000000); // num_ref_idx_active_override_flag - RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 - RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 - - i = enc->pic.frame_num - enc->pic.ref_idx_l0; - if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - RVCE_CS(0x00000001); // encRefListModificationOp - RVCE_CS(i - 1); // encRefListModificationNum - } else { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - - for (i = 0; i < 3; ++i) { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - for (i = 0; i < 4; ++i) { - RVCE_CS(0x00000000); // encDecodedPictureMarkingOp - RVCE_CS(0x00000000); // encDecodedPictureMarkingNum - RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); // encPicType - RVCE_CS(l0->frame_num); // frameNumber - RVCE_CS(l0->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - // encReferencePictureL0[1] - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); // encPicType - RVCE_CS(l1->frame_num); // frameNumber - RVCE_CS(l1->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); // encReconstructedLumaOffset - RVCE_CS(chroma_offset); // encReconstructedChromaOffset - RVCE_CS(0x00000000); // encColocBufferOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset - RVCE_CS(0x00000000); // pictureCount - RVCE_CS(enc->pic.frame_num); // frameNumber - RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount - RVCE_CS(0x00000000); // numIPicRemainInRCGOP - RVCE_CS(0x00000000); // numPPicRemainInRCGOP - RVCE_CS(0x00000000); // numBPicRemainInRCGOP - RVCE_CS(0x00000000); // numIRPicRemainInRCGOP - RVCE_CS(0x00000000); // enableIntraRefresh - RVCE_END(); + signed luma_offset, chroma_offset; + int i; + + enc->task_info(enc, 0x00000003, 0, 0, 0); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo + RVCE_END(); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(0x00000000); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + RVCE_END(); } static void destroy(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000001, 0, 0, 0); + enc->task_info(enc, 0x00000001, 0, 0, 0); - feedback(enc); + feedback(enc); - RVCE_BEGIN(0x02000001); // destroy - RVCE_END(); + RVCE_BEGIN(0x02000001); // destroy + RVCE_END(); } void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) @@ -430,17 +427,18 @@ void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_pict void si_vce_40_2_2_init(struct rvce_encoder *enc) { - enc->session = session; - enc->task_info = task_info; - enc->create = create; - enc->feedback = feedback; - enc->rate_control = rate_control; - enc->config_extension = config_extension; - enc->pic_control = pic_control; - enc->motion_estimation = motion_estimation; - enc->rdo = rdo; - enc->vui = vui; - enc->config = config; - enc->encode = encode; - enc->destroy = destroy; + enc->session = session; + enc->task_info = task_info; + enc->create = create; + enc->feedback = feedback; + enc->rate_control = rate_control; + enc->config_extension = config_extension; + enc->pic_control = pic_control; + enc->motion_estimation = motion_estimation; + enc->rdo = rdo; + enc->vui = vui; + enc->config = config; + enc->encode = encode; + enc->destroy = destroy; + enc->si_get_pic_param = si_vce_40_2_2_get_param; } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c index f4cbc9bb8..9f972eef6 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_50.c @@ -25,206 +25,203 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod - RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate - RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate - RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum - RVCE_CS(0x00000000); // encGOPSize - RVCE_CS(enc->pic.quant_i_frames); // encQP_I - RVCE_CS(enc->pic.quant_p_frames); // encQP_P - RVCE_CS(enc->pic.quant_b_frames); // encQP_B - RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize - RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen - RVCE_CS(0x00000000); // encVBVBufferLevel - RVCE_CS(0x00000000); // encMaxAUSize - RVCE_CS(0x00000000); // encQPInitialMode - RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger - RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional - RVCE_CS(0x00000000); // encMinQP - RVCE_CS(0x00000033); // encMaxQP - RVCE_CS(0x00000000); // encSkipFrameEnable - RVCE_CS(0x00000000); // encFillerDataEnable - RVCE_CS(0x00000000); // encEnforceHRD - RVCE_CS(0x00000000); // encBPicsDeltaQP - RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP - RVCE_CS(0x00000000); // encRateControlReInitDisable - RVCE_CS(0x00000000); // encLCVBRInitQPFlag - RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod + RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate + RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate + RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum + RVCE_CS(0x00000000); // encGOPSize + RVCE_CS(enc->pic.quant_i_frames); // encQP_I + RVCE_CS(enc->pic.quant_p_frames); // encQP_P + RVCE_CS(enc->pic.quant_b_frames); // encQP_B + RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize + RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen + RVCE_CS(0x00000000); // encVBVBufferLevel + RVCE_CS(0x00000000); // encMaxAUSize + RVCE_CS(0x00000000); // encQPInitialMode + RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger + RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional + RVCE_CS(0x00000000); // encMinQP + RVCE_CS(0x00000033); // encMaxQP + RVCE_CS(0x00000000); // encSkipFrameEnable + RVCE_CS(0x00000000); // encFillerDataEnable + RVCE_CS(0x00000000); // encEnforceHRD + RVCE_CS(0x00000000); // encBPicsDeltaQP + RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP + RVCE_CS(0x00000000); // encRateControlReInitDisable + RVCE_CS(0x00000000); // encLCVBRInitQPFlag + RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag + RVCE_END(); } static void encode(struct rvce_encoder *enc) { - signed luma_offset, chroma_offset, bs_offset; - unsigned dep, bs_idx = enc->bs_idx++; - int i; - - if (enc->dual_inst) { - if (bs_idx == 0) - dep = 1; - else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - dep = 0; - else - dep = 2; - } else - dep = 0; - - enc->task_info(enc, 0x00000003, dep, 0, bs_idx); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo - RVCE_END(); - - bs_offset = -(signed)(bs_idx * enc->bs_size); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - if (enc->dual_pipe) { - unsigned aux_offset = enc->cpb.res->buf->size - - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - RVCE_BEGIN(0x05000002); // auxiliary buffer - for (i = 0; i < 8; ++i) { - RVCE_CS(aux_offset); - aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; - } - for (i = 0; i < 8; ++i) - RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); - RVCE_END(); - } - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(0x00000000); // forceRefreshMap - RVCE_CS(0x00000000); // insertAUD - RVCE_CS(0x00000000); // endOfSequence - RVCE_CS(0x00000000); // endOfStream - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - if (enc->dual_pipe) - RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) - else - RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) - RVCE_CS(0x00000000); // encInputPicTileConfig - RVCE_CS(enc->pic.picture_type); // encPicType - RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - RVCE_CS(0x00000000); // encIdrPicId - RVCE_CS(0x00000000); // encMGSKeyPic - RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag - RVCE_CS(0x00000000); // encTemporalLayerIndex - RVCE_CS(0x00000000); // num_ref_idx_active_override_flag - RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 - RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 - - i = enc->pic.frame_num - enc->pic.ref_idx_l0; - if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - RVCE_CS(0x00000001); // encRefListModificationOp - RVCE_CS(i - 1); // encRefListModificationNum - } else { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - - for (i = 0; i < 3; ++i) { - RVCE_CS(0x00000000); // encRefListModificationOp - RVCE_CS(0x00000000); // encRefListModificationNum - } - for (i = 0; i < 4; ++i) { - RVCE_CS(0x00000000); // encDecodedPictureMarkingOp - RVCE_CS(0x00000000); // encDecodedPictureMarkingNum - RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp - RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); // encPicType - RVCE_CS(l0->frame_num); // frameNumber - RVCE_CS(l0->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - // encReferencePictureL0[1] - RVCE_CS(0x00000000); // pictureStructure - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); // encPicType - RVCE_CS(l1->frame_num); // frameNumber - RVCE_CS(l1->pic_order_cnt); // pictureOrderCount - RVCE_CS(luma_offset); // lumaOffset - RVCE_CS(chroma_offset); // chromaOffset - } else { - RVCE_CS(0x00000000); // encPicType - RVCE_CS(0x00000000); // frameNumber - RVCE_CS(0x00000000); // pictureOrderCount - RVCE_CS(0xffffffff); // lumaOffset - RVCE_CS(0xffffffff); // chromaOffset - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); // encReconstructedLumaOffset - RVCE_CS(chroma_offset); // encReconstructedChromaOffset - RVCE_CS(0x00000000); // encColocBufferOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset - RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset - RVCE_CS(0x00000000); // pictureCount - RVCE_CS(enc->pic.frame_num); // frameNumber - RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount - RVCE_CS(0x00000000); // numIPicRemainInRCGOP - RVCE_CS(0x00000000); // numPPicRemainInRCGOP - RVCE_CS(0x00000000); // numBPicRemainInRCGOP - RVCE_CS(0x00000000); // numIRPicRemainInRCGOP - RVCE_CS(0x00000000); // enableIntraRefresh - RVCE_END(); + signed luma_offset, chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = + enc->cpb.res->buf->size - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(0x00000000); // forceRefreshMap + RVCE_CS(0x00000000); // insertAUD + RVCE_CS(0x00000000); // endOfSequence + RVCE_CS(0x00000000); // endOfStream + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + if (enc->dual_pipe) + RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + else + RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading) + RVCE_CS(0x00000000); // encInputPicTileConfig + RVCE_CS(enc->pic.picture_type); // encPicType + RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + RVCE_CS(0x00000000); // encIdrPicId + RVCE_CS(0x00000000); // encMGSKeyPic + RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag + RVCE_CS(0x00000000); // encTemporalLayerIndex + RVCE_CS(0x00000000); // num_ref_idx_active_override_flag + RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1 + RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1 + + i = enc->pic.frame_num - enc->pic.ref_idx_l0; + if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + RVCE_CS(0x00000001); // encRefListModificationOp + RVCE_CS(i - 1); // encRefListModificationNum + } else { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + + for (i = 0; i < 3; ++i) { + RVCE_CS(0x00000000); // encRefListModificationOp + RVCE_CS(0x00000000); // encRefListModificationNum + } + for (i = 0; i < 4; ++i) { + RVCE_CS(0x00000000); // encDecodedPictureMarkingOp + RVCE_CS(0x00000000); // encDecodedPictureMarkingNum + RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp + RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); // encPicType + RVCE_CS(l0->frame_num); // frameNumber + RVCE_CS(l0->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + // encReferencePictureL0[1] + RVCE_CS(0x00000000); // pictureStructure + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); // encPicType + RVCE_CS(l1->frame_num); // frameNumber + RVCE_CS(l1->pic_order_cnt); // pictureOrderCount + RVCE_CS(luma_offset); // lumaOffset + RVCE_CS(chroma_offset); // chromaOffset + } else { + RVCE_CS(0x00000000); // encPicType + RVCE_CS(0x00000000); // frameNumber + RVCE_CS(0x00000000); // pictureOrderCount + RVCE_CS(0xffffffff); // lumaOffset + RVCE_CS(0xffffffff); // chromaOffset + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); // encReconstructedLumaOffset + RVCE_CS(chroma_offset); // encReconstructedChromaOffset + RVCE_CS(0x00000000); // encColocBufferOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset + RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset + RVCE_CS(0x00000000); // pictureCount + RVCE_CS(enc->pic.frame_num); // frameNumber + RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount + RVCE_CS(0x00000000); // numIPicRemainInRCGOP + RVCE_CS(0x00000000); // numPPicRemainInRCGOP + RVCE_CS(0x00000000); // numBPicRemainInRCGOP + RVCE_CS(0x00000000); // numIRPicRemainInRCGOP + RVCE_CS(0x00000000); // enableIntraRefresh + RVCE_END(); } void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) @@ -233,9 +230,10 @@ void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_ void si_vce_50_init(struct rvce_encoder *enc) { - si_vce_40_2_2_init(enc); + si_vce_40_2_2_init(enc); - /* only the two below are different */ - enc->rate_control = rate_control; - enc->encode = encode; + /* only the two below are different */ + enc->rate_control = rate_control; + enc->encode = encode; + enc->si_get_pic_param = si_vce_50_get_param; } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c index 364da4dbe..141032d66 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce_52.c @@ -25,646 +25,647 @@ * **************************************************************************/ -#include <stdio.h> - #include "pipe/p_video_codec.h" - -#include "util/u_video.h" +#include "radeon_vce.h" +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" - +#include "util/u_video.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <stdio.h> static void get_rate_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method; - enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate; - enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate; - enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames; - enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames; - enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames; - enc->enc_pic.rc.gop_size = pic->gop_size; - enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num; - enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den; - enc->enc_pic.rc.max_qp = 51; - enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; - enc->enc_pic.rc.vbv_buf_lv = pic->rate_ctrl.vbv_buf_lv; - enc->enc_pic.rc.fill_data_enable = pic->rate_ctrl.fill_data_enable; - enc->enc_pic.rc.enforce_hrd = pic->rate_ctrl.enforce_hrd; - enc->enc_pic.rc.target_bits_picture = pic->rate_ctrl.target_bits_picture; - enc->enc_pic.rc.peak_bits_picture_integer = pic->rate_ctrl.peak_bits_picture_integer; - enc->enc_pic.rc.peak_bits_picture_fraction = pic->rate_ctrl.peak_bits_picture_fraction; + enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method; + enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate; + enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate; + enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames; + enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames; + enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames; + enc->enc_pic.rc.gop_size = pic->gop_size; + enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num; + enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den; + enc->enc_pic.rc.max_qp = 51; + enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size; + enc->enc_pic.rc.vbv_buf_lv = pic->rate_ctrl.vbv_buf_lv; + enc->enc_pic.rc.fill_data_enable = pic->rate_ctrl.fill_data_enable; + enc->enc_pic.rc.enforce_hrd = pic->rate_ctrl.enforce_hrd; + enc->enc_pic.rc.target_bits_picture = pic->rate_ctrl.target_bits_picture; + enc->enc_pic.rc.peak_bits_picture_integer = pic->rate_ctrl.peak_bits_picture_integer; + enc->enc_pic.rc.peak_bits_picture_fraction = pic->rate_ctrl.peak_bits_picture_fraction; } -static void get_motion_estimation_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) +static void get_motion_estimation_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.me.motion_est_quarter_pixel = pic->motion_est.motion_est_quarter_pixel; - enc->enc_pic.me.enc_disable_sub_mode = pic->motion_est.enc_disable_sub_mode; - enc->enc_pic.me.lsmvert = pic->motion_est.lsmvert; - enc->enc_pic.me.enc_en_ime_overw_dis_subm = pic->motion_est.enc_en_ime_overw_dis_subm; - enc->enc_pic.me.enc_ime_overw_dis_subm_no = pic->motion_est.enc_ime_overw_dis_subm_no; - enc->enc_pic.me.enc_ime2_search_range_x = pic->motion_est.enc_ime2_search_range_x; - enc->enc_pic.me.enc_ime2_search_range_y = pic->motion_est.enc_ime2_search_range_y; - enc->enc_pic.me.enc_ime_decimation_search = 0x00000001; - enc->enc_pic.me.motion_est_half_pixel = 0x00000001; - enc->enc_pic.me.enc_search_range_x = 0x00000010; - enc->enc_pic.me.enc_search_range_y = 0x00000010; - enc->enc_pic.me.enc_search1_range_x = 0x00000010; - enc->enc_pic.me.enc_search1_range_y = 0x00000010; + enc->enc_pic.me.motion_est_quarter_pixel = pic->motion_est.motion_est_quarter_pixel; + enc->enc_pic.me.enc_disable_sub_mode = pic->motion_est.enc_disable_sub_mode; + enc->enc_pic.me.lsmvert = pic->motion_est.lsmvert; + enc->enc_pic.me.enc_en_ime_overw_dis_subm = pic->motion_est.enc_en_ime_overw_dis_subm; + enc->enc_pic.me.enc_ime_overw_dis_subm_no = pic->motion_est.enc_ime_overw_dis_subm_no; + enc->enc_pic.me.enc_ime2_search_range_x = pic->motion_est.enc_ime2_search_range_x; + enc->enc_pic.me.enc_ime2_search_range_y = pic->motion_est.enc_ime2_search_range_y; + enc->enc_pic.me.enc_ime_decimation_search = 0x00000001; + enc->enc_pic.me.motion_est_half_pixel = 0x00000001; + enc->enc_pic.me.enc_search_range_x = 0x00000010; + enc->enc_pic.me.enc_search_range_y = 0x00000010; + enc->enc_pic.me.enc_search1_range_x = 0x00000010; + enc->enc_pic.me.enc_search1_range_y = 0x00000010; } static void get_pic_control_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - unsigned encNumMBsPerSlice; - encNumMBsPerSlice = align(enc->base.width, 16) / 16; - encNumMBsPerSlice *= align(enc->base.height, 16) / 16; - if (pic->pic_ctrl.enc_frame_cropping_flag) { - enc->enc_pic.pc.enc_crop_left_offset = pic->pic_ctrl.enc_frame_crop_left_offset; - enc->enc_pic.pc.enc_crop_right_offset = pic->pic_ctrl.enc_frame_crop_right_offset; - enc->enc_pic.pc.enc_crop_top_offset = pic->pic_ctrl.enc_frame_crop_top_offset; - enc->enc_pic.pc.enc_crop_bottom_offset = pic->pic_ctrl.enc_frame_crop_bottom_offset; - } else { - enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - enc->base.width) >> 1; - enc->enc_pic.pc.enc_crop_bottom_offset = (align(enc->base.height, 16) - enc->base.height) >> 1; - } - enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice; - enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 1; - enc->enc_pic.pc.enc_number_of_reference_frames = MIN2(enc->base.max_references, 2); - enc->enc_pic.pc.enc_max_num_ref_frames = enc->base.max_references + 1; - enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; - enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; - enc->enc_pic.pc.enc_cabac_enable = pic->pic_ctrl.enc_cabac_enable; - enc->enc_pic.pc.enc_constraint_set_flags = pic->pic_ctrl.enc_constraint_set_flags; - enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; - enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; + unsigned encNumMBsPerSlice; + encNumMBsPerSlice = align(enc->base.width, 16) / 16; + encNumMBsPerSlice *= align(enc->base.height, 16) / 16; + if (pic->pic_ctrl.enc_frame_cropping_flag) { + enc->enc_pic.pc.enc_crop_left_offset = pic->pic_ctrl.enc_frame_crop_left_offset; + enc->enc_pic.pc.enc_crop_right_offset = pic->pic_ctrl.enc_frame_crop_right_offset; + enc->enc_pic.pc.enc_crop_top_offset = pic->pic_ctrl.enc_frame_crop_top_offset; + enc->enc_pic.pc.enc_crop_bottom_offset = pic->pic_ctrl.enc_frame_crop_bottom_offset; + } else { + enc->enc_pic.pc.enc_crop_right_offset = (align(enc->base.width, 16) - enc->base.width) >> 1; + enc->enc_pic.pc.enc_crop_bottom_offset = + (align(enc->base.height, 16) - enc->base.height) >> 1; + } + enc->enc_pic.pc.enc_num_mbs_per_slice = encNumMBsPerSlice; + enc->enc_pic.pc.enc_b_pic_pattern = MAX2(enc->base.max_references, 1) - 1; + enc->enc_pic.pc.enc_number_of_reference_frames = MIN2(enc->base.max_references, 2); + enc->enc_pic.pc.enc_max_num_ref_frames = enc->base.max_references + 1; + enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; + enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; + enc->enc_pic.pc.enc_cabac_enable = pic->pic_ctrl.enc_cabac_enable; + enc->enc_pic.pc.enc_constraint_set_flags = pic->pic_ctrl.enc_constraint_set_flags; + enc->enc_pic.pc.enc_num_default_active_ref_l0 = 0x00000001; + enc->enc_pic.pc.enc_num_default_active_ref_l1 = 0x00000001; } static void get_task_info_param(struct rvce_encoder *enc) { - enc->enc_pic.ti.offset_of_next_task_info = 0xffffffff; + enc->enc_pic.ti.offset_of_next_task_info = 0xffffffff; } static void get_feedback_buffer_param(struct rvce_encoder *enc) { - enc->enc_pic.fb.feedback_ring_size = 0x00000001; + enc->enc_pic.fb.feedback_ring_size = 0x00000001; } static void get_config_ext_param(struct rvce_encoder *enc) { - enc->enc_pic.ce.enc_enable_perf_logging = 0x00000003; + enc->enc_pic.ce.enc_enable_perf_logging = 0x00000003; } static void get_vui_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - enc->enc_pic.enable_vui = pic->enable_vui; - enc->enc_pic.vui.video_format = 0x00000005; - enc->enc_pic.vui.color_prim = 0x00000002; - enc->enc_pic.vui.transfer_char = 0x00000002; - enc->enc_pic.vui.matrix_coef = 0x00000002; - enc->enc_pic.vui.timing_info_present_flag = 0x00000001; - enc->enc_pic.vui.num_units_in_tick = pic->rate_ctrl.frame_rate_den; - enc->enc_pic.vui.time_scale = pic->rate_ctrl.frame_rate_num * 2; - enc->enc_pic.vui.fixed_frame_rate_flag = 0x00000001; - enc->enc_pic.vui.bit_rate_scale = 0x00000004; - enc->enc_pic.vui.cpb_size_scale = 0x00000006; - enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.cpb_removal_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.dpb_output_delay_length_minus1 = 0x00000017; - enc->enc_pic.vui.time_offset_length = 0x00000018; - enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag = 0x00000001; - enc->enc_pic.vui.max_bytes_per_pic_denom = 0x00000002; - enc->enc_pic.vui.max_bits_per_mb_denom = 0x00000001; - enc->enc_pic.vui.log2_max_mv_length_hori = 0x00000010; - enc->enc_pic.vui.log2_max_mv_length_vert = 0x00000010; - enc->enc_pic.vui.num_reorder_frames = 0x00000003; - enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003; + enc->enc_pic.enable_vui = pic->enable_vui; + enc->enc_pic.vui.video_format = 0x00000005; + enc->enc_pic.vui.color_prim = 0x00000002; + enc->enc_pic.vui.transfer_char = 0x00000002; + enc->enc_pic.vui.matrix_coef = 0x00000002; + enc->enc_pic.vui.timing_info_present_flag = 0x00000001; + enc->enc_pic.vui.num_units_in_tick = pic->rate_ctrl.frame_rate_den; + enc->enc_pic.vui.time_scale = pic->rate_ctrl.frame_rate_num * 2; + enc->enc_pic.vui.fixed_frame_rate_flag = 0x00000001; + enc->enc_pic.vui.bit_rate_scale = 0x00000004; + enc->enc_pic.vui.cpb_size_scale = 0x00000006; + enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.cpb_removal_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.dpb_output_delay_length_minus1 = 0x00000017; + enc->enc_pic.vui.time_offset_length = 0x00000018; + enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag = 0x00000001; + enc->enc_pic.vui.max_bytes_per_pic_denom = 0x00000002; + enc->enc_pic.vui.max_bits_per_mb_denom = 0x00000001; + enc->enc_pic.vui.log2_max_mv_length_hori = 0x00000010; + enc->enc_pic.vui.log2_max_mv_length_vert = 0x00000010; + enc->enc_pic.vui.num_reorder_frames = 0x00000003; + enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003; } void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - get_rate_control_param(enc, pic); - get_motion_estimation_param(enc, pic); - get_pic_control_param(enc, pic); - get_task_info_param(enc); - get_feedback_buffer_param(enc); - get_vui_param(enc, pic); - get_config_ext_param(enc); - - enc->enc_pic.picture_type = pic->picture_type; - enc->enc_pic.frame_num = pic->frame_num; - enc->enc_pic.frame_num_cnt = pic->frame_num_cnt; - enc->enc_pic.p_remain = pic->p_remain; - enc->enc_pic.i_remain = pic->i_remain; - enc->enc_pic.gop_cnt = pic->gop_cnt; - enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; - enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; - enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; - enc->enc_pic.not_referenced = pic->not_referenced; - if (enc->dual_inst) - enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201; - else - enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201; - enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); + get_rate_control_param(enc, pic); + get_motion_estimation_param(enc, pic); + get_pic_control_param(enc, pic); + get_task_info_param(enc); + get_feedback_buffer_param(enc); + get_vui_param(enc, pic); + get_config_ext_param(enc); + + enc->enc_pic.picture_type = pic->picture_type; + enc->enc_pic.frame_num = pic->frame_num; + enc->enc_pic.frame_num_cnt = pic->frame_num_cnt; + enc->enc_pic.p_remain = pic->p_remain; + enc->enc_pic.i_remain = pic->i_remain; + enc->enc_pic.gop_cnt = pic->gop_cnt; + enc->enc_pic.pic_order_cnt = pic->pic_order_cnt; + enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0; + enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1; + enc->enc_pic.not_referenced = pic->not_referenced; + if (enc->dual_inst) + enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x00000201; + else + enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants = 0x01000201; + enc->enc_pic.is_idr = (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); } static void create(struct rvce_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - enc->task_info(enc, 0x00000000, 0, 0, 0); - - RVCE_BEGIN(0x01000001); // create cmd - RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer); - RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile - RVCE_CS(enc->base.level); // encLevel - RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction); - RVCE_CS(enc->base.width); // encImageWidth - RVCE_CS(enc->base.height); // encImageHeight - - if (sscreen->info.chip_class < GFX9) { - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw - } else { - RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encRefPicLumaPitch - RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encRefPicChromaPitch - RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // encRefYHeightInQw - } - - RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants); - - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_luma_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_chroma_buffer_offset); - RVCE_CS(enc->enc_pic.ec.enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity); - RVCE_END(); + struct si_screen *sscreen = (struct si_screen *)enc->screen; + enc->task_info(enc, 0x00000000, 0, 0, 0); + + RVCE_BEGIN(0x01000001); // create cmd + RVCE_CS(enc->enc_pic.ec.enc_use_circular_buffer); + RVCE_CS(u_get_h264_profile_idc(enc->base.profile)); // encProfile + RVCE_CS(enc->base.level); // encLevel + RVCE_CS(enc->enc_pic.ec.enc_pic_struct_restriction); + RVCE_CS(enc->base.width); // encImageWidth + RVCE_CS(enc->base.height); // encImageHeight + + if (sscreen->info.chip_class < GFX9) { + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw + } else { + RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encRefPicLumaPitch + RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encRefPicChromaPitch + RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16) / 8); // encRefYHeightInQw + } + + RVCE_CS(enc->enc_pic.addrmode_arraymode_disrdo_distwoinstants); + + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_context_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_luma_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_input_chroma_buffer_offset); + RVCE_CS(enc->enc_pic.ec.enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity); + RVCE_END(); } static void encode(struct rvce_encoder *enc) { - struct si_screen *sscreen = (struct si_screen *)enc->screen; - signed luma_offset, chroma_offset, bs_offset; - unsigned dep, bs_idx = enc->bs_idx++; - int i; - - if (enc->dual_inst) { - if (bs_idx == 0) - dep = 1; - else if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) - dep = 0; - else - dep = 2; - } else - dep = 0; - - enc->task_info(enc, 0x00000003, dep, 0, bs_idx); - - RVCE_BEGIN(0x05000001); // context buffer - RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo - RVCE_END(); - - bs_offset = -(signed)(bs_idx * enc->bs_size); - - RVCE_BEGIN(0x05000004); // video bitstream buffer - RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo - RVCE_CS(enc->bs_size); // videoBitstreamRingSize - RVCE_END(); - - if (enc->dual_pipe) { - unsigned aux_offset = enc->cpb.res->buf->size - - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; - RVCE_BEGIN(0x05000002); // auxiliary buffer - for (i = 0; i < 8; ++i) { - RVCE_CS(aux_offset); - aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; - } - for (i = 0; i < 8; ++i) - RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); - RVCE_END(); - } - - RVCE_BEGIN(0x03000001); // encode - RVCE_CS(enc->enc_pic.frame_num ? 0x0 : 0x11); // insertHeaders - RVCE_CS(enc->enc_pic.eo.picture_structure); - RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize - RVCE_CS(enc->enc_pic.eo.force_refresh_map); - RVCE_CS(enc->enc_pic.eo.insert_aud); - RVCE_CS(enc->enc_pic.eo.end_of_sequence); - RVCE_CS(enc->enc_pic.eo.end_of_stream); - - if (sscreen->info.chip_class < GFX9) { - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch - } else { - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->luma->u.gfx9.surf_offset); // inputPictureLumaAddressHi/Lo - RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, - enc->chroma->u.gfx9.surf_offset); // inputPictureChromaAddressHi/Lo - RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16)); // encInputFrameYPitch - RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encInputPicLumaPitch - RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encInputPicChromaPitch - } - - if (enc->dual_pipe) - enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00000000; - else - enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00010000; - RVCE_CS(enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload); - RVCE_CS(enc->enc_pic.eo.enc_input_pic_tile_config); - RVCE_CS(enc->enc_pic.picture_type); // encPicType - RVCE_CS(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag - if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.eo.enc_idr_pic_id !=0)) - enc->enc_pic.eo.enc_idr_pic_id = enc->enc_pic.idr_pic_id - 1; - else - enc->enc_pic.eo.enc_idr_pic_id = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_idr_pic_id); - RVCE_CS(enc->enc_pic.eo.enc_mgs_key_pic); - RVCE_CS(!enc->enc_pic.not_referenced); - RVCE_CS(enc->enc_pic.eo.enc_temporal_layer_index); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_active_override_flag); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_l0_active_minus1); - RVCE_CS(enc->enc_pic.eo.num_ref_idx_l1_active_minus1); - - i = enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0; - if (i > 1 && enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000001; - enc->enc_pic.eo.enc_ref_list_modification_num = i - 1; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } else { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; - enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } - - for (i = 0; i < 3; ++i) { - enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; - enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); - RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); - } - for (i = 0; i < 4; ++i) { - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_op); - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_num); - RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_idx); - RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_op); - RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_num); - } - - // encReferencePictureL0[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || - enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l0 = si_l0_slot(enc); - si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); - RVCE_CS(l0->picture_type); - RVCE_CS(l0->frame_num); - RVCE_CS(l0->pic_order_cnt); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - } else { - enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l0_frame_number = 0x00000000; - enc->enc_pic.eo.l0_picture_order_count = 0x00000000; - enc->enc_pic.eo.l0_luma_offset = 0xffffffff; - enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l0_frame_number); - RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l0_luma_offset); - RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); - } - - // encReferencePictureL0[1] - enc->enc_pic.eo.l0_picture_structure = 0x00000000; - enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l0_frame_number = 0x00000000; - enc->enc_pic.eo.l0_picture_order_count = 0x00000000; - enc->enc_pic.eo.l0_luma_offset = 0xffffffff; - enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l0_picture_structure); - RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l0_frame_number); - RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l0_luma_offset); - RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); - - // encReferencePictureL1[0] - RVCE_CS(0x00000000); // pictureStructure - if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { - struct rvce_cpb_slot *l1 = si_l1_slot(enc); - si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); - RVCE_CS(l1->picture_type); - RVCE_CS(l1->frame_num); - RVCE_CS(l1->pic_order_cnt); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - } else { - enc->enc_pic.eo.l1_enc_pic_type = 0x00000000; - enc->enc_pic.eo.l1_frame_number = 0x00000000; - enc->enc_pic.eo.l1_picture_order_count = 0x00000000; - enc->enc_pic.eo.l1_luma_offset = 0xffffffff; - enc->enc_pic.eo.l1_chroma_offset = 0xffffffff; - RVCE_CS(enc->enc_pic.eo.l1_enc_pic_type); - RVCE_CS(enc->enc_pic.eo.l1_frame_number); - RVCE_CS(enc->enc_pic.eo.l1_picture_order_count); - RVCE_CS(enc->enc_pic.eo.l1_luma_offset); - RVCE_CS(enc->enc_pic.eo.l1_chroma_offset); - } - - si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); - RVCE_CS(luma_offset); - RVCE_CS(chroma_offset); - RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset); - RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_luma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_chroma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_luma_offset); - RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_chroma_offset); - RVCE_CS(enc->enc_pic.frame_num_cnt-1); - RVCE_CS(enc->enc_pic.frame_num); - RVCE_CS(enc->enc_pic.pic_order_cnt); - RVCE_CS(enc->enc_pic.i_remain); - RVCE_CS(enc->enc_pic.p_remain); - RVCE_CS(enc->enc_pic.eo.num_b_pic_remain_in_rcgop); - RVCE_CS(enc->enc_pic.eo.num_ir_pic_remain_in_rcgop); - RVCE_CS(enc->enc_pic.eo.enable_intra_refresh); - - RVCE_CS(enc->enc_pic.eo.aq_variance_en); - RVCE_CS(enc->enc_pic.eo.aq_block_size); - RVCE_CS(enc->enc_pic.eo.aq_mb_variance_sel); - RVCE_CS(enc->enc_pic.eo.aq_frame_variance_sel); - RVCE_CS(enc->enc_pic.eo.aq_param_a); - RVCE_CS(enc->enc_pic.eo.aq_param_b); - RVCE_CS(enc->enc_pic.eo.aq_param_c); - RVCE_CS(enc->enc_pic.eo.aq_param_d); - RVCE_CS(enc->enc_pic.eo.aq_param_e); - - RVCE_CS(enc->enc_pic.eo.context_in_sfb); - RVCE_END(); + struct si_screen *sscreen = (struct si_screen *)enc->screen; + signed luma_offset, chroma_offset, bs_offset; + unsigned dep, bs_idx = enc->bs_idx++; + int i; + + if (enc->dual_inst) { + if (bs_idx == 0) + dep = 1; + else if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + dep = 0; + else + dep = 2; + } else + dep = 0; + + enc->task_info(enc, 0x00000003, dep, 0, bs_idx); + + RVCE_BEGIN(0x05000001); // context buffer + RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo + RVCE_END(); + + bs_offset = -(signed)(bs_idx * enc->bs_size); + + RVCE_BEGIN(0x05000004); // video bitstream buffer + RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo + RVCE_CS(enc->bs_size); // videoBitstreamRingSize + RVCE_END(); + + if (enc->dual_pipe) { + unsigned aux_offset = + enc->cpb.res->buf->size - RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + RVCE_BEGIN(0x05000002); // auxiliary buffer + for (i = 0; i < 8; ++i) { + RVCE_CS(aux_offset); + aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE; + } + for (i = 0; i < 8; ++i) + RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE); + RVCE_END(); + } + + RVCE_BEGIN(0x03000001); // encode + RVCE_CS(enc->enc_pic.frame_num ? 0x0 : 0x11); // insertHeaders + RVCE_CS(enc->enc_pic.eo.picture_structure); + RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize + RVCE_CS(enc->enc_pic.eo.force_refresh_map); + RVCE_CS(enc->enc_pic.eo.insert_aud); + RVCE_CS(enc->enc_pic.eo.end_of_sequence); + RVCE_CS(enc->enc_pic.eo.end_of_stream); + + if (sscreen->info.chip_class < GFX9) { + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.legacy.level[0].offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encInputPicChromaPitch + } else { + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->luma->u.gfx9.surf_offset); // inputPictureLumaAddressHi/Lo + RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, + enc->chroma->u.gfx9.surf_offset); // inputPictureChromaAddressHi/Lo + RVCE_CS(align(enc->luma->u.gfx9.surf_height, 16)); // encInputFrameYPitch + RVCE_CS(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe); // encInputPicLumaPitch + RVCE_CS(enc->chroma->u.gfx9.surf_pitch * enc->chroma->bpe); // encInputPicChromaPitch + } + + if (enc->dual_pipe) + enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00000000; + else + enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload = 0x00010000; + RVCE_CS(enc->enc_pic.eo.enc_input_pic_addr_array_disable2pipe_disablemboffload); + RVCE_CS(enc->enc_pic.eo.enc_input_pic_tile_config); + RVCE_CS(enc->enc_pic.picture_type); // encPicType + RVCE_CS(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag + if ((enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) && + (enc->enc_pic.eo.enc_idr_pic_id != 0)) + enc->enc_pic.eo.enc_idr_pic_id = enc->enc_pic.idr_pic_id - 1; + else + enc->enc_pic.eo.enc_idr_pic_id = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_idr_pic_id); + RVCE_CS(enc->enc_pic.eo.enc_mgs_key_pic); + RVCE_CS(!enc->enc_pic.not_referenced); + RVCE_CS(enc->enc_pic.eo.enc_temporal_layer_index); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_active_override_flag); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_l0_active_minus1); + RVCE_CS(enc->enc_pic.eo.num_ref_idx_l1_active_minus1); + + i = enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0; + if (i > 1 && enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000001; + enc->enc_pic.eo.enc_ref_list_modification_num = i - 1; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } else { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; + enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } + + for (i = 0; i < 3; ++i) { + enc->enc_pic.eo.enc_ref_list_modification_op = 0x00000000; + enc->enc_pic.eo.enc_ref_list_modification_num = 0x00000000; + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_op); + RVCE_CS(enc->enc_pic.eo.enc_ref_list_modification_num); + } + for (i = 0; i < 4; ++i) { + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_op); + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_num); + RVCE_CS(enc->enc_pic.eo.enc_decoded_picture_marking_idx); + RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_op); + RVCE_CS(enc->enc_pic.eo.enc_decoded_ref_base_picture_marking_num); + } + + // encReferencePictureL0[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l0 = si_l0_slot(enc); + si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset); + RVCE_CS(l0->picture_type); + RVCE_CS(l0->frame_num); + RVCE_CS(l0->pic_order_cnt); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + } else { + enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l0_frame_number = 0x00000000; + enc->enc_pic.eo.l0_picture_order_count = 0x00000000; + enc->enc_pic.eo.l0_luma_offset = 0xffffffff; + enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l0_frame_number); + RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l0_luma_offset); + RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); + } + + // encReferencePictureL0[1] + enc->enc_pic.eo.l0_picture_structure = 0x00000000; + enc->enc_pic.eo.l0_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l0_frame_number = 0x00000000; + enc->enc_pic.eo.l0_picture_order_count = 0x00000000; + enc->enc_pic.eo.l0_luma_offset = 0xffffffff; + enc->enc_pic.eo.l0_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l0_picture_structure); + RVCE_CS(enc->enc_pic.eo.l0_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l0_frame_number); + RVCE_CS(enc->enc_pic.eo.l0_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l0_luma_offset); + RVCE_CS(enc->enc_pic.eo.l0_chroma_offset); + + // encReferencePictureL1[0] + RVCE_CS(0x00000000); // pictureStructure + if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + struct rvce_cpb_slot *l1 = si_l1_slot(enc); + si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset); + RVCE_CS(l1->picture_type); + RVCE_CS(l1->frame_num); + RVCE_CS(l1->pic_order_cnt); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + } else { + enc->enc_pic.eo.l1_enc_pic_type = 0x00000000; + enc->enc_pic.eo.l1_frame_number = 0x00000000; + enc->enc_pic.eo.l1_picture_order_count = 0x00000000; + enc->enc_pic.eo.l1_luma_offset = 0xffffffff; + enc->enc_pic.eo.l1_chroma_offset = 0xffffffff; + RVCE_CS(enc->enc_pic.eo.l1_enc_pic_type); + RVCE_CS(enc->enc_pic.eo.l1_frame_number); + RVCE_CS(enc->enc_pic.eo.l1_picture_order_count); + RVCE_CS(enc->enc_pic.eo.l1_luma_offset); + RVCE_CS(enc->enc_pic.eo.l1_chroma_offset); + } + + si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset); + RVCE_CS(luma_offset); + RVCE_CS(chroma_offset); + RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset); + RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_luma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reconstructed_ref_base_picture_chroma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_luma_offset); + RVCE_CS(enc->enc_pic.eo.enc_reference_ref_base_picture_chroma_offset); + RVCE_CS(enc->enc_pic.frame_num_cnt - 1); + RVCE_CS(enc->enc_pic.frame_num); + RVCE_CS(enc->enc_pic.pic_order_cnt); + RVCE_CS(enc->enc_pic.i_remain); + RVCE_CS(enc->enc_pic.p_remain); + RVCE_CS(enc->enc_pic.eo.num_b_pic_remain_in_rcgop); + RVCE_CS(enc->enc_pic.eo.num_ir_pic_remain_in_rcgop); + RVCE_CS(enc->enc_pic.eo.enable_intra_refresh); + + RVCE_CS(enc->enc_pic.eo.aq_variance_en); + RVCE_CS(enc->enc_pic.eo.aq_block_size); + RVCE_CS(enc->enc_pic.eo.aq_mb_variance_sel); + RVCE_CS(enc->enc_pic.eo.aq_frame_variance_sel); + RVCE_CS(enc->enc_pic.eo.aq_param_a); + RVCE_CS(enc->enc_pic.eo.aq_param_b); + RVCE_CS(enc->enc_pic.eo.aq_param_c); + RVCE_CS(enc->enc_pic.eo.aq_param_d); + RVCE_CS(enc->enc_pic.eo.aq_param_e); + + RVCE_CS(enc->enc_pic.eo.context_in_sfb); + RVCE_END(); } static void rate_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000005); // rate control - RVCE_CS(enc->enc_pic.rc.rc_method); - RVCE_CS(enc->enc_pic.rc.target_bitrate); - RVCE_CS(enc->enc_pic.rc.peak_bitrate); - RVCE_CS(enc->enc_pic.rc.frame_rate_num); - RVCE_CS(enc->enc_pic.rc.gop_size); - RVCE_CS(enc->enc_pic.rc.quant_i_frames); - RVCE_CS(enc->enc_pic.rc.quant_p_frames); - RVCE_CS(enc->enc_pic.rc.quant_b_frames); - RVCE_CS(enc->enc_pic.rc.vbv_buffer_size); - RVCE_CS(enc->enc_pic.rc.frame_rate_den); - RVCE_CS(enc->enc_pic.rc.vbv_buf_lv); - RVCE_CS(enc->enc_pic.rc.max_au_size); - RVCE_CS(enc->enc_pic.rc.qp_initial_mode); - RVCE_CS(enc->enc_pic.rc.target_bits_picture); - RVCE_CS(enc->enc_pic.rc.peak_bits_picture_integer); - RVCE_CS(enc->enc_pic.rc.peak_bits_picture_fraction); - RVCE_CS(enc->enc_pic.rc.min_qp); - RVCE_CS(enc->enc_pic.rc.max_qp); - RVCE_CS(enc->enc_pic.rc.skip_frame_enable); - RVCE_CS(enc->enc_pic.rc.fill_data_enable); - RVCE_CS(enc->enc_pic.rc.enforce_hrd); - RVCE_CS(enc->enc_pic.rc.b_pics_delta_qp); - RVCE_CS(enc->enc_pic.rc.ref_b_pics_delta_qp); - RVCE_CS(enc->enc_pic.rc.rc_reinit_disable); - RVCE_CS(enc->enc_pic.rc.enc_lcvbr_init_qp_flag); - RVCE_CS(enc->enc_pic.rc.lcvbrsatd_based_nonlinear_bit_budget_flag); - RVCE_END(); + RVCE_BEGIN(0x04000005); // rate control + RVCE_CS(enc->enc_pic.rc.rc_method); + RVCE_CS(enc->enc_pic.rc.target_bitrate); + RVCE_CS(enc->enc_pic.rc.peak_bitrate); + RVCE_CS(enc->enc_pic.rc.frame_rate_num); + RVCE_CS(enc->enc_pic.rc.gop_size); + RVCE_CS(enc->enc_pic.rc.quant_i_frames); + RVCE_CS(enc->enc_pic.rc.quant_p_frames); + RVCE_CS(enc->enc_pic.rc.quant_b_frames); + RVCE_CS(enc->enc_pic.rc.vbv_buffer_size); + RVCE_CS(enc->enc_pic.rc.frame_rate_den); + RVCE_CS(enc->enc_pic.rc.vbv_buf_lv); + RVCE_CS(enc->enc_pic.rc.max_au_size); + RVCE_CS(enc->enc_pic.rc.qp_initial_mode); + RVCE_CS(enc->enc_pic.rc.target_bits_picture); + RVCE_CS(enc->enc_pic.rc.peak_bits_picture_integer); + RVCE_CS(enc->enc_pic.rc.peak_bits_picture_fraction); + RVCE_CS(enc->enc_pic.rc.min_qp); + RVCE_CS(enc->enc_pic.rc.max_qp); + RVCE_CS(enc->enc_pic.rc.skip_frame_enable); + RVCE_CS(enc->enc_pic.rc.fill_data_enable); + RVCE_CS(enc->enc_pic.rc.enforce_hrd); + RVCE_CS(enc->enc_pic.rc.b_pics_delta_qp); + RVCE_CS(enc->enc_pic.rc.ref_b_pics_delta_qp); + RVCE_CS(enc->enc_pic.rc.rc_reinit_disable); + RVCE_CS(enc->enc_pic.rc.enc_lcvbr_init_qp_flag); + RVCE_CS(enc->enc_pic.rc.lcvbrsatd_based_nonlinear_bit_budget_flag); + RVCE_END(); } static void config(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); - enc->rate_control(enc); - enc->config_extension(enc); - enc->motion_estimation(enc); - enc->rdo(enc); - if (enc->use_vui) - enc->vui(enc); - enc->pic_control(enc); + enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); + enc->rate_control(enc); + enc->config_extension(enc); + enc->motion_estimation(enc); + enc->rdo(enc); + if (enc->use_vui) + enc->vui(enc); + enc->pic_control(enc); } static void config_extension(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000001); // config extension - RVCE_CS(enc->enc_pic.ce.enc_enable_perf_logging); - RVCE_END(); + RVCE_BEGIN(0x04000001); // config extension + RVCE_CS(enc->enc_pic.ce.enc_enable_perf_logging); + RVCE_END(); } static void feedback(struct rvce_encoder *enc) { - RVCE_BEGIN(0x05000005); // feedback buffer - RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo - RVCE_CS(enc->enc_pic.fb.feedback_ring_size); - RVCE_END(); + RVCE_BEGIN(0x05000005); // feedback buffer + RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo + RVCE_CS(enc->enc_pic.fb.feedback_ring_size); + RVCE_END(); } static void destroy(struct rvce_encoder *enc) { - enc->task_info(enc, 0x00000001, 0, 0, 0); + enc->task_info(enc, 0x00000001, 0, 0, 0); - feedback(enc); + feedback(enc); - RVCE_BEGIN(0x02000001); // destroy - RVCE_END(); + RVCE_BEGIN(0x02000001); // destroy + RVCE_END(); } static void motion_estimation(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000007); // motion estimation - RVCE_CS(enc->enc_pic.me.enc_ime_decimation_search); - RVCE_CS(enc->enc_pic.me.motion_est_half_pixel); - RVCE_CS(enc->enc_pic.me.motion_est_quarter_pixel); - RVCE_CS(enc->enc_pic.me.disable_favor_pmv_point); - RVCE_CS(enc->enc_pic.me.force_zero_point_center); - RVCE_CS(enc->enc_pic.me.lsmvert); - RVCE_CS(enc->enc_pic.me.enc_search_range_x); - RVCE_CS(enc->enc_pic.me.enc_search_range_y); - RVCE_CS(enc->enc_pic.me.enc_search1_range_x); - RVCE_CS(enc->enc_pic.me.enc_search1_range_y); - RVCE_CS(enc->enc_pic.me.disable_16x16_frame1); - RVCE_CS(enc->enc_pic.me.disable_satd); - RVCE_CS(enc->enc_pic.me.enable_amd); - RVCE_CS(enc->enc_pic.me.enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.enc_ime_skip_x); - RVCE_CS(enc->enc_pic.me.enc_ime_skip_y); - RVCE_CS(enc->enc_pic.me.enc_en_ime_overw_dis_subm); - RVCE_CS(enc->enc_pic.me.enc_ime_overw_dis_subm_no); - RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_x); - RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_y); - RVCE_CS(enc->enc_pic.me.parallel_mode_speedup_enable); - RVCE_CS(enc->enc_pic.me.fme0_enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.fme1_enc_disable_sub_mode); - RVCE_CS(enc->enc_pic.me.ime_sw_speedup_enable); - RVCE_END(); + RVCE_BEGIN(0x04000007); // motion estimation + RVCE_CS(enc->enc_pic.me.enc_ime_decimation_search); + RVCE_CS(enc->enc_pic.me.motion_est_half_pixel); + RVCE_CS(enc->enc_pic.me.motion_est_quarter_pixel); + RVCE_CS(enc->enc_pic.me.disable_favor_pmv_point); + RVCE_CS(enc->enc_pic.me.force_zero_point_center); + RVCE_CS(enc->enc_pic.me.lsmvert); + RVCE_CS(enc->enc_pic.me.enc_search_range_x); + RVCE_CS(enc->enc_pic.me.enc_search_range_y); + RVCE_CS(enc->enc_pic.me.enc_search1_range_x); + RVCE_CS(enc->enc_pic.me.enc_search1_range_y); + RVCE_CS(enc->enc_pic.me.disable_16x16_frame1); + RVCE_CS(enc->enc_pic.me.disable_satd); + RVCE_CS(enc->enc_pic.me.enable_amd); + RVCE_CS(enc->enc_pic.me.enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.enc_ime_skip_x); + RVCE_CS(enc->enc_pic.me.enc_ime_skip_y); + RVCE_CS(enc->enc_pic.me.enc_en_ime_overw_dis_subm); + RVCE_CS(enc->enc_pic.me.enc_ime_overw_dis_subm_no); + RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_x); + RVCE_CS(enc->enc_pic.me.enc_ime2_search_range_y); + RVCE_CS(enc->enc_pic.me.parallel_mode_speedup_enable); + RVCE_CS(enc->enc_pic.me.fme0_enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.fme1_enc_disable_sub_mode); + RVCE_CS(enc->enc_pic.me.ime_sw_speedup_enable); + RVCE_END(); } static void pic_control(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000002); // pic control - RVCE_CS(enc->enc_pic.pc.enc_use_constrained_intra_pred); - RVCE_CS(enc->enc_pic.pc.enc_cabac_enable); - RVCE_CS(enc->enc_pic.pc.enc_cabac_idc); - RVCE_CS(enc->enc_pic.pc.enc_loop_filter_disable); - RVCE_CS(enc->enc_pic.pc.enc_lf_beta_offset); - RVCE_CS(enc->enc_pic.pc.enc_lf_alpha_c0_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_left_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_right_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_top_offset); - RVCE_CS(enc->enc_pic.pc.enc_crop_bottom_offset); - RVCE_CS(enc->enc_pic.pc.enc_num_mbs_per_slice); - RVCE_CS(enc->enc_pic.pc.enc_intra_refresh_num_mbs_per_slot); - RVCE_CS(enc->enc_pic.pc.enc_force_intra_refresh); - RVCE_CS(enc->enc_pic.pc.enc_force_imb_period); - RVCE_CS(enc->enc_pic.pc.enc_pic_order_cnt_type); - RVCE_CS(enc->enc_pic.pc.log2_max_pic_order_cnt_lsb_minus4); - RVCE_CS(enc->enc_pic.pc.enc_sps_id); - RVCE_CS(enc->enc_pic.pc.enc_pps_id); - RVCE_CS(enc->enc_pic.pc.enc_constraint_set_flags); - RVCE_CS(enc->enc_pic.pc.enc_b_pic_pattern); - RVCE_CS(enc->enc_pic.pc.weight_pred_mode_b_picture); - RVCE_CS(enc->enc_pic.pc.enc_number_of_reference_frames); - RVCE_CS(enc->enc_pic.pc.enc_max_num_ref_frames); - RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l0); - RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l1); - RVCE_CS(enc->enc_pic.pc.enc_slice_mode); - RVCE_CS(enc->enc_pic.pc.enc_max_slice_size); - RVCE_END(); + RVCE_BEGIN(0x04000002); // pic control + RVCE_CS(enc->enc_pic.pc.enc_use_constrained_intra_pred); + RVCE_CS(enc->enc_pic.pc.enc_cabac_enable); + RVCE_CS(enc->enc_pic.pc.enc_cabac_idc); + RVCE_CS(enc->enc_pic.pc.enc_loop_filter_disable); + RVCE_CS(enc->enc_pic.pc.enc_lf_beta_offset); + RVCE_CS(enc->enc_pic.pc.enc_lf_alpha_c0_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_left_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_right_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_top_offset); + RVCE_CS(enc->enc_pic.pc.enc_crop_bottom_offset); + RVCE_CS(enc->enc_pic.pc.enc_num_mbs_per_slice); + RVCE_CS(enc->enc_pic.pc.enc_intra_refresh_num_mbs_per_slot); + RVCE_CS(enc->enc_pic.pc.enc_force_intra_refresh); + RVCE_CS(enc->enc_pic.pc.enc_force_imb_period); + RVCE_CS(enc->enc_pic.pc.enc_pic_order_cnt_type); + RVCE_CS(enc->enc_pic.pc.log2_max_pic_order_cnt_lsb_minus4); + RVCE_CS(enc->enc_pic.pc.enc_sps_id); + RVCE_CS(enc->enc_pic.pc.enc_pps_id); + RVCE_CS(enc->enc_pic.pc.enc_constraint_set_flags); + RVCE_CS(enc->enc_pic.pc.enc_b_pic_pattern); + RVCE_CS(enc->enc_pic.pc.weight_pred_mode_b_picture); + RVCE_CS(enc->enc_pic.pc.enc_number_of_reference_frames); + RVCE_CS(enc->enc_pic.pc.enc_max_num_ref_frames); + RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l0); + RVCE_CS(enc->enc_pic.pc.enc_num_default_active_ref_l1); + RVCE_CS(enc->enc_pic.pc.enc_slice_mode); + RVCE_CS(enc->enc_pic.pc.enc_max_slice_size); + RVCE_END(); } static void rdo(struct rvce_encoder *enc) { - RVCE_BEGIN(0x04000008); // rdo - RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_i_frame); - RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_p_frame); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y_1); - RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv_1); - RVCE_CS(enc->enc_pic.rdo.enc_16x16_cost_adj); - RVCE_CS(enc->enc_pic.rdo.enc_skip_cost_adj); - RVCE_CS(enc->enc_pic.rdo.enc_force_16x16_skip); - RVCE_CS(enc->enc_pic.rdo.enc_disable_threshold_calc_a); - RVCE_CS(enc->enc_pic.rdo.enc_luma_coeff_cost); - RVCE_CS(enc->enc_pic.rdo.enc_luma_mb_coeff_cost); - RVCE_CS(enc->enc_pic.rdo.enc_chroma_coeff_cost); - RVCE_END(); + RVCE_BEGIN(0x04000008); // rdo + RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_i_frame); + RVCE_CS(enc->enc_pic.rdo.enc_disable_tbe_pred_p_frame); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_y_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_interpol_uv_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_y_1); + RVCE_CS(enc->enc_pic.rdo.use_fme_intrapol_uv_1); + RVCE_CS(enc->enc_pic.rdo.enc_16x16_cost_adj); + RVCE_CS(enc->enc_pic.rdo.enc_skip_cost_adj); + RVCE_CS(enc->enc_pic.rdo.enc_force_16x16_skip); + RVCE_CS(enc->enc_pic.rdo.enc_disable_threshold_calc_a); + RVCE_CS(enc->enc_pic.rdo.enc_luma_coeff_cost); + RVCE_CS(enc->enc_pic.rdo.enc_luma_mb_coeff_cost); + RVCE_CS(enc->enc_pic.rdo.enc_chroma_coeff_cost); + RVCE_END(); } static void session(struct rvce_encoder *enc) { - RVCE_BEGIN(0x00000001); // session cmd - RVCE_CS(enc->stream_handle); - RVCE_END(); + RVCE_BEGIN(0x00000001); // session cmd + RVCE_CS(enc->stream_handle); + RVCE_END(); } -static void task_info(struct rvce_encoder *enc, uint32_t op, - uint32_t dep, uint32_t fb_idx, uint32_t ring_idx) +static void task_info(struct rvce_encoder *enc, uint32_t op, uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx) { - RVCE_BEGIN(0x00000002); // task info - if (op == 0x3) { - if (enc->task_info_idx) { - uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; - // Update offsetOfNextTaskInfo - enc->cs->current.buf[enc->task_info_idx] = offs; - } - enc->task_info_idx = enc->cs->current.cdw; - } - enc->enc_pic.ti.task_operation = op; - enc->enc_pic.ti.reference_picture_dependency = dep; - enc->enc_pic.ti.feedback_index = fb_idx; - enc->enc_pic.ti.video_bitstream_ring_index = ring_idx; - RVCE_CS(enc->enc_pic.ti.offset_of_next_task_info); - RVCE_CS(enc->enc_pic.ti.task_operation); - RVCE_CS(enc->enc_pic.ti.reference_picture_dependency); - RVCE_CS(enc->enc_pic.ti.collocate_flag_dependency); - RVCE_CS(enc->enc_pic.ti.feedback_index); - RVCE_CS(enc->enc_pic.ti.video_bitstream_ring_index); - RVCE_END(); + RVCE_BEGIN(0x00000002); // task info + if (op == 0x3) { + if (enc->task_info_idx) { + uint32_t offs = enc->cs->current.cdw - enc->task_info_idx + 3; + // Update offsetOfNextTaskInfo + enc->cs->current.buf[enc->task_info_idx] = offs; + } + enc->task_info_idx = enc->cs->current.cdw; + } + enc->enc_pic.ti.task_operation = op; + enc->enc_pic.ti.reference_picture_dependency = dep; + enc->enc_pic.ti.feedback_index = fb_idx; + enc->enc_pic.ti.video_bitstream_ring_index = ring_idx; + RVCE_CS(enc->enc_pic.ti.offset_of_next_task_info); + RVCE_CS(enc->enc_pic.ti.task_operation); + RVCE_CS(enc->enc_pic.ti.reference_picture_dependency); + RVCE_CS(enc->enc_pic.ti.collocate_flag_dependency); + RVCE_CS(enc->enc_pic.ti.feedback_index); + RVCE_CS(enc->enc_pic.ti.video_bitstream_ring_index); + RVCE_END(); } static void vui(struct rvce_encoder *enc) { - int i; - - if (!enc->enc_pic.enable_vui) - return; - - RVCE_BEGIN(0x04000009); // vui - RVCE_CS(enc->enc_pic.vui.aspect_ratio_info_present_flag); - RVCE_CS(enc->enc_pic.vui.aspect_ratio_idc); - RVCE_CS(enc->enc_pic.vui.sar_width); - RVCE_CS(enc->enc_pic.vui.sar_height); - RVCE_CS(enc->enc_pic.vui.overscan_info_present_flag); - RVCE_CS(enc->enc_pic.vui.overscan_Approp_flag); - RVCE_CS(enc->enc_pic.vui.video_signal_type_present_flag); - RVCE_CS(enc->enc_pic.vui.video_format); - RVCE_CS(enc->enc_pic.vui.video_full_range_flag); - RVCE_CS(enc->enc_pic.vui.color_description_present_flag); - RVCE_CS(enc->enc_pic.vui.color_prim); - RVCE_CS(enc->enc_pic.vui.transfer_char); - RVCE_CS(enc->enc_pic.vui.matrix_coef); - RVCE_CS(enc->enc_pic.vui.chroma_loc_info_present_flag); - RVCE_CS(enc->enc_pic.vui.chroma_loc_top); - RVCE_CS(enc->enc_pic.vui.chroma_loc_bottom); - RVCE_CS(enc->enc_pic.vui.timing_info_present_flag); - RVCE_CS(enc->enc_pic.vui.num_units_in_tick); - RVCE_CS(enc->enc_pic.vui.time_scale); - RVCE_CS(enc->enc_pic.vui.fixed_frame_rate_flag); - RVCE_CS(enc->enc_pic.vui.nal_hrd_parameters_present_flag); - RVCE_CS(enc->enc_pic.vui.cpb_cnt_minus1); - RVCE_CS(enc->enc_pic.vui.bit_rate_scale); - RVCE_CS(enc->enc_pic.vui.cpb_size_scale); - for (i = 0; i < 32; i++) { - RVCE_CS(enc->enc_pic.vui.bit_rate_value_minus); - RVCE_CS(enc->enc_pic.vui.cpb_size_value_minus); - RVCE_CS(enc->enc_pic.vui.cbr_flag); - } - RVCE_CS(enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.cpb_removal_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.dpb_output_delay_length_minus1); - RVCE_CS(enc->enc_pic.vui.time_offset_length); - RVCE_CS(enc->enc_pic.vui.low_delay_hrd_flag); - RVCE_CS(enc->enc_pic.vui.pic_struct_present_flag); - RVCE_CS(enc->enc_pic.vui.bitstream_restriction_present_flag); - RVCE_CS(enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag); - RVCE_CS(enc->enc_pic.vui.max_bytes_per_pic_denom); - RVCE_CS(enc->enc_pic.vui.max_bits_per_mb_denom); - RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_hori); - RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_vert); - RVCE_CS(enc->enc_pic.vui.num_reorder_frames); - RVCE_CS(enc->enc_pic.vui.max_dec_frame_buffering); - RVCE_END(); + int i; + + if (!enc->enc_pic.enable_vui) + return; + + RVCE_BEGIN(0x04000009); // vui + RVCE_CS(enc->enc_pic.vui.aspect_ratio_info_present_flag); + RVCE_CS(enc->enc_pic.vui.aspect_ratio_idc); + RVCE_CS(enc->enc_pic.vui.sar_width); + RVCE_CS(enc->enc_pic.vui.sar_height); + RVCE_CS(enc->enc_pic.vui.overscan_info_present_flag); + RVCE_CS(enc->enc_pic.vui.overscan_Approp_flag); + RVCE_CS(enc->enc_pic.vui.video_signal_type_present_flag); + RVCE_CS(enc->enc_pic.vui.video_format); + RVCE_CS(enc->enc_pic.vui.video_full_range_flag); + RVCE_CS(enc->enc_pic.vui.color_description_present_flag); + RVCE_CS(enc->enc_pic.vui.color_prim); + RVCE_CS(enc->enc_pic.vui.transfer_char); + RVCE_CS(enc->enc_pic.vui.matrix_coef); + RVCE_CS(enc->enc_pic.vui.chroma_loc_info_present_flag); + RVCE_CS(enc->enc_pic.vui.chroma_loc_top); + RVCE_CS(enc->enc_pic.vui.chroma_loc_bottom); + RVCE_CS(enc->enc_pic.vui.timing_info_present_flag); + RVCE_CS(enc->enc_pic.vui.num_units_in_tick); + RVCE_CS(enc->enc_pic.vui.time_scale); + RVCE_CS(enc->enc_pic.vui.fixed_frame_rate_flag); + RVCE_CS(enc->enc_pic.vui.nal_hrd_parameters_present_flag); + RVCE_CS(enc->enc_pic.vui.cpb_cnt_minus1); + RVCE_CS(enc->enc_pic.vui.bit_rate_scale); + RVCE_CS(enc->enc_pic.vui.cpb_size_scale); + for (i = 0; i < 32; i++) { + RVCE_CS(enc->enc_pic.vui.bit_rate_value_minus); + RVCE_CS(enc->enc_pic.vui.cpb_size_value_minus); + RVCE_CS(enc->enc_pic.vui.cbr_flag); + } + RVCE_CS(enc->enc_pic.vui.initial_cpb_removal_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.cpb_removal_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.dpb_output_delay_length_minus1); + RVCE_CS(enc->enc_pic.vui.time_offset_length); + RVCE_CS(enc->enc_pic.vui.low_delay_hrd_flag); + RVCE_CS(enc->enc_pic.vui.pic_struct_present_flag); + RVCE_CS(enc->enc_pic.vui.bitstream_restriction_present_flag); + RVCE_CS(enc->enc_pic.vui.motion_vectors_over_pic_boundaries_flag); + RVCE_CS(enc->enc_pic.vui.max_bytes_per_pic_denom); + RVCE_CS(enc->enc_pic.vui.max_bits_per_mb_denom); + RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_hori); + RVCE_CS(enc->enc_pic.vui.log2_max_mv_length_vert); + RVCE_CS(enc->enc_pic.vui.num_reorder_frames); + RVCE_CS(enc->enc_pic.vui.max_dec_frame_buffering); + RVCE_END(); } void si_vce_52_init(struct rvce_encoder *enc) { - enc->session = session; - enc->task_info = task_info; - enc->create = create; - enc->feedback = feedback; - enc->rate_control = rate_control; - enc->config_extension = config_extension; - enc->pic_control = pic_control; - enc->motion_estimation = motion_estimation; - enc->rdo = rdo; - enc->vui = vui; - enc->config = config; - enc->encode = encode; - enc->destroy = destroy; + enc->session = session; + enc->task_info = task_info; + enc->create = create; + enc->feedback = feedback; + enc->rate_control = rate_control; + enc->config_extension = config_extension; + enc->pic_control = pic_control; + enc->motion_estimation = motion_estimation; + enc->rdo = rdo; + enc->vui = vui; + enc->config = config; + enc->encode = encode; + enc->destroy = destroy; + enc->si_get_pic_param = si_vce_52_get_param; } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c index 77bfe1876..025307121 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -25,1265 +25,1235 @@ * **************************************************************************/ -#include <assert.h> -#include <stdio.h> +#include "radeon_vcn_dec.h" #include "pipe/p_video_codec.h" - +#include "radeon_video.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_mpeg12_decoder.h" - -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vcn_dec.h" #include "vl/vl_probs_table.h" -#define FB_BUFFER_OFFSET 0x1000 -#define FB_BUFFER_SIZE 2048 -#define IT_SCALING_TABLE_SIZE 992 -#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256) -#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) +#include <assert.h> +#include <stdio.h> + +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define IT_SCALING_TABLE_SIZE 992 +#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256) +#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024) -#define RDECODE_VCN1_GPCOM_VCPU_CMD 0x2070c -#define RDECODE_VCN1_GPCOM_VCPU_DATA0 0x20710 -#define RDECODE_VCN1_GPCOM_VCPU_DATA1 0x20714 -#define RDECODE_VCN1_ENGINE_CNTL 0x20718 +#define RDECODE_VCN1_GPCOM_VCPU_CMD 0x2070c +#define RDECODE_VCN1_GPCOM_VCPU_DATA0 0x20710 +#define RDECODE_VCN1_GPCOM_VCPU_DATA1 0x20714 +#define RDECODE_VCN1_ENGINE_CNTL 0x20718 -#define RDECODE_VCN2_GPCOM_VCPU_CMD (0x503 << 2) -#define RDECODE_VCN2_GPCOM_VCPU_DATA0 (0x504 << 2) -#define RDECODE_VCN2_GPCOM_VCPU_DATA1 (0x505 << 2) -#define RDECODE_VCN2_ENGINE_CNTL (0x506 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_CMD (0x503 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_DATA0 (0x504 << 2) +#define RDECODE_VCN2_GPCOM_VCPU_DATA1 (0x505 << 2) +#define RDECODE_VCN2_ENGINE_CNTL (0x506 << 2) -#define RDECODE_VCN2_5_GPCOM_VCPU_CMD 0x3c -#define RDECODE_VCN2_5_GPCOM_VCPU_DATA0 0x40 -#define RDECODE_VCN2_5_GPCOM_VCPU_DATA1 0x44 -#define RDECODE_VCN2_5_ENGINE_CNTL 0x9b4 +#define RDECODE_VCN2_5_GPCOM_VCPU_CMD 0x3c +#define RDECODE_VCN2_5_GPCOM_VCPU_DATA0 0x40 +#define RDECODE_VCN2_5_GPCOM_VCPU_DATA1 0x44 +#define RDECODE_VCN2_5_ENGINE_CNTL 0x9b4 -#define NUM_MPEG2_REFS 6 -#define NUM_H264_REFS 17 -#define NUM_VC1_REFS 5 -#define NUM_VP9_REFS 8 +#define NUM_MPEG2_REFS 6 +#define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 +#define NUM_VP9_REFS 8 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec, - struct pipe_h264_picture_desc *pic) + struct pipe_h264_picture_desc *pic) { - rvcn_dec_message_avc_t result; - - memset(&result, 0, sizeof(result)); - switch (pic->base.profile) { - case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: - case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: - result.profile = RDECODE_H264_PROFILE_BASELINE; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: - result.profile = RDECODE_H264_PROFILE_MAIN; - break; - - case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - result.profile = RDECODE_H264_PROFILE_HIGH; - break; - - default: - assert(0); - break; - } - - result.level = dec->base.level; - - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; - result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; - result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; - result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; - result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; - - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; - result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; - result.log2_max_pic_order_cnt_lsb_minus4 = - pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - - switch (dec->base.chroma_format) { - case PIPE_VIDEO_CHROMA_FORMAT_NONE: - break; - case PIPE_VIDEO_CHROMA_FORMAT_400: - result.chroma_format = 0; - break; - case PIPE_VIDEO_CHROMA_FORMAT_420: - result.chroma_format = 1; - break; - case PIPE_VIDEO_CHROMA_FORMAT_422: - result.chroma_format = 2; - break; - case PIPE_VIDEO_CHROMA_FORMAT_444: - result.chroma_format = 3; - break; - } - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; - result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; - result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; - result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; - result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; - result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; - - result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; - result.slice_group_map_type = pic->pps->slice_group_map_type; - result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; - result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; - result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; - result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; - - memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); - memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); - - memcpy(dec->it, result.scaling_list_4x4, 6*16); - memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); - - result.num_ref_frames = pic->num_ref_frames; - - result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; - result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; - - result.frame_num = pic->frame_num; - memcpy(result.frame_num_list, pic->frame_num_list, 4*16); - result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; - result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; - memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); - - result.decoded_pic_idx = pic->frame_num; - - return result; + rvcn_dec_message_avc_t result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + result.profile = RDECODE_H264_PROFILE_BASELINE; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + result.profile = RDECODE_H264_PROFILE_MAIN; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + result.profile = RDECODE_H264_PROFILE_HIGH; + break; + + default: + assert(0); + break; + } + + result.level = dec->base.level; + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64); + + memcpy(dec->it, result.scaling_list_4x4, 6 * 16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64); + + result.num_ref_frames = pic->num_ref_frames; + + result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + result.frame_num = pic->frame_num; + memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16); + result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; + result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; + memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2); + + result.decoded_pic_idx = pic->frame_num; + + return result; } static void radeon_dec_destroy_associated_data(void *data) { - /* NOOP, since we only use an intptr */ + /* NOOP, since we only use an intptr */ } static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_h265_picture_desc *pic) + struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) { - rvcn_dec_message_hevc_t result; - unsigned i, j; - - memset(&result, 0, sizeof(result)); - result.sps_info_flags = 0; - result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; - result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; - result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; - result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; - result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; - result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; - result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; - result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; - result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) - result.sps_info_flags |= 1 << 9; - if (pic->UseRefPicList == true) - result.sps_info_flags |= 1 << 10; - - result.chroma_format = pic->pps->sps->chroma_format_idc; - result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; - result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; - result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; - result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; - result.log2_min_luma_coding_block_size_minus3 = - pic->pps->sps->log2_min_luma_coding_block_size_minus3; - result.log2_diff_max_min_luma_coding_block_size = - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - result.log2_min_transform_block_size_minus2 = - pic->pps->sps->log2_min_transform_block_size_minus2; - result.log2_diff_max_min_transform_block_size = - pic->pps->sps->log2_diff_max_min_transform_block_size; - result.max_transform_hierarchy_depth_inter = - pic->pps->sps->max_transform_hierarchy_depth_inter; - result.max_transform_hierarchy_depth_intra = - pic->pps->sps->max_transform_hierarchy_depth_intra; - result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; - result.pcm_sample_bit_depth_chroma_minus1 = - pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; - result.log2_min_pcm_luma_coding_block_size_minus3 = - pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; - result.log2_diff_max_min_pcm_luma_coding_block_size = - pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; - result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; - - result.pps_info_flags = 0; - result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; - result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; - result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; - result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; - result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; - result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; - result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; - result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; - result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; - result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; - result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; - result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; - result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; - result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; - result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; - result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; - result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; - result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; - result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; - result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; - - result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; - result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; - result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; - result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; - result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; - result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; - result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; - result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; - result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; - result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; - result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; - result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; - result.init_qp_minus26 = pic->pps->init_qp_minus26; - - for (i = 0; i < 19; ++i) - result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; - - for (i = 0; i < 21; ++i) - result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; - - result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; - result.curr_poc = pic->CurrPicOrderCntVal; - - for (i = 0 ; i < 16 ; i++) { - for (j = 0; (pic->ref[j] != NULL) && (j < 16) ; j++) { - if (dec->render_pic_list[i] == pic->ref[j]) - break; - if (j == 15) - dec->render_pic_list[i] = NULL; - else if (pic->ref[j+1] == NULL) - dec->render_pic_list[i] = NULL; - } - } - for (i = 0 ; i < 16 ; i++) { - if (dec->render_pic_list[i] == NULL) { - dec->render_pic_list[i] = target; - result.curr_idx = i; - break; - } - } - - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)result.curr_idx, - &radeon_dec_destroy_associated_data); - - for (i = 0; i < 16; ++i) { - struct pipe_video_buffer *ref = pic->ref[i]; - uintptr_t ref_pic = 0; - - result.poc_list[i] = pic->PicOrderCntVal[i]; - - if (ref) - ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - else - ref_pic = 0x7F; - result.ref_pic_list[i] = ref_pic; - } - - for (i = 0; i < 8; ++i) { - result.ref_pic_set_st_curr_before[i] = 0xFF; - result.ref_pic_set_st_curr_after[i] = 0xFF; - result.ref_pic_set_lt_curr[i] = 0xFF; - } - - for (i = 0; i < pic->NumPocStCurrBefore; ++i) - result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; - - for (i = 0; i < pic->NumPocStCurrAfter; ++i) - result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; - - for (i = 0; i < pic->NumPocLtCurr; ++i) - result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; - - for (i = 0; i < 6; ++i) - result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; - - for (i = 0; i < 2; ++i) - result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; - - memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); - memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); - memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); - memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); - - for (i = 0 ; i < 2 ; i++) { - for (j = 0 ; j < 15 ; j++) - result.direct_reflist[i][j] = pic->RefPicList[i][j]; - } - - if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { - if (target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.p010_mode = 0; - result.luma_10to8 = 5; - result.chroma_10to8 = 5; - result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ - result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ - } - } - - return result; + rvcn_dec_message_hevc_t result; + unsigned i, j; + + memset(&result, 0, sizeof(result)); + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_poc = pic->CurrPicOrderCntVal; + + for (i = 0; i < 16; i++) { + for (j = 0; (pic->ref[j] != NULL) && (j < 16); j++) { + if (dec->render_pic_list[i] == pic->ref[j]) + break; + if (j == 15) + dec->render_pic_list[i] = NULL; + else if (pic->ref[j + 1] == NULL) + dec->render_pic_list[i] = NULL; + } + } + for (i = 0; i < 16; i++) { + if (dec->render_pic_list[i] == NULL) { + dec->render_pic_list[i] = target; + result.curr_idx = i; + break; + } + } + + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx, + &radeon_dec_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 15; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ + result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ + } + } + + return result; } static void fill_probs_table(void *ptr) { - rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr; - - memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4)); - memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8)); - memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16)); - memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32)); - memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs)); - memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs)); - memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p)); - memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, sizeof(default_switchable_interp_prob)); - memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs)); - memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs)); - memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs)); - memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p)); - memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p)); - memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p)); - memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32)); - memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16)); - memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8)); - memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints)); - memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components)); - memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t)); + rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr; + + memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4)); + memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8)); + memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16)); + memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32)); + memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs)); + memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs)); + memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p)); + memcpy(probs->switchable_interp_prob, default_switchable_interp_prob, + sizeof(default_switchable_interp_prob)); + memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs)); + memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs)); + memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs)); + memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p)); + memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p)); + memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p)); + memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32)); + memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16)); + memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8)); + memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints)); + memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components)); + memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t)); } static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_vp9_picture_desc *pic) + struct pipe_video_buffer *target, + struct pipe_vp9_picture_desc *pic) { - rvcn_dec_message_vp9_t result; - unsigned i; - - memset(&result, 0, sizeof(result)); - - /* segment table */ - rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs); - - if (pic->picture_parameter.pic_fields.segmentation_enabled) { - for (i = 0; i < 8; ++i) { - prbs->seg.feature_data[i] = - (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) | - ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) | - ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24); - prbs->seg.feature_mask[i] = - (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) | - (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) | - (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) | - (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3); - } - - for (i = 0; i < 7; ++i) - prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i]; - - for (i = 0; i < 3; ++i) - prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; - - prbs->seg.abs_delta = 0; - } else - memset(&prbs->seg, 0, 256); - - result.frame_header_flags = - (pic->picture_parameter.pic_fields.frame_type << - RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.error_resilient_mode << - RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.intra_only << - RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.allow_high_precision_mv << - RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode << - RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.refresh_frame_context << - RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_enabled << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_update_map << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.pic_fields.segmentation_temporal_update << - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.mode_ref_delta_enabled << - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK; - - result.frame_header_flags |= - (pic->picture_parameter.mode_ref_delta_update << - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; - - result.frame_header_flags |= ((dec->show_frame && - !pic->picture_parameter.pic_fields.error_resilient_mode) - << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & - RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; - dec->show_frame = pic->picture_parameter.pic_fields.show_frame; - - result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type; - - result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx; - result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context; - - result.filter_level = pic->picture_parameter.filter_level; - result.sharpness_level = pic->picture_parameter.sharpness_level; - - for (i = 0; i < 8; ++i) - memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2); - - if (pic->picture_parameter.pic_fields.lossless_flag) { - result.base_qindex = 0; - result.y_dc_delta_q = 0; - result.uv_ac_delta_q = 0; - result.uv_dc_delta_q = 0; - } else { - result.base_qindex = pic->picture_parameter.base_qindex; - result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q; - result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q; - result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q; - } - - result.log2_tile_cols = pic->picture_parameter.log2_tile_columns; - result.log2_tile_rows = pic->picture_parameter.log2_tile_rows; - result.chroma_format = 1; - result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 - = (pic->picture_parameter.bit_depth - 8); - - result.vp9_frame_size = align(dec->bs_size, 128); - result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes; - result.compressed_header_size = pic->picture_parameter.first_partition_size; - - assert(dec->base.max_references + 1 <= 16); - - for (i = 0 ; i < 16 ; ++i) { - if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) { - result.curr_pic_idx = - (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base); - break; - } else if (!dec->render_pic_list[i]) { - dec->render_pic_list[i] = target; - result.curr_pic_idx = dec->ref_idx; - vl_video_buffer_set_associated_data(target, &dec->base, - (void *)(uintptr_t)dec->ref_idx++, - &radeon_dec_destroy_associated_data); - break; - } - } - - for (i = 0 ; i < 8; i++) { - result.ref_frame_map[i] = (pic->ref[i]) ? - (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : - 0x7f; - } - - result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame]; - result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias; - result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame]; - result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias; - result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame]; - result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias; - - if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) { - if (target->buffer_format == PIPE_FORMAT_P016) { - result.p010_mode = 1; - result.msb_mode = 1; - } else { - result.p010_mode = 0; - result.luma_10to8 = 1; - result.chroma_10to8 = 1; - } - } - - return result; + rvcn_dec_message_vp9_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + /* segment table */ + rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs); + + if (pic->picture_parameter.pic_fields.segmentation_enabled) { + for (i = 0; i < 8; ++i) { + prbs->seg.feature_data[i] = + (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) | + ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) | + ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24); + prbs->seg.feature_mask[i] = + (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) | + (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) | + (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3); + } + + for (i = 0; i < 7; ++i) + prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i]; + + for (i = 0; i < 3; ++i) + prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i]; + + prbs->seg.abs_delta = 0; + } else + memset(&prbs->seg, 0, 256); + + result.frame_header_flags = (pic->picture_parameter.pic_fields.frame_type + << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.error_resilient_mode + << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.intra_only + << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.allow_high_precision_mv + << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode + << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.refresh_frame_context + << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_enabled + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_update_map + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK; + + result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_temporal_update + << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK; + + result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_enabled + << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK; + + result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_update + << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK; + + result.frame_header_flags |= + ((dec->show_frame && !pic->picture_parameter.pic_fields.error_resilient_mode) + << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) & + RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK; + dec->show_frame = pic->picture_parameter.pic_fields.show_frame; + + result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type; + + result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx; + result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context; + + result.filter_level = pic->picture_parameter.filter_level; + result.sharpness_level = pic->picture_parameter.sharpness_level; + + for (i = 0; i < 8; ++i) + memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2); + + if (pic->picture_parameter.pic_fields.lossless_flag) { + result.base_qindex = 0; + result.y_dc_delta_q = 0; + result.uv_ac_delta_q = 0; + result.uv_dc_delta_q = 0; + } else { + result.base_qindex = pic->picture_parameter.base_qindex; + result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q; + result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q; + result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q; + } + + result.log2_tile_cols = pic->picture_parameter.log2_tile_columns; + result.log2_tile_rows = pic->picture_parameter.log2_tile_rows; + result.chroma_format = 1; + result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = + (pic->picture_parameter.bit_depth - 8); + + result.vp9_frame_size = align(dec->bs_size, 128); + result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes; + result.compressed_header_size = pic->picture_parameter.first_partition_size; + + assert(dec->base.max_references + 1 <= 16); + + for (i = 0; i < 16; ++i) { + if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) { + result.curr_pic_idx = (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base); + break; + } else if (!dec->render_pic_list[i]) { + dec->render_pic_list[i] = target; + result.curr_pic_idx = dec->ref_idx; + vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)dec->ref_idx++, + &radeon_dec_destroy_associated_data); + break; + } + } + + for (i = 0; i < 8; i++) { + result.ref_frame_map[i] = + (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) + : 0x7f; + } + + result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame]; + result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias; + result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame]; + result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias; + result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame]; + result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias; + + if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) { + if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.p010_mode = 0; + result.luma_10to8 = 1; + result.chroma_10to8 = 1; + } + } + + return result; } static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) { - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - width = align (width, 16); - height = align (height, 16); - return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; + width = align(width, 16); + height = align(height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; } -static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic) +static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, + struct pipe_h265_picture_desc *pic) { - unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; - unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; - unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || - pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = + (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; - unsigned max_references = dec->base.max_references + 1; + unsigned max_references = dec->base.max_references + 1; - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); - log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + - pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; - width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; - num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); - context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); - max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned)ceil(height * 8 / 2048.0); - cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; - db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); - return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; } static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic) { - rvcn_dec_message_vc1_t result; - - memset(&result, 0, sizeof(result)); - switch(pic->base.profile) { - case PIPE_VIDEO_PROFILE_VC1_SIMPLE: - result.profile = RDECODE_VC1_PROFILE_SIMPLE; - result.level = 1; - break; - - case PIPE_VIDEO_PROFILE_VC1_MAIN: - result.profile = RDECODE_VC1_PROFILE_MAIN; - result.level = 2; - break; - - case PIPE_VIDEO_PROFILE_VC1_ADVANCED: - result.profile = RDECODE_VC1_PROFILE_ADVANCED; - result.level = 4; - break; - - default: - assert(0); - } - - result.sps_info_flags |= pic->postprocflag << 7; - result.sps_info_flags |= pic->pulldown << 6; - result.sps_info_flags |= pic->interlace << 5; - result.sps_info_flags |= pic->tfcntrflag << 4; - result.sps_info_flags |= pic->finterpflag << 3; - result.sps_info_flags |= pic->psf << 1; - - result.pps_info_flags |= pic->range_mapy_flag << 31; - result.pps_info_flags |= pic->range_mapy << 28; - result.pps_info_flags |= pic->range_mapuv_flag << 27; - result.pps_info_flags |= pic->range_mapuv << 24; - result.pps_info_flags |= pic->multires << 21; - result.pps_info_flags |= pic->maxbframes << 16; - result.pps_info_flags |= pic->overlap << 11; - result.pps_info_flags |= pic->quantizer << 9; - result.pps_info_flags |= pic->panscan_flag << 7; - result.pps_info_flags |= pic->refdist_flag << 6; - result.pps_info_flags |= pic->vstransform << 0; - - if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { - result.pps_info_flags |= pic->syncmarker << 20; - result.pps_info_flags |= pic->rangered << 19; - result.pps_info_flags |= pic->loopfilter << 5; - result.pps_info_flags |= pic->fastuvmc << 4; - result.pps_info_flags |= pic->extended_mv << 3; - result.pps_info_flags |= pic->extended_dmv << 8; - result.pps_info_flags |= pic->dquant << 1; - } - - result.chroma_format = 1; - - return result; + rvcn_dec_message_vc1_t result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + result.profile = RDECODE_VC1_PROFILE_SIMPLE; + result.level = 1; + break; + + case PIPE_VIDEO_PROFILE_VC1_MAIN: + result.profile = RDECODE_VC1_PROFILE_MAIN; + result.level = 2; + break; + + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + result.profile = RDECODE_VC1_PROFILE_ADVANCED; + result.level = 4; + break; + + default: + assert(0); + } + + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; + + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { + result.pps_info_flags |= pic->syncmarker << 20; + result.pps_info_flags |= pic->rangered << 19; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; + } + + result.chroma_format = 1; + + return result; } static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref) { - uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; - uint32_t max = MAX2(dec->frame_number, 1) - 1; - uintptr_t frame; + uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; + uintptr_t frame; - /* seems to be the most sane fallback */ - if (!ref) - return max; + /* seems to be the most sane fallback */ + if (!ref) + return max; - /* get the frame number from the associated data */ - frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + /* get the frame number from the associated data */ + frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); - /* limit the frame number to a valid range */ - return MAX2(MIN2(frame, max), min); + /* limit the frame number to a valid range */ + return MAX2(MIN2(frame, max), min); } static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec, - struct pipe_mpeg12_picture_desc *pic) + struct pipe_mpeg12_picture_desc *pic) { - const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; - rvcn_dec_message_mpeg2_vld_t result; - unsigned i; - - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; - - result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); - result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); - - if(pic->intra_matrix) { - result.load_intra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; - } - } - if(pic->non_intra_matrix) { - result.load_nonintra_quantiser_matrix = 1; - for (i = 0; i < 64; ++i) { - result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; - } - } - - result.profile_and_level_indication = 0; - result.chroma_format = 0x1; - - result.picture_coding_type = pic->picture_coding_type; - result.f_code[0][0] = pic->f_code[0][0] + 1; - result.f_code[0][1] = pic->f_code[0][1] + 1; - result.f_code[1][0] = pic->f_code[1][0] + 1; - result.f_code[1][1] = pic->f_code[1][1] + 1; - result.intra_dc_precision = pic->intra_dc_precision; - result.pic_structure = pic->picture_structure; - result.top_field_first = pic->top_field_first; - result.frame_pred_frame_dct = pic->frame_pred_frame_dct; - result.concealment_motion_vectors = pic->concealment_motion_vectors; - result.q_scale_type = pic->q_scale_type; - result.intra_vlc_format = pic->intra_vlc_format; - result.alternate_scan = pic->alternate_scan; - - return result; + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + rvcn_dec_message_mpeg2_vld_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + + result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); + result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); + + if (pic->intra_matrix) { + result.load_intra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + } + } + if (pic->non_intra_matrix) { + result.load_nonintra_quantiser_matrix = 1; + for (i = 0; i < 64; ++i) { + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } + } + + result.profile_and_level_indication = 0; + result.chroma_format = 0x1; + + result.picture_coding_type = pic->picture_coding_type; + result.f_code[0][0] = pic->f_code[0][0] + 1; + result.f_code[0][1] = pic->f_code[0][1] + 1; + result.f_code[1][0] = pic->f_code[1][0] + 1; + result.f_code[1][1] = pic->f_code[1][1] + 1; + result.intra_dc_precision = pic->intra_dc_precision; + result.pic_structure = pic->picture_structure; + result.top_field_first = pic->top_field_first; + result.frame_pred_frame_dct = pic->frame_pred_frame_dct; + result.concealment_motion_vectors = pic->concealment_motion_vectors; + result.q_scale_type = pic->q_scale_type; + result.intra_vlc_format = pic->intra_vlc_format; + result.alternate_scan = pic->alternate_scan; + + return result; } static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec, - struct pipe_mpeg4_picture_desc *pic) + struct pipe_mpeg4_picture_desc *pic) { - rvcn_dec_message_mpeg4_asp_vld_t result; - unsigned i; + rvcn_dec_message_mpeg4_asp_vld_t result; + unsigned i; - memset(&result, 0, sizeof(result)); - result.decoded_pic_idx = dec->frame_number; + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; - result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); - result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); + result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]); + result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]); - result.variant_type = 0; - result.profile_and_level_indication = 0xF0; + result.variant_type = 0; + result.profile_and_level_indication = 0xF0; - result.video_object_layer_verid = 0x5; - result.video_object_layer_shape = 0x0; + result.video_object_layer_verid = 0x5; + result.video_object_layer_shape = 0x0; - result.video_object_layer_width = dec->base.width; - result.video_object_layer_height = dec->base.height; + result.video_object_layer_width = dec->base.width; + result.video_object_layer_height = dec->base.height; - result.vop_time_increment_resolution = pic->vop_time_increment_resolution; + result.vop_time_increment_resolution = pic->vop_time_increment_resolution; - result.short_video_header = pic->short_video_header; - result.interlaced = pic->interlaced; - result.load_intra_quant_mat = 1; - result.load_nonintra_quant_mat = 1; - result.quarter_sample = pic->quarter_sample; - result.complexity_estimation_disable = 1; - result.resync_marker_disable = pic->resync_marker_disable; - result.newpred_enable = 0; - result.reduced_resolution_vop_enable = 0; + result.short_video_header = pic->short_video_header; + result.interlaced = pic->interlaced; + result.load_intra_quant_mat = 1; + result.load_nonintra_quant_mat = 1; + result.quarter_sample = pic->quarter_sample; + result.complexity_estimation_disable = 1; + result.resync_marker_disable = pic->resync_marker_disable; + result.newpred_enable = 0; + result.reduced_resolution_vop_enable = 0; - result.quant_type = pic->quant_type; + result.quant_type = pic->quant_type; - for (i = 0; i < 64; ++i) { - result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; - result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; - } + for (i = 0; i < 64; ++i) { + result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; + result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; + } - return result; + return result; } static void rvcn_dec_message_create(struct radeon_decoder *dec) { - rvcn_dec_message_header_t *header = dec->msg; - rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t); - unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); - - memset(dec->msg, 0, sizes); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizes; - header->num_buffers = 1; - header->msg_type = RDECODE_MSG_CREATE; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = 0; - - header->index[0].message_id = RDECODE_MESSAGE_CREATE; - header->index[0].offset = sizeof(rvcn_dec_message_header_t); - header->index[0].size = sizeof(rvcn_dec_message_create_t); - header->index[0].filled = 0; - - create->stream_type = dec->stream_type; - create->session_flags = 0; - create->width_in_samples = dec->base.width; - create->height_in_samples = dec->base.height; + rvcn_dec_message_header_t *header = dec->msg; + rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t); + unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); + + memset(dec->msg, 0, sizes); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizes; + header->num_buffers = 1; + header->msg_type = RDECODE_MSG_CREATE; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = 0; + + header->index[0].message_id = RDECODE_MESSAGE_CREATE; + header->index[0].offset = sizeof(rvcn_dec_message_header_t); + header->index[0].size = sizeof(rvcn_dec_message_create_t); + header->index[0].filled = 0; + + create->stream_type = dec->stream_type; + create->session_flags = 0; + create->width_in_samples = dec->base.width; + create->height_in_samples = dec->base.height; } static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct si_texture *luma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[0]; - struct si_texture *chroma = (struct si_texture *) - ((struct vl_video_buffer *)target)->resources[1]; - rvcn_dec_message_header_t *header; - rvcn_dec_message_index_t *index; - rvcn_dec_message_decode_t *decode; - unsigned sizes = 0, offset_decode, offset_codec; - void *codec; - - header = dec->msg; - sizes += sizeof(rvcn_dec_message_header_t); - index = (void*)header + sizeof(rvcn_dec_message_header_t); - sizes += sizeof(rvcn_dec_message_index_t); - offset_decode = sizes; - decode = (void*)index + sizeof(rvcn_dec_message_index_t); - sizes += sizeof(rvcn_dec_message_decode_t); - offset_codec = sizes; - codec = (void*)decode + sizeof(rvcn_dec_message_decode_t); - - memset(dec->msg, 0, sizes); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizes; - header->num_buffers = 2; - header->msg_type = RDECODE_MSG_DECODE; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = dec->frame_number; - - header->index[0].message_id = RDECODE_MESSAGE_DECODE; - header->index[0].offset = offset_decode; - header->index[0].size = sizeof(rvcn_dec_message_decode_t); - header->index[0].filled = 0; - - index->offset = offset_codec; - index->size = sizeof(rvcn_dec_message_avc_t); - index->filled = 0; - - decode->stream_type = dec->stream_type; - decode->decode_flags = 0x1; - decode->width_in_samples = dec->base.width; - decode->height_in_samples = dec->base.height; - - decode->bsd_size = align(dec->bs_size, 128); - decode->dpb_size = dec->dpb.res->buf->size; - decode->dt_size = - si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + - si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; - - decode->sct_size = 0; - decode->sc_coeff_size = 0; - - decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE; - decode->db_pitch = (((struct si_screen*)dec->screen)->info.family >= CHIP_ARCTURUS && - dec->base.width > 32 && dec->stream_type == RDECODE_CODEC_VP9) ? - align(dec->base.width, 64) : - align(dec->base.width, 32) ; - decode->db_surf_tile_config = 0; - - decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; - decode->dt_uv_pitch = decode->dt_pitch / 2; - - decode->dt_tiling_mode = 0; - decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR; - decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR; - decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced; - decode->dt_surf_tile_config = 0; - decode->dt_uv_surf_tile_config = 0; - - decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; - decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; - if (decode->dt_field_mode) { - decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + - luma->surface.u.gfx9.surf_slice_size; - decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + - chroma->surface.u.gfx9.surf_slice_size; - } else { - decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; - decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; - } - - switch (u_reduce_video_profile(picture->profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - rvcn_dec_message_avc_t avc = - get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); - memcpy(codec, (void*)&avc, sizeof(rvcn_dec_message_avc_t)); - index->message_id = RDECODE_MESSAGE_AVC; - break; - } - case PIPE_VIDEO_FORMAT_HEVC: { - rvcn_dec_message_hevc_t hevc = - get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); - - memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t)); - index->message_id = RDECODE_MESSAGE_HEVC; - if (dec->ctx.res == NULL) { - unsigned ctx_size; - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - ctx_size = calc_ctx_size_h265_main10(dec, - (struct pipe_h265_picture_desc*)picture); - else - ctx_size = calc_ctx_size_h265_main(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) - RVID_ERR("Can't allocated context buffer.\n"); - si_vid_clear_buffer(dec->base.context, &dec->ctx); - } - break; - } - case PIPE_VIDEO_FORMAT_VC1: { - rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); - - memcpy(codec, (void*)&vc1, sizeof(rvcn_dec_message_vc1_t)); - if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || - (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { - decode->width_in_samples = align(decode->width_in_samples, 16) / 16; - decode->height_in_samples = align(decode->height_in_samples, 16) / 16; - } - index->message_id = RDECODE_MESSAGE_VC1; - break; - - } - case PIPE_VIDEO_FORMAT_MPEG12: { - rvcn_dec_message_mpeg2_vld_t mpeg2 = - get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); - - memcpy(codec, (void*)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t)); - index->message_id = RDECODE_MESSAGE_MPEG2_VLD; - break; - } - case PIPE_VIDEO_FORMAT_MPEG4: { - rvcn_dec_message_mpeg4_asp_vld_t mpeg4 = - get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); - - memcpy(codec, (void*)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t)); - index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD; - break; - } - case PIPE_VIDEO_FORMAT_VP9: { - rvcn_dec_message_vp9_t vp9 = - get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc*)picture); - - memcpy(codec, (void*)&vp9, sizeof(rvcn_dec_message_vp9_t)); - index->message_id = RDECODE_MESSAGE_VP9; - - if (dec->ctx.res == NULL) { - unsigned ctx_size; - uint8_t *ptr; - - /* default probability + probability data */ - ctx_size = 2304 * 5; - - if (((struct si_screen*)dec->screen)->info.family >= CHIP_ARCTURUS) { - /* SRE collocated context data */ - ctx_size += 32 * 2 * 128 * 68; - /* SMP collocated context data */ - ctx_size += 9 * 64 * 2 * 128 * 68; - /* SDB left tile pixel */ - ctx_size += 8 * 2 * 8192; - } else { - ctx_size += 32 * 2 * 64 * 64; - ctx_size += 9 * 64 * 2 * 64 * 64; - ctx_size += 8 * 2 * 4096; - } - - if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) - ctx_size += 8 * 2 * 4096; - - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) - RVID_ERR("Can't allocated context buffer.\n"); - si_vid_clear_buffer(dec->base.context, &dec->ctx); - - /* ctx needs probs table */ - ptr = dec->ws->buffer_map( - dec->ctx.res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - fill_probs_table(ptr); - dec->ws->buffer_unmap(dec->ctx.res->buf); - } - break; - } - default: - assert(0); - return NULL; - } - - if (dec->ctx.res) - decode->hw_ctxt_size = dec->ctx.res->buf->size; - - return luma->buffer.buf; + struct si_texture *luma = (struct si_texture *)((struct vl_video_buffer *)target)->resources[0]; + struct si_texture *chroma = + (struct si_texture *)((struct vl_video_buffer *)target)->resources[1]; + rvcn_dec_message_header_t *header; + rvcn_dec_message_index_t *index; + rvcn_dec_message_decode_t *decode; + unsigned sizes = 0, offset_decode, offset_codec; + void *codec; + + header = dec->msg; + sizes += sizeof(rvcn_dec_message_header_t); + index = (void *)header + sizeof(rvcn_dec_message_header_t); + sizes += sizeof(rvcn_dec_message_index_t); + offset_decode = sizes; + decode = (void *)index + sizeof(rvcn_dec_message_index_t); + sizes += sizeof(rvcn_dec_message_decode_t); + offset_codec = sizes; + codec = (void *)decode + sizeof(rvcn_dec_message_decode_t); + + memset(dec->msg, 0, sizes); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizes; + header->num_buffers = 2; + header->msg_type = RDECODE_MSG_DECODE; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = dec->frame_number; + + header->index[0].message_id = RDECODE_MESSAGE_DECODE; + header->index[0].offset = offset_decode; + header->index[0].size = sizeof(rvcn_dec_message_decode_t); + header->index[0].filled = 0; + + index->offset = offset_codec; + index->size = sizeof(rvcn_dec_message_avc_t); + index->filled = 0; + + decode->stream_type = dec->stream_type; + decode->decode_flags = 0x1; + decode->width_in_samples = dec->base.width; + decode->height_in_samples = dec->base.height; + + decode->bsd_size = align(dec->bs_size, 128); + decode->dpb_size = dec->dpb.res->buf->size; + decode->dt_size = si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + + si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; + + decode->sct_size = 0; + decode->sc_coeff_size = 0; + + decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE; + decode->db_pitch = (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR && + dec->base.width > 32 && dec->stream_type == RDECODE_CODEC_VP9) + ? align(dec->base.width, 64) + : align(dec->base.width, 32); + decode->db_surf_tile_config = 0; + + decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; + decode->dt_uv_pitch = decode->dt_pitch / 2; + + decode->dt_tiling_mode = 0; + decode->dt_swizzle_mode = RDECODE_SW_MODE_LINEAR; + decode->dt_array_mode = RDECODE_ARRAY_MODE_LINEAR; + decode->dt_field_mode = ((struct vl_video_buffer *)target)->base.interlaced; + decode->dt_surf_tile_config = 0; + decode->dt_uv_surf_tile_config = 0; + + decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset; + decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset; + if (decode->dt_field_mode) { + decode->dt_luma_bottom_offset = + luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size; + decode->dt_chroma_bottom_offset = + chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size; + } else { + decode->dt_luma_bottom_offset = decode->dt_luma_top_offset; + decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset; + } + + switch (u_reduce_video_profile(picture->profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + rvcn_dec_message_avc_t avc = get_h264_msg(dec, (struct pipe_h264_picture_desc *)picture); + memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t)); + index->message_id = RDECODE_MESSAGE_AVC; + break; + } + case PIPE_VIDEO_FORMAT_HEVC: { + rvcn_dec_message_hevc_t hevc = + get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture); + + memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t)); + index->message_id = RDECODE_MESSAGE_HEVC; + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + si_vid_clear_buffer(dec->base.context, &dec->ctx); + } + break; + } + case PIPE_VIDEO_FORMAT_VC1: { + rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture); + + memcpy(codec, (void *)&vc1, sizeof(rvcn_dec_message_vc1_t)); + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + decode->width_in_samples = align(decode->width_in_samples, 16) / 16; + decode->height_in_samples = align(decode->height_in_samples, 16) / 16; + } + index->message_id = RDECODE_MESSAGE_VC1; + break; + } + case PIPE_VIDEO_FORMAT_MPEG12: { + rvcn_dec_message_mpeg2_vld_t mpeg2 = + get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture); + + memcpy(codec, (void *)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t)); + index->message_id = RDECODE_MESSAGE_MPEG2_VLD; + break; + } + case PIPE_VIDEO_FORMAT_MPEG4: { + rvcn_dec_message_mpeg4_asp_vld_t mpeg4 = + get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture); + + memcpy(codec, (void *)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t)); + index->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD; + break; + } + case PIPE_VIDEO_FORMAT_VP9: { + rvcn_dec_message_vp9_t vp9 = + get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc *)picture); + + memcpy(codec, (void *)&vp9, sizeof(rvcn_dec_message_vp9_t)); + index->message_id = RDECODE_MESSAGE_VP9; + + if (dec->ctx.res == NULL) { + unsigned ctx_size; + uint8_t *ptr; + + /* default probability + probability data */ + ctx_size = 2304 * 5; + + if (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR) { + /* SRE collocated context data */ + ctx_size += 32 * 2 * 128 * 68; + /* SMP collocated context data */ + ctx_size += 9 * 64 * 2 * 128 * 68; + /* SDB left tile pixel */ + ctx_size += 8 * 2 * 2 * 8192; + } else { + ctx_size += 32 * 2 * 64 * 64; + ctx_size += 9 * 64 * 2 * 64 * 64; + ctx_size += 8 * 2 * 4096; + } + + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + ctx_size += 8 * 2 * 4096; + + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + si_vid_clear_buffer(dec->base.context, &dec->ctx); + + /* ctx needs probs table */ + ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + fill_probs_table(ptr); + dec->ws->buffer_unmap(dec->ctx.res->buf); + } + break; + } + default: + assert(0); + return NULL; + } + + if (dec->ctx.res) + decode->hw_ctxt_size = dec->ctx.res->buf->size; + + return luma->buffer.buf; } static void rvcn_dec_message_destroy(struct radeon_decoder *dec) { - rvcn_dec_message_header_t *header = dec->msg; - - memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t)); - header->header_size = sizeof(rvcn_dec_message_header_t); - header->total_size = sizeof(rvcn_dec_message_header_t) - - sizeof(rvcn_dec_message_index_t); - header->num_buffers = 0; - header->msg_type = RDECODE_MSG_DESTROY; - header->stream_handle = dec->stream_handle; - header->status_report_feedback_number = 0; + rvcn_dec_message_header_t *header = dec->msg; + + memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t)); + header->header_size = sizeof(rvcn_dec_message_header_t); + header->total_size = sizeof(rvcn_dec_message_header_t) - sizeof(rvcn_dec_message_index_t); + header->num_buffers = 0; + header->msg_type = RDECODE_MSG_DESTROY; + header->stream_handle = dec->stream_handle; + header->status_report_feedback_number = 0; } static void rvcn_dec_message_feedback(struct radeon_decoder *dec) { - rvcn_dec_feedback_header_t *header = (void*)dec->fb; + rvcn_dec_feedback_header_t *header = (void *)dec->fb; - header->header_size = sizeof(rvcn_dec_feedback_header_t); - header->total_size = sizeof(rvcn_dec_feedback_header_t); - header->num_buffers = 0; + header->header_size = sizeof(rvcn_dec_feedback_header_t); + header->total_size = sizeof(rvcn_dec_feedback_header_t); + header->num_buffers = 0; } /* flush IB to the hardware */ static int flush(struct radeon_decoder *dec, unsigned flags) { - return dec->ws->cs_flush(dec->cs, flags, NULL); + return dec->ws->cs_flush(dec->cs, flags, NULL); } /* add a new set register command to the IB */ static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val) { - radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0)); - radeon_emit(dec->cs, val); + radeon_emit(dec->cs, RDECODE_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); } /* send a command to the VCPU through the GPCOM registers */ -static void send_cmd(struct radeon_decoder *dec, unsigned cmd, - struct pb_buffer* buf, uint32_t off, - enum radeon_bo_usage usage, enum radeon_bo_domain domain) +static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer *buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) { - uint64_t addr; + uint64_t addr; - dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, - domain, 0); - addr = dec->ws->buffer_get_virtual_address(buf); - addr = addr + off; + dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain, 0); + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; - set_reg(dec, dec->reg.data0, addr); - set_reg(dec, dec->reg.data1, addr >> 32); - set_reg(dec, dec->reg.cmd, cmd << 1); + set_reg(dec, dec->reg.data0, addr); + set_reg(dec, dec->reg.data1, addr >> 32); + set_reg(dec, dec->reg.cmd, cmd << 1); } /* do the codec needs an IT buffer ?*/ static bool have_it(struct radeon_decoder *dec) { - return dec->stream_type == RDECODE_CODEC_H264_PERF || - dec->stream_type == RDECODE_CODEC_H265; + return dec->stream_type == RDECODE_CODEC_H264_PERF || dec->stream_type == RDECODE_CODEC_H265; } /* do the codec needs an probs buffer? */ static bool have_probs(struct radeon_decoder *dec) { - return dec->stream_type == RDECODE_CODEC_VP9; + return dec->stream_type == RDECODE_CODEC_VP9; } /* map the next available message/feedback/itscaling buffer */ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec) { - struct rvid_buffer* buf; - uint8_t *ptr; + struct rvid_buffer *buf; + uint8_t *ptr; - /* grab the current message/feedback buffer */ - buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + /* and map it for CPU access */ + ptr = + dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - /* calc buffer offsets */ - dec->msg = ptr; + /* calc buffer offsets */ + dec->msg = ptr; - dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); - if (have_it(dec)) - dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); - else if (have_probs(dec)) - dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); + else if (have_probs(dec)) + dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE); } /* unmap and send a message command to the VCPU */ static void send_msg_buf(struct radeon_decoder *dec) { - struct rvid_buffer* buf; - - /* ignore the request if message/feedback buffer isn't mapped */ - if (!dec->msg || !dec->fb) - return; - - /* grab the current message buffer */ - buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - - /* unmap the buffer */ - dec->ws->buffer_unmap(buf->res->buf); - dec->msg = NULL; - dec->fb = NULL; - dec->it = NULL; - dec->probs = NULL; - - if (dec->sessionctx.res) - send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, - dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, - RADEON_DOMAIN_VRAM); - - /* and send it to the hardware */ - send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, - RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + struct rvid_buffer *buf; + + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; + + /* grab the current message buffer */ + buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; + dec->probs = NULL; + + if (dec->sessionctx.res) + send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + + /* and send it to the hardware */ + send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); } /* cycle to the next set of buffers */ static void next_buffer(struct radeon_decoder *dec) { - ++dec->cur_buffer; - dec->cur_buffer %= NUM_BUFFERS; + ++dec->cur_buffer; + dec->cur_buffer %= NUM_BUFFERS; } static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec) { - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - unsigned max_references = dec->base.max_references + 1; - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); - - return ctx_size; + unsigned width_in_mb, height_in_mb, ctx_size; + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); + + return ctx_size; } /* calculate size of reference picture buffer */ static unsigned calc_dpb_size(struct radeon_decoder *dec) { - unsigned width_in_mb, height_in_mb, image_size, dpb_size; - - // always align them to MB size for dpb calculation - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - // always one more for currently decoded picture - unsigned max_references = dec->base.max_references + 1; - - // aligned size of a single frame - image_size = align(width, 32) * height; - image_size += image_size / 2; - image_size = align(image_size, 1024); - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - switch (u_reduce_video_profile(dec->base.profile)) { - case PIPE_VIDEO_FORMAT_MPEG4_AVC: { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - dpb_size = image_size * max_references; - break; - } - - case PIPE_VIDEO_FORMAT_HEVC: - if (dec->base.width * dec->base.height >= 4096*2000) - max_references = MAX2(max_references, 8); - else - max_references = MAX2(max_references, 17); - - width = align (width, 16); - height = align (height, 16); - if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) - dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references; - else - dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references; - break; - - case PIPE_VIDEO_FORMAT_VC1: - // the firmware seems to allways assume a minimum of ref frames - max_references = MAX2(NUM_VC1_REFS, max_references); - - // reference picture buffer - dpb_size = image_size * max_references; - - // CONTEXT_BUFFER - dpb_size += width_in_mb * height_in_mb * 128; - - // IT surface buffer - dpb_size += width_in_mb * 64; - - // DB surface buffer - dpb_size += width_in_mb * 128; - - // BP - dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); - break; - - case PIPE_VIDEO_FORMAT_MPEG12: - // reference picture buffer, must be big enough for all frames - dpb_size = image_size * NUM_MPEG2_REFS; - break; - - case PIPE_VIDEO_FORMAT_MPEG4: - // reference picture buffer - dpb_size = image_size * max_references; - - // CM - dpb_size += width_in_mb * height_in_mb * 64; - - // IT surface buffer - dpb_size += align(width_in_mb * height_in_mb * 32, 64); - - dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); - break; - - case PIPE_VIDEO_FORMAT_VP9: - max_references = MAX2(max_references, 9); - - dpb_size = (((struct si_screen*)dec->screen)->info.family >= CHIP_ARCTURUS) ? - (8192 * 4320 * 3 / 2) * max_references : - (4096 * 3000 * 3 / 2) * max_references; - - if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) - dpb_size *= (3 / 2); - break; - - case PIPE_VIDEO_FORMAT_JPEG: - dpb_size = 0; - break; - - default: - // something is missing here - assert(0); - - // at least use a sane default value - dpb_size = 32 * 1024 * 1024; - break; - } - return dpb_size; + unsigned width_in_mb, height_in_mb, image_size, dpb_size; + + // always align them to MB size for dpb calculation + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + // always one more for currently decoded picture + unsigned max_references = dec->base.max_references + 1; + + // aligned size of a single frame + image_size = align(width, 32) * height; + image_size += image_size / 2; + image_size = align(image_size, 1024); + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned num_dpb_buffer; + + switch (dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + break; + } + + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096 * 2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align(width, 16); + height = align(height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((align(width, 32) * height * 9) / 4, 256) * max_references; + else + dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references; + break; + + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + + // reference picture buffer + dpb_size = image_size * max_references; + + // CONTEXT_BUFFER + dpb_size += width_in_mb * height_in_mb * 128; + + // IT surface buffer + dpb_size += width_in_mb * 64; + + // DB surface buffer + dpb_size += width_in_mb * 128; + + // BP + dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + // reference picture buffer, must be big enough for all frames + dpb_size = image_size * NUM_MPEG2_REFS; + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + // reference picture buffer + dpb_size = image_size * max_references; + + // CM + dpb_size += width_in_mb * height_in_mb * 64; + + // IT surface buffer + dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); + break; + + case PIPE_VIDEO_FORMAT_VP9: + max_references = MAX2(max_references, 9); + + dpb_size = (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR) + ? (8192 * 4320 * 3 / 2) * max_references + : (4096 * 3000 * 3 / 2) * max_references; + + if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) + dpb_size *= (3 / 2); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + + default: + // something is missing here + assert(0); + + // at least use a sane default value + dpb_size = 32 * 1024 * 1024; + break; + } + return dpb_size; } /** @@ -1291,169 +1261,162 @@ static unsigned calc_dpb_size(struct radeon_decoder *dec) */ static void radeon_dec_destroy(struct pipe_video_codec *decoder) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - unsigned i; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + unsigned i; - assert(decoder); + assert(decoder); - map_msg_fb_it_probs_buf(dec); - rvcn_dec_message_destroy(dec); - send_msg_buf(dec); + map_msg_fb_it_probs_buf(dec); + rvcn_dec_message_destroy(dec); + send_msg_buf(dec); - flush(dec, 0); + flush(dec, 0); - dec->ws->cs_destroy(dec->cs); + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); } /** * start decoding of a new frame */ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - uintptr_t frame; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + uintptr_t frame; - assert(decoder); + assert(decoder); - frame = ++dec->frame_number; - if (dec->stream_type != RDECODE_CODEC_VP9) - vl_video_buffer_set_associated_data(target, decoder, (void *)frame, - &radeon_dec_destroy_associated_data); + frame = ++dec->frame_number; + if (dec->stream_type != RDECODE_CODEC_VP9) + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + &radeon_dec_destroy_associated_data); - dec->bs_size = 0; - dec->bs_ptr = dec->ws->buffer_map( - dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + dec->bs_size = 0; + dec->bs_ptr = dec->ws->buffer_map(dec->bs_buffers[dec->cur_buffer].res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** * decode a macroblock */ static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - const struct pipe_macroblock *macroblocks, - unsigned num_macroblocks) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) { - /* not supported (yet) */ - assert(0); + /* not supported (yet) */ + assert(0); } /** * decode a bitstream */ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture, - unsigned num_buffers, - const void * const *buffers, - const unsigned *sizes) + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, unsigned num_buffers, + const void *const *buffers, const unsigned *sizes) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; - unsigned i; - - assert(decoder); - - if (!dec->bs_ptr) - return; - - for (i = 0; i < num_buffers; ++i) { - struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; - unsigned new_size = dec->bs_size + sizes[i]; - - if (new_size > buf->res->buf->size) { - dec->ws->buffer_unmap(buf->res->buf); - if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { - RVID_ERR("Can't resize bitstream buffer!"); - return; - } - - dec->bs_ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dec->bs_ptr) - return; - - dec->bs_ptr += dec->bs_size; - } - - memcpy(dec->bs_ptr, buffers[i], sizes[i]); - dec->bs_size += sizes[i]; - dec->bs_ptr += sizes[i]; - } + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; + unsigned i; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + for (i = 0; i < num_buffers; ++i) { + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + unsigned new_size = dec->bs_size + sizes[i]; + + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); + return; + } + + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dec->bs_ptr) + return; + + dec->bs_ptr += dec->bs_size; + } + + memcpy(dec->bs_ptr, buffers[i], sizes[i]); + dec->bs_size += sizes[i]; + dec->bs_ptr += sizes[i]; + } } /** * send cmd for vcn dec */ -void send_cmd_dec(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct pb_buffer *dt; - struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf; - - msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; - bs_buf = &dec->bs_buffers[dec->cur_buffer]; - - memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); - dec->ws->buffer_unmap(bs_buf->res->buf); - - map_msg_fb_it_probs_buf(dec); - dt = rvcn_dec_message_decode(dec, target, picture); - rvcn_dec_message_feedback(dec); - send_msg_buf(dec); - - send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - if (dec->ctx.res) - send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, - RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, - 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, - RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); - send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); - if (have_it(dec)) - send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - else if (have_probs(dec)) - send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf, - FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); - set_reg(dec, dec->reg.cntl, 1); + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf; + + msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_probs_buf(dec); + dt = rvcn_dec_message_decode(dec, target, picture); + rvcn_dec_message_feedback(dec); + send_msg_buf(dec); + + send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + if (dec->ctx.res) + send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ, + RADEON_DOMAIN_GTT); + send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET, + RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf, + FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + else if (have_probs(dec)) + send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf, + FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + set_reg(dec, dec->reg.cntl, 1); } /** * end decoding of the current frame */ -static void radeon_dec_end_frame(struct pipe_video_codec *decoder, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture) +static void radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) { - struct radeon_decoder *dec = (struct radeon_decoder*)decoder; + struct radeon_decoder *dec = (struct radeon_decoder *)decoder; - assert(decoder); + assert(decoder); - if (!dec->bs_ptr) - return; + if (!dec->bs_ptr) + return; - dec->send_cmd(dec, target, picture); - flush(dec, PIPE_FLUSH_ASYNC); - next_buffer(dec); + dec->send_cmd(dec, target, picture); + flush(dec, PIPE_FLUSH_ASYNC); + next_buffer(dec); } /** @@ -1467,189 +1430,189 @@ static void radeon_dec_flush(struct pipe_video_codec *decoder) * create and HW decoder */ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ) + const struct pipe_video_codec *templ) { - struct si_context *sctx = (struct si_context*)context; - struct radeon_winsys *ws = sctx->ws; - unsigned width = templ->width, height = templ->height; - unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC; - struct radeon_decoder *dec; - int r, i; - - switch(u_reduce_video_profile(templ->profile)) { - case PIPE_VIDEO_FORMAT_MPEG12: - if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) - return vl_create_mpeg12_decoder(context, templ); - stream_type = RDECODE_CODEC_MPEG2_VLD; - break; - case PIPE_VIDEO_FORMAT_MPEG4: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - stream_type = RDECODE_CODEC_MPEG4; - break; - case PIPE_VIDEO_FORMAT_VC1: - stream_type = RDECODE_CODEC_VC1; - break; - case PIPE_VIDEO_FORMAT_MPEG4_AVC: - width = align(width, VL_MACROBLOCK_WIDTH); - height = align(height, VL_MACROBLOCK_HEIGHT); - stream_type = RDECODE_CODEC_H264_PERF; - break; - case PIPE_VIDEO_FORMAT_HEVC: - stream_type = RDECODE_CODEC_H265; - break; - case PIPE_VIDEO_FORMAT_VP9: - stream_type = RDECODE_CODEC_VP9; - break; - case PIPE_VIDEO_FORMAT_JPEG: - stream_type = RDECODE_CODEC_JPEG; - ring = RING_VCN_JPEG; - break; - default: - assert(0); - break; - } - - dec = CALLOC_STRUCT(radeon_decoder); - - if (!dec) - return NULL; - - dec->base = *templ; - dec->base.context = context; - dec->base.width = width; - dec->base.height = height; - - dec->base.destroy = radeon_dec_destroy; - dec->base.begin_frame = radeon_dec_begin_frame; - dec->base.decode_macroblock = radeon_dec_decode_macroblock; - dec->base.decode_bitstream = radeon_dec_decode_bitstream; - dec->base.end_frame = radeon_dec_end_frame; - dec->base.flush = radeon_dec_flush; - - dec->stream_type = stream_type; - dec->stream_handle = si_vid_alloc_stream_handle(); - dec->screen = context->screen; - dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); - if (!dec->cs) { - RVID_ERR("Can't get command submission context.\n"); - goto error; - } - - for (i = 0; i < 16; i++) - dec->render_pic_list[i] = NULL; - bs_buf_size = width * height * (512 / (16 * 16)); - for (i = 0; i < NUM_BUFFERS; ++i) { - unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; - if (have_it(dec)) - msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE; - else if (have_probs(dec)) - msg_fb_it_probs_size += VP9_PROBS_TABLE_SIZE; - /* use vram to improve performance, workaround an unknown bug */ - if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], - msg_fb_it_probs_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated message buffers.\n"); - goto error; - } - - if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], - bs_buf_size, PIPE_USAGE_STAGING)) { - RVID_ERR("Can't allocated bitstream buffers.\n"); - goto error; - } - - si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]); - si_vid_clear_buffer(context, &dec->bs_buffers[i]); - - if (have_probs(dec)) { - struct rvid_buffer* buf; - void *ptr; - - buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map( - buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; - fill_probs_table(ptr); - dec->ws->buffer_unmap(buf->res->buf); - } - } - - dpb_size = calc_dpb_size(dec); - if (dpb_size) { - if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated dpb.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->dpb); - } - - if (dec->stream_type == RDECODE_CODEC_H264_PERF) { - unsigned ctx_size = calc_ctx_size_h264_perf(dec); - if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->ctx); - } - - if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, - RDECODE_SESSION_CONTEXT_SIZE, - PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated session ctx.\n"); - goto error; - } - si_vid_clear_buffer(context, &dec->sessionctx); - - if (sctx->family == CHIP_ARCTURUS) { - dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL; - } else if (sctx->family >= CHIP_NAVI10) { - dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL; - dec->jpg.direct_reg = true; - } else { - dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0; - dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1; - dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD; - dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL; - dec->jpg.direct_reg = false; - } - - map_msg_fb_it_probs_buf(dec); - rvcn_dec_message_create(dec); - send_msg_buf(dec); - r = flush(dec, 0); - if (r) - goto error; - - next_buffer(dec); - - if (stream_type == RDECODE_CODEC_JPEG) - dec->send_cmd = send_cmd_jpeg; - else - dec->send_cmd = send_cmd_dec; - - return &dec->base; + struct si_context *sctx = (struct si_context *)context; + struct radeon_winsys *ws = sctx->ws; + unsigned width = templ->width, height = templ->height; + unsigned dpb_size, bs_buf_size, stream_type = 0, ring = RING_VCN_DEC; + struct radeon_decoder *dec; + int r, i; + + switch (u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + return vl_create_mpeg12_decoder(context, templ); + stream_type = RDECODE_CODEC_MPEG2_VLD; + break; + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + stream_type = RDECODE_CODEC_MPEG4; + break; + case PIPE_VIDEO_FORMAT_VC1: + stream_type = RDECODE_CODEC_VC1; + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + stream_type = RDECODE_CODEC_H264_PERF; + break; + case PIPE_VIDEO_FORMAT_HEVC: + stream_type = RDECODE_CODEC_H265; + break; + case PIPE_VIDEO_FORMAT_VP9: + stream_type = RDECODE_CODEC_VP9; + break; + case PIPE_VIDEO_FORMAT_JPEG: + stream_type = RDECODE_CODEC_JPEG; + ring = RING_VCN_JPEG; + break; + default: + assert(0); + break; + } + + dec = CALLOC_STRUCT(radeon_decoder); + + if (!dec) + return NULL; + + dec->base = *templ; + dec->base.context = context; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = radeon_dec_destroy; + dec->base.begin_frame = radeon_dec_begin_frame; + dec->base.decode_macroblock = radeon_dec_decode_macroblock; + dec->base.decode_bitstream = radeon_dec_decode_bitstream; + dec->base.end_frame = radeon_dec_end_frame; + dec->base.flush = radeon_dec_flush; + + dec->stream_type = stream_type; + dec->stream_handle = si_vid_alloc_stream_handle(); + dec->screen = context->screen; + dec->ws = ws; + dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); + if (!dec->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + for (i = 0; i < 16; i++) + dec->render_pic_list[i] = NULL; + bs_buf_size = width * height * (512 / (16 * 16)); + for (i = 0; i < NUM_BUFFERS; ++i) { + unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + if (have_it(dec)) + msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE; + else if (have_probs(dec)) + msg_fb_it_probs_size += VP9_PROBS_TABLE_SIZE; + /* use vram to improve performance, workaround an unknown bug */ + if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], msg_fb_it_probs_size, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated message buffers.\n"); + goto error; + } + + if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size, + PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); + goto error; + } + + si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]); + si_vid_clear_buffer(context, &dec->bs_buffers[i]); + + if (have_probs(dec)) { + struct rvid_buffer *buf; + void *ptr; + + buf = &dec->msg_fb_it_probs_buffers[i]; + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; + fill_probs_table(ptr); + dec->ws->buffer_unmap(buf->res->buf); + } + } + + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->dpb); + } + + if (dec->stream_type == RDECODE_CODEC_H264_PERF) { + unsigned ctx_size = calc_ctx_size_h264_perf(dec); + if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->ctx); + } + + if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, RDECODE_SESSION_CONTEXT_SIZE, + PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated session ctx.\n"); + goto error; + } + si_vid_clear_buffer(context, &dec->sessionctx); + + if (sctx->family == CHIP_ARCTURUS) { + dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL; + dec->jpg.direct_reg = true; + } else if (sctx->family >= CHIP_NAVI10 || sctx->family == CHIP_RENOIR) { + dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL; + dec->jpg.direct_reg = true; + } else { + dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0; + dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1; + dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD; + dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL; + dec->jpg.direct_reg = false; + } + + map_msg_fb_it_probs_buf(dec); + rvcn_dec_message_create(dec); + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + + next_buffer(dec); + + if (stream_type == RDECODE_CODEC_JPEG) + dec->send_cmd = send_cmd_jpeg; + else + dec->send_cmd = send_cmd_dec; + + return &dec->base; error: - if (dec->cs) dec->ws->cs_destroy(dec->cs); + if (dec->cs) + dec->ws->cs_destroy(dec->cs); - for (i = 0; i < NUM_BUFFERS; ++i) { - si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); - si_vid_destroy_buffer(&dec->bs_buffers[i]); - } + for (i = 0; i < NUM_BUFFERS; ++i) { + si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]); + si_vid_destroy_buffer(&dec->bs_buffers[i]); + } - si_vid_destroy_buffer(&dec->dpb); - si_vid_destroy_buffer(&dec->ctx); - si_vid_destroy_buffer(&dec->sessionctx); + si_vid_destroy_buffer(&dec->dpb); + si_vid_destroy_buffer(&dec->ctx); + si_vid_destroy_buffer(&dec->sessionctx); - FREE(dec); + FREE(dec); - return NULL; + return NULL; } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h index f079b94df..5a080fdb4 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vcn_dec.h @@ -28,783 +28,780 @@ #ifndef _RADEON_VCN_DEC_H #define _RADEON_VCN_DEC_H -#define RDECODE_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) -#define RDECODE_PKT_TYPE_G(x) (((x) >> 30) & 0x3) -#define RDECODE_PKT_TYPE_C 0x3FFFFFFF -#define RDECODE_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) -#define RDECODE_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) -#define RDECODE_PKT_COUNT_C 0xC000FFFF -#define RDECODE_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) -#define RDECODE_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) -#define RDECODE_PKT0_BASE_INDEX_C 0xFFFF0000 -#define RDECODE_PKT0(index, count) (RDECODE_PKT_TYPE_S(0) | \ - RDECODE_PKT0_BASE_INDEX_S(index) | \ - RDECODE_PKT_COUNT_S(count)) - -#define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2)) - -#define RDECODE_PKT_REG_J(x) ((unsigned)(x) & 0x3FFFF) -#define RDECODE_PKT_RES_J(x) (((unsigned)(x) & 0x3F) << 18) -#define RDECODE_PKT_COND_J(x) (((unsigned)(x) & 0xF) << 24) -#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x) & 0xF) << 28) -#define RDECODE_PKTJ(reg, cond, type) (RDECODE_PKT_REG_J(reg) | \ - RDECODE_PKT_RES_J(0) | \ - RDECODE_PKT_COND_J(cond) | \ - RDECODE_PKT_TYPE_J(type)) - -#define RDECODE_CMD_MSG_BUFFER 0x00000000 -#define RDECODE_CMD_DPB_BUFFER 0x00000001 -#define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 -#define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003 -#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004 -#define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 -#define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100 -#define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 -#define RDECODE_CMD_CONTEXT_BUFFER 0x00000206 - -#define RDECODE_MSG_CREATE 0x00000000 -#define RDECODE_MSG_DECODE 0x00000001 -#define RDECODE_MSG_DESTROY 0x00000002 - -#define RDECODE_CODEC_H264 0x00000000 -#define RDECODE_CODEC_VC1 0x00000001 -#define RDECODE_CODEC_MPEG2_VLD 0x00000003 -#define RDECODE_CODEC_MPEG4 0x00000004 -#define RDECODE_CODEC_H264_PERF 0x00000007 -#define RDECODE_CODEC_JPEG 0x00000008 -#define RDECODE_CODEC_H265 0x00000010 -#define RDECODE_CODEC_VP9 0x00000011 - -#define RDECODE_ARRAY_MODE_LINEAR 0x00000000 -#define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 -#define RDECODE_ARRAY_MODE_1D_THIN 0x00000002 -#define RDECODE_ARRAY_MODE_2D_THIN 0x00000004 -#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 -#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 - -#define RDECODE_H264_PROFILE_BASELINE 0x00000000 -#define RDECODE_H264_PROFILE_MAIN 0x00000001 -#define RDECODE_H264_PROFILE_HIGH 0x00000002 -#define RDECODE_H264_PROFILE_STEREO_HIGH 0x00000003 -#define RDECODE_H264_PROFILE_MVC 0x00000004 - -#define RDECODE_VC1_PROFILE_SIMPLE 0x00000000 -#define RDECODE_VC1_PROFILE_MAIN 0x00000001 -#define RDECODE_VC1_PROFILE_ADVANCED 0x00000002 - -#define RDECODE_SW_MODE_LINEAR 0x00000000 -#define RDECODE_256B_S 0x00000001 -#define RDECODE_256B_D 0x00000002 -#define RDECODE_4KB_S 0x00000005 -#define RDECODE_4KB_D 0x00000006 -#define RDECODE_64KB_S 0x00000009 -#define RDECODE_64KB_D 0x0000000A -#define RDECODE_4KB_S_X 0x00000015 -#define RDECODE_4KB_D_X 0x00000016 -#define RDECODE_64KB_S_X 0x00000019 -#define RDECODE_64KB_D_X 0x0000001A - -#define RDECODE_MESSAGE_NOT_SUPPORTED 0x00000000 -#define RDECODE_MESSAGE_CREATE 0x00000001 -#define RDECODE_MESSAGE_DECODE 0x00000002 -#define RDECODE_MESSAGE_AVC 0x00000006 -#define RDECODE_MESSAGE_VC1 0x00000007 -#define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A -#define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B -#define RDECODE_MESSAGE_HEVC 0x0000000D -#define RDECODE_MESSAGE_VP9 0x0000000E - -#define RDECODE_FEEDBACK_PROFILING 0x00000001 - -#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7 - -#define NUM_BUFFERS 4 - -#define RDECODE_VP9_PROBS_DATA_SIZE 2304 - -#define mmUVD_JPEG_CNTL 0x0200 -#define mmUVD_JPEG_CNTL_BASE_IDX 1 -#define mmUVD_JPEG_RB_BASE 0x0201 -#define mmUVD_JPEG_RB_BASE_BASE_IDX 1 -#define mmUVD_JPEG_RB_WPTR 0x0202 -#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1 -#define mmUVD_JPEG_RB_RPTR 0x0203 -#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1 -#define mmUVD_JPEG_RB_SIZE 0x0204 -#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 -#define mmUVD_JPEG_TIER_CNTL2 0x021a -#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1 -#define mmUVD_JPEG_UV_TILING_CTRL 0x021c -#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1 -#define mmUVD_JPEG_TILING_CTRL 0x021e -#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1 -#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 -#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1 -#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 -#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1 -#define mmUVD_JPEG_PITCH 0x0222 -#define mmUVD_JPEG_PITCH_BASE_IDX 1 -#define mmUVD_JPEG_INT_EN 0x0229 -#define mmUVD_JPEG_INT_EN_BASE_IDX 1 -#define mmUVD_JPEG_UV_PITCH 0x022b -#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1 -#define mmUVD_JPEG_INDEX 0x023e -#define mmUVD_JPEG_INDEX_BASE_IDX 1 -#define mmUVD_JPEG_DATA 0x023f -#define mmUVD_JPEG_DATA_BASE_IDX 1 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 -#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1 -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1 -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b -#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1 -#define mmUVD_CTX_INDEX 0x0528 -#define mmUVD_CTX_INDEX_BASE_IDX 1 -#define mmUVD_CTX_DATA 0x0529 -#define mmUVD_CTX_DATA_BASE_IDX 1 -#define mmUVD_SOFT_RESET 0x05a0 -#define mmUVD_SOFT_RESET_BASE_IDX 1 - -#define vcnipUVD_JPEG_DEC_SOFT_RST 0x402f -#define vcnipUVD_JRBC_IB_COND_RD_TIMER 0x408e -#define vcnipUVD_JRBC_IB_REF_DATA 0x408f -#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x40e1 -#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x40e0 -#define vcnipUVD_JPEG_RB_BASE 0x4001 -#define vcnipUVD_JPEG_RB_SIZE 0x4004 -#define vcnipUVD_JPEG_RB_WPTR 0x4002 -#define vcnipUVD_JPEG_PITCH 0x401f -#define vcnipUVD_JPEG_UV_PITCH 0x4020 -#define vcnipJPEG_DEC_ADDR_MODE 0x4027 -#define vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE 0x4024 -#define vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE 0x4025 -#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x40e3 -#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x40e2 -#define vcnipUVD_JPEG_INDEX 0x402c -#define vcnipUVD_JPEG_DATA 0x402d -#define vcnipUVD_JPEG_TIER_CNTL2 0x400f -#define vcnipUVD_JPEG_OUTBUF_RPTR 0x401e -#define vcnipUVD_JPEG_OUTBUF_CNTL 0x401c -#define vcnipUVD_JPEG_INT_EN 0x400a -#define vcnipUVD_JPEG_CNTL 0x4000 -#define vcnipUVD_JPEG_RB_RPTR 0x4003 -#define vcnipUVD_JPEG_OUTBUF_WPTR 0x401d - -#define UVD_BASE_INST0_SEG0 0x00007800 -#define UVD_BASE_INST0_SEG1 0x00007E00 -#define UVD_BASE_INST0_SEG2 0 -#define UVD_BASE_INST0_SEG3 0 -#define UVD_BASE_INST0_SEG4 0 - -#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + reg) - -#define COND0 0 -#define COND1 1 -#define COND2 2 -#define COND3 3 -#define COND4 4 -#define COND5 5 -#define COND6 6 -#define COND7 7 - -#define TYPE0 0 -#define TYPE1 1 -#define TYPE2 2 -#define TYPE3 3 -#define TYPE4 4 -#define TYPE5 5 -#define TYPE6 6 -#define TYPE7 7 +#include "radeon_video.h" + +#define RDECODE_PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) +#define RDECODE_PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define RDECODE_PKT_TYPE_C 0x3FFFFFFF +#define RDECODE_PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) +#define RDECODE_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define RDECODE_PKT_COUNT_C 0xC000FFFF +#define RDECODE_PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0) +#define RDECODE_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define RDECODE_PKT0_BASE_INDEX_C 0xFFFF0000 +#define RDECODE_PKT0(index, count) \ + (RDECODE_PKT_TYPE_S(0) | RDECODE_PKT0_BASE_INDEX_S(index) | RDECODE_PKT_COUNT_S(count)) + +#define RDECODE_PKT2() (RDECODE_PKT_TYPE_S(2)) + +#define RDECODE_PKT_REG_J(x) ((unsigned)(x)&0x3FFFF) +#define RDECODE_PKT_RES_J(x) (((unsigned)(x)&0x3F) << 18) +#define RDECODE_PKT_COND_J(x) (((unsigned)(x)&0xF) << 24) +#define RDECODE_PKT_TYPE_J(x) (((unsigned)(x)&0xF) << 28) +#define RDECODE_PKTJ(reg, cond, type) \ + (RDECODE_PKT_REG_J(reg) | RDECODE_PKT_RES_J(0) | RDECODE_PKT_COND_J(cond) | \ + RDECODE_PKT_TYPE_J(type)) + +#define RDECODE_CMD_MSG_BUFFER 0x00000000 +#define RDECODE_CMD_DPB_BUFFER 0x00000001 +#define RDECODE_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define RDECODE_CMD_FEEDBACK_BUFFER 0x00000003 +#define RDECODE_CMD_PROB_TBL_BUFFER 0x00000004 +#define RDECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define RDECODE_CMD_BITSTREAM_BUFFER 0x00000100 +#define RDECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204 +#define RDECODE_CMD_CONTEXT_BUFFER 0x00000206 + +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MSG_DECODE 0x00000001 +#define RDECODE_MSG_DESTROY 0x00000002 + +#define RDECODE_CODEC_H264 0x00000000 +#define RDECODE_CODEC_VC1 0x00000001 +#define RDECODE_CODEC_MPEG2_VLD 0x00000003 +#define RDECODE_CODEC_MPEG4 0x00000004 +#define RDECODE_CODEC_H264_PERF 0x00000007 +#define RDECODE_CODEC_JPEG 0x00000008 +#define RDECODE_CODEC_H265 0x00000010 +#define RDECODE_CODEC_VP9 0x00000011 + +#define RDECODE_ARRAY_MODE_LINEAR 0x00000000 +#define RDECODE_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 +#define RDECODE_ARRAY_MODE_1D_THIN 0x00000002 +#define RDECODE_ARRAY_MODE_2D_THIN 0x00000004 +#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 +#define RDECODE_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 + +#define RDECODE_H264_PROFILE_BASELINE 0x00000000 +#define RDECODE_H264_PROFILE_MAIN 0x00000001 +#define RDECODE_H264_PROFILE_HIGH 0x00000002 +#define RDECODE_H264_PROFILE_STEREO_HIGH 0x00000003 +#define RDECODE_H264_PROFILE_MVC 0x00000004 + +#define RDECODE_VC1_PROFILE_SIMPLE 0x00000000 +#define RDECODE_VC1_PROFILE_MAIN 0x00000001 +#define RDECODE_VC1_PROFILE_ADVANCED 0x00000002 + +#define RDECODE_SW_MODE_LINEAR 0x00000000 +#define RDECODE_256B_S 0x00000001 +#define RDECODE_256B_D 0x00000002 +#define RDECODE_4KB_S 0x00000005 +#define RDECODE_4KB_D 0x00000006 +#define RDECODE_64KB_S 0x00000009 +#define RDECODE_64KB_D 0x0000000A +#define RDECODE_4KB_S_X 0x00000015 +#define RDECODE_4KB_D_X 0x00000016 +#define RDECODE_64KB_S_X 0x00000019 +#define RDECODE_64KB_D_X 0x0000001A + +#define RDECODE_MESSAGE_NOT_SUPPORTED 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 +#define RDECODE_MESSAGE_DECODE 0x00000002 +#define RDECODE_MESSAGE_AVC 0x00000006 +#define RDECODE_MESSAGE_VC1 0x00000007 +#define RDECODE_MESSAGE_MPEG2_VLD 0x0000000A +#define RDECODE_MESSAGE_MPEG4_ASP_VLD 0x0000000B +#define RDECODE_MESSAGE_HEVC 0x0000000D +#define RDECODE_MESSAGE_VP9 0x0000000E + +#define RDECODE_FEEDBACK_PROFILING 0x00000001 + +#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT 7 + +#define NUM_BUFFERS 4 + +#define RDECODE_VP9_PROBS_DATA_SIZE 2304 + +#define mmUVD_JPEG_CNTL 0x0200 +#define mmUVD_JPEG_CNTL_BASE_IDX 1 +#define mmUVD_JPEG_RB_BASE 0x0201 +#define mmUVD_JPEG_RB_BASE_BASE_IDX 1 +#define mmUVD_JPEG_RB_WPTR 0x0202 +#define mmUVD_JPEG_RB_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_RPTR 0x0203 +#define mmUVD_JPEG_RB_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_RB_SIZE 0x0204 +#define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 +#define mmUVD_JPEG_TIER_CNTL2 0x021a +#define mmUVD_JPEG_TIER_CNTL2_BASE_IDX 1 +#define mmUVD_JPEG_UV_TILING_CTRL 0x021c +#define mmUVD_JPEG_UV_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_TILING_CTRL 0x021e +#define mmUVD_JPEG_TILING_CTRL_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_RPTR 0x0220 +#define mmUVD_JPEG_OUTBUF_RPTR_BASE_IDX 1 +#define mmUVD_JPEG_OUTBUF_WPTR 0x0221 +#define mmUVD_JPEG_OUTBUF_WPTR_BASE_IDX 1 +#define mmUVD_JPEG_PITCH 0x0222 +#define mmUVD_JPEG_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INT_EN 0x0229 +#define mmUVD_JPEG_INT_EN_BASE_IDX 1 +#define mmUVD_JPEG_UV_PITCH 0x022b +#define mmUVD_JPEG_UV_PITCH_BASE_IDX 1 +#define mmUVD_JPEG_INDEX 0x023e +#define mmUVD_JPEG_INDEX_BASE_IDX 1 +#define mmUVD_JPEG_DATA 0x023f +#define mmUVD_JPEG_DATA_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x0438 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x0439 +#define mmUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x045a +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x045b +#define mmUVD_LMI_JPEG_READ_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_CTX_INDEX 0x0528 +#define mmUVD_CTX_INDEX_BASE_IDX 1 +#define mmUVD_CTX_DATA 0x0529 +#define mmUVD_CTX_DATA_BASE_IDX 1 +#define mmUVD_SOFT_RESET 0x05a0 +#define mmUVD_SOFT_RESET_BASE_IDX 1 + +#define vcnipUVD_JPEG_DEC_SOFT_RST 0x402f +#define vcnipUVD_JRBC_IB_COND_RD_TIMER 0x408e +#define vcnipUVD_JRBC_IB_REF_DATA 0x408f +#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH 0x40e1 +#define vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW 0x40e0 +#define vcnipUVD_JPEG_RB_BASE 0x4001 +#define vcnipUVD_JPEG_RB_SIZE 0x4004 +#define vcnipUVD_JPEG_RB_WPTR 0x4002 +#define vcnipUVD_JPEG_PITCH 0x401f +#define vcnipUVD_JPEG_UV_PITCH 0x4020 +#define vcnipJPEG_DEC_ADDR_MODE 0x4027 +#define vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE 0x4024 +#define vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE 0x4025 +#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x40e3 +#define vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x40e2 +#define vcnipUVD_JPEG_INDEX 0x402c +#define vcnipUVD_JPEG_DATA 0x402d +#define vcnipUVD_JPEG_TIER_CNTL2 0x400f +#define vcnipUVD_JPEG_OUTBUF_RPTR 0x401e +#define vcnipUVD_JPEG_OUTBUF_CNTL 0x401c +#define vcnipUVD_JPEG_INT_EN 0x400a +#define vcnipUVD_JPEG_CNTL 0x4000 +#define vcnipUVD_JPEG_RB_RPTR 0x4003 +#define vcnipUVD_JPEG_OUTBUF_WPTR 0x401d + +#define UVD_BASE_INST0_SEG0 0x00007800 +#define UVD_BASE_INST0_SEG1 0x00007E00 +#define UVD_BASE_INST0_SEG2 0 +#define UVD_BASE_INST0_SEG3 0 +#define UVD_BASE_INST0_SEG4 0 + +#define SOC15_REG_ADDR(reg) (UVD_BASE_INST0_SEG1 + reg) + +#define COND0 0 +#define COND1 1 +#define COND2 2 +#define COND3 3 +#define COND4 4 +#define COND5 5 +#define COND6 6 +#define COND7 7 + +#define TYPE0 0 +#define TYPE1 1 +#define TYPE2 2 +#define TYPE3 3 +#define TYPE4 4 +#define TYPE5 5 +#define TYPE6 6 +#define TYPE7 7 /* VP9 Frame header flags */ -#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6) -#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5) -#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4) -#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3) -#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1) -#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0) - -#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000) -#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100) -#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040) -#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020) -#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010) -#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008) -#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004) -#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002) -#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001) +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT (13) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT (12) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT (11) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT (10) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT (9) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT (8) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT (7) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT (6) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT (5) +#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT (4) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT (3) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT (2) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT (1) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_SHIFT (0) + +#define RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK (0x00002000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK (0x00001000) +#define RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK (0x00000800) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK (0x00000400) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK (0x00000200) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK (0x00000100) +#define RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK (0x00000080) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK (0x00000040) +#define RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK (0x00000020) +#define RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK (0x00000010) +#define RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK (0x00000008) +#define RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK (0x00000004) +#define RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK (0x00000002) +#define RDECODE_FRAME_HDR_INFO_VP9_SHOW_EXISTING_FRAME_MASK (0x00000001) typedef struct rvcn_dec_message_index_s { - unsigned int message_id; - unsigned int offset; - unsigned int size; - unsigned int filled; + unsigned int message_id; + unsigned int offset; + unsigned int size; + unsigned int filled; } rvcn_dec_message_index_t; typedef struct rvcn_dec_message_header_s { - unsigned int header_size; - unsigned int total_size; - unsigned int num_buffers; - unsigned int msg_type; - unsigned int stream_handle; - unsigned int status_report_feedback_number; - - rvcn_dec_message_index_t index[1]; + unsigned int header_size; + unsigned int total_size; + unsigned int num_buffers; + unsigned int msg_type; + unsigned int stream_handle; + unsigned int status_report_feedback_number; + + rvcn_dec_message_index_t index[1]; } rvcn_dec_message_header_t; typedef struct rvcn_dec_message_create_s { - unsigned int stream_type; - unsigned int session_flags; - unsigned int width_in_samples; - unsigned int height_in_samples; + unsigned int stream_type; + unsigned int session_flags; + unsigned int width_in_samples; + unsigned int height_in_samples; } rvcn_dec_message_create_t; typedef struct rvcn_dec_message_decode_s { - unsigned int stream_type; - unsigned int decode_flags; - unsigned int width_in_samples; - unsigned int height_in_samples; - - unsigned int bsd_size; - unsigned int dpb_size; - unsigned int dt_size; - unsigned int sct_size; - unsigned int sc_coeff_size; - unsigned int hw_ctxt_size; - unsigned int sw_ctxt_size; - unsigned int pic_param_size; - unsigned int mb_cntl_size; - unsigned int reserved0[4]; - unsigned int decode_buffer_flags; - - unsigned int db_pitch; - unsigned int db_aligned_height; - unsigned int db_tiling_mode; - unsigned int db_swizzle_mode; - unsigned int db_array_mode; - unsigned int db_field_mode; - unsigned int db_surf_tile_config; - - unsigned int dt_pitch; - unsigned int dt_uv_pitch; - unsigned int dt_tiling_mode; - unsigned int dt_swizzle_mode; - unsigned int dt_array_mode; - unsigned int dt_field_mode; - unsigned int dt_out_format; - unsigned int dt_surf_tile_config; - unsigned int dt_uv_surf_tile_config; - unsigned int dt_luma_top_offset; - unsigned int dt_luma_bottom_offset; - unsigned int dt_chroma_top_offset; - unsigned int dt_chroma_bottom_offset; - unsigned int dt_chromaV_top_offset; - unsigned int dt_chromaV_bottom_offset; - - unsigned char dpbRefArraySlice[16]; - unsigned char dpbCurArraySlice; - unsigned char dpbReserved[3]; + unsigned int stream_type; + unsigned int decode_flags; + unsigned int width_in_samples; + unsigned int height_in_samples; + + unsigned int bsd_size; + unsigned int dpb_size; + unsigned int dt_size; + unsigned int sct_size; + unsigned int sc_coeff_size; + unsigned int hw_ctxt_size; + unsigned int sw_ctxt_size; + unsigned int pic_param_size; + unsigned int mb_cntl_size; + unsigned int reserved0[4]; + unsigned int decode_buffer_flags; + + unsigned int db_pitch; + unsigned int db_aligned_height; + unsigned int db_tiling_mode; + unsigned int db_swizzle_mode; + unsigned int db_array_mode; + unsigned int db_field_mode; + unsigned int db_surf_tile_config; + + unsigned int dt_pitch; + unsigned int dt_uv_pitch; + unsigned int dt_tiling_mode; + unsigned int dt_swizzle_mode; + unsigned int dt_array_mode; + unsigned int dt_field_mode; + unsigned int dt_out_format; + unsigned int dt_surf_tile_config; + unsigned int dt_uv_surf_tile_config; + unsigned int dt_luma_top_offset; + unsigned int dt_luma_bottom_offset; + unsigned int dt_chroma_top_offset; + unsigned int dt_chroma_bottom_offset; + unsigned int dt_chromaV_top_offset; + unsigned int dt_chromaV_bottom_offset; + + unsigned char dpbRefArraySlice[16]; + unsigned char dpbCurArraySlice; + unsigned char dpbReserved[3]; } rvcn_dec_message_decode_t; typedef struct { - unsigned short viewOrderIndex; - unsigned short viewId; - unsigned short numOfAnchorRefsInL0; - unsigned short viewIdOfAnchorRefsInL0[15]; - unsigned short numOfAnchorRefsInL1; - unsigned short viewIdOfAnchorRefsInL1[15]; - unsigned short numOfNonAnchorRefsInL0; - unsigned short viewIdOfNonAnchorRefsInL0[15]; - unsigned short numOfNonAnchorRefsInL1; - unsigned short viewIdOfNonAnchorRefsInL1[15]; + unsigned short viewOrderIndex; + unsigned short viewId; + unsigned short numOfAnchorRefsInL0; + unsigned short viewIdOfAnchorRefsInL0[15]; + unsigned short numOfAnchorRefsInL1; + unsigned short viewIdOfAnchorRefsInL1[15]; + unsigned short numOfNonAnchorRefsInL0; + unsigned short viewIdOfNonAnchorRefsInL0[15]; + unsigned short numOfNonAnchorRefsInL1; + unsigned short viewIdOfNonAnchorRefsInL1[15]; } radeon_mvcElement_t; typedef struct rvcn_dec_message_avc_s { - unsigned int profile; - unsigned int level; - - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned char chroma_format; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - unsigned char log2_max_frame_num_minus4; - - unsigned char pic_order_cnt_type; - unsigned char log2_max_pic_order_cnt_lsb_minus4; - unsigned char num_ref_frames; - unsigned char reserved_8bit; - - signed char pic_init_qp_minus26; - signed char pic_init_qs_minus26; - signed char chroma_qp_index_offset; - signed char second_chroma_qp_index_offset; - - unsigned char num_slice_groups_minus1; - unsigned char slice_group_map_type; - unsigned char num_ref_idx_l0_active_minus1; - unsigned char num_ref_idx_l1_active_minus1; - - unsigned short slice_group_change_rate_minus1; - unsigned short reserved_16bit_1; - - unsigned char scaling_list_4x4[6][16]; - unsigned char scaling_list_8x8[2][64]; - - unsigned int frame_num; - unsigned int frame_num_list[16]; - int curr_field_order_cnt_list[2]; - int field_order_cnt_list[16][2]; - - unsigned int decoded_pic_idx; - unsigned int curr_pic_ref_frame_num; - unsigned char ref_frame_list[16]; - - unsigned int reserved[122]; - - struct { - unsigned int numViews; - unsigned int viewId0; - radeon_mvcElement_t mvcElements[1]; - } mvc; + unsigned int profile; + unsigned int level; + + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned char chroma_format; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_frame_num_minus4; + + unsigned char pic_order_cnt_type; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + unsigned char num_ref_frames; + unsigned char reserved_8bit; + + signed char pic_init_qp_minus26; + signed char pic_init_qs_minus26; + signed char chroma_qp_index_offset; + signed char second_chroma_qp_index_offset; + + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + unsigned char num_ref_idx_l0_active_minus1; + unsigned char num_ref_idx_l1_active_minus1; + + unsigned short slice_group_change_rate_minus1; + unsigned short reserved_16bit_1; + + unsigned char scaling_list_4x4[6][16]; + unsigned char scaling_list_8x8[2][64]; + + unsigned int frame_num; + unsigned int frame_num_list[16]; + int curr_field_order_cnt_list[2]; + int field_order_cnt_list[16][2]; + + unsigned int decoded_pic_idx; + unsigned int curr_pic_ref_frame_num; + unsigned char ref_frame_list[16]; + + unsigned int reserved[122]; + + struct { + unsigned int numViews; + unsigned int viewId0; + radeon_mvcElement_t mvcElements[1]; + } mvc; } rvcn_dec_message_avc_t; typedef struct rvcn_dec_message_vc1_s { - unsigned int profile; - unsigned int level; - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned int pic_structure; - unsigned int chroma_format; - unsigned short decoded_pic_idx; - unsigned short deblocked_pic_idx; - unsigned short forward_ref_idx; - unsigned short backward_ref_idx; - unsigned int cached_frame_flag; + unsigned int profile; + unsigned int level; + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned int pic_structure; + unsigned int chroma_format; + unsigned short decoded_pic_idx; + unsigned short deblocked_pic_idx; + unsigned short forward_ref_idx; + unsigned short backward_ref_idx; + unsigned int cached_frame_flag; } rvcn_dec_message_vc1_t; typedef struct rvcn_dec_message_mpeg2_vld_s { - unsigned int decoded_pic_idx; - unsigned int forward_ref_pic_idx; - unsigned int backward_ref_pic_idx; - - unsigned char load_intra_quantiser_matrix; - unsigned char load_nonintra_quantiser_matrix; - unsigned char reserved_quantiser_alignement[2]; - unsigned char intra_quantiser_matrix[64]; - unsigned char nonintra_quantiser_matrix[64]; - - unsigned char profile_and_level_indication; - unsigned char chroma_format; - - unsigned char picture_coding_type; - - unsigned char reserved_1; - - unsigned char f_code[2][2]; - unsigned char intra_dc_precision; - unsigned char pic_structure; - unsigned char top_field_first; - unsigned char frame_pred_frame_dct; - unsigned char concealment_motion_vectors; - unsigned char q_scale_type; - unsigned char intra_vlc_format; - unsigned char alternate_scan; + unsigned int decoded_pic_idx; + unsigned int forward_ref_pic_idx; + unsigned int backward_ref_pic_idx; + + unsigned char load_intra_quantiser_matrix; + unsigned char load_nonintra_quantiser_matrix; + unsigned char reserved_quantiser_alignement[2]; + unsigned char intra_quantiser_matrix[64]; + unsigned char nonintra_quantiser_matrix[64]; + + unsigned char profile_and_level_indication; + unsigned char chroma_format; + + unsigned char picture_coding_type; + + unsigned char reserved_1; + + unsigned char f_code[2][2]; + unsigned char intra_dc_precision; + unsigned char pic_structure; + unsigned char top_field_first; + unsigned char frame_pred_frame_dct; + unsigned char concealment_motion_vectors; + unsigned char q_scale_type; + unsigned char intra_vlc_format; + unsigned char alternate_scan; } rvcn_dec_message_mpeg2_vld_t; typedef struct rvcn_dec_message_mpeg4_asp_vld_s { - unsigned int decoded_pic_idx; - unsigned int forward_ref_pic_idx; - unsigned int backward_ref_pic_idx; - - unsigned int variant_type; - unsigned char profile_and_level_indication; - - unsigned char video_object_layer_verid; - unsigned char video_object_layer_shape; - - unsigned char reserved_1; - - unsigned short video_object_layer_width; - unsigned short video_object_layer_height; - - unsigned short vop_time_increment_resolution; - - unsigned short reserved_2; - - struct { - unsigned int short_video_header :1; - unsigned int obmc_disable :1; - unsigned int interlaced :1; - unsigned int load_intra_quant_mat :1; - unsigned int load_nonintra_quant_mat :1; - unsigned int quarter_sample :1; - unsigned int complexity_estimation_disable :1; - unsigned int resync_marker_disable :1; - unsigned int data_partitioned :1; - unsigned int reversible_vlc :1; - unsigned int newpred_enable :1; - unsigned int reduced_resolution_vop_enable :1; - unsigned int scalability :1; - unsigned int is_object_layer_identifier :1; - unsigned int fixed_vop_rate :1; - unsigned int newpred_segment_type :1; - unsigned int reserved_bits :16; - }; - - unsigned char quant_type; - unsigned char reserved_3[3]; - unsigned char intra_quant_mat[64]; - unsigned char nonintra_quant_mat[64]; - - struct { - unsigned char sprite_enable; - - unsigned char reserved_4[3]; - - unsigned short sprite_width; - unsigned short sprite_height; - short sprite_left_coordinate; - short sprite_top_coordinate; - - unsigned char no_of_sprite_warping_points; - unsigned char sprite_warping_accuracy; - unsigned char sprite_brightness_change; - unsigned char low_latency_sprite_enable; - } sprite_config; - - struct { - struct { - unsigned int check_skip :1; - unsigned int switch_rounding :1; - unsigned int t311 :1; - unsigned int reserved_bits :29; - }; - - unsigned char vol_mode; - - unsigned char reserved_5[3]; - } divx_311_config; - - struct { - unsigned char vop_data_present; - unsigned char vop_coding_type; - unsigned char vop_quant; - unsigned char vop_coded; - unsigned char vop_rounding_type; - unsigned char intra_dc_vlc_thr; - unsigned char top_field_first; - unsigned char alternate_vertical_scan_flag; - unsigned char vop_fcode_forward; - unsigned char vop_fcode_backward; - unsigned int TRB[2]; - unsigned int TRD[2]; - } vop; + unsigned int decoded_pic_idx; + unsigned int forward_ref_pic_idx; + unsigned int backward_ref_pic_idx; + + unsigned int variant_type; + unsigned char profile_and_level_indication; + + unsigned char video_object_layer_verid; + unsigned char video_object_layer_shape; + + unsigned char reserved_1; + + unsigned short video_object_layer_width; + unsigned short video_object_layer_height; + + unsigned short vop_time_increment_resolution; + + unsigned short reserved_2; + + struct { + unsigned int short_video_header : 1; + unsigned int obmc_disable : 1; + unsigned int interlaced : 1; + unsigned int load_intra_quant_mat : 1; + unsigned int load_nonintra_quant_mat : 1; + unsigned int quarter_sample : 1; + unsigned int complexity_estimation_disable : 1; + unsigned int resync_marker_disable : 1; + unsigned int data_partitioned : 1; + unsigned int reversible_vlc : 1; + unsigned int newpred_enable : 1; + unsigned int reduced_resolution_vop_enable : 1; + unsigned int scalability : 1; + unsigned int is_object_layer_identifier : 1; + unsigned int fixed_vop_rate : 1; + unsigned int newpred_segment_type : 1; + unsigned int reserved_bits : 16; + }; + + unsigned char quant_type; + unsigned char reserved_3[3]; + unsigned char intra_quant_mat[64]; + unsigned char nonintra_quant_mat[64]; + + struct { + unsigned char sprite_enable; + + unsigned char reserved_4[3]; + + unsigned short sprite_width; + unsigned short sprite_height; + short sprite_left_coordinate; + short sprite_top_coordinate; + + unsigned char no_of_sprite_warping_points; + unsigned char sprite_warping_accuracy; + unsigned char sprite_brightness_change; + unsigned char low_latency_sprite_enable; + } sprite_config; + + struct { + struct { + unsigned int check_skip : 1; + unsigned int switch_rounding : 1; + unsigned int t311 : 1; + unsigned int reserved_bits : 29; + }; + + unsigned char vol_mode; + + unsigned char reserved_5[3]; + } divx_311_config; + + struct { + unsigned char vop_data_present; + unsigned char vop_coding_type; + unsigned char vop_quant; + unsigned char vop_coded; + unsigned char vop_rounding_type; + unsigned char intra_dc_vlc_thr; + unsigned char top_field_first; + unsigned char alternate_vertical_scan_flag; + unsigned char vop_fcode_forward; + unsigned char vop_fcode_backward; + unsigned int TRB[2]; + unsigned int TRD[2]; + } vop; } rvcn_dec_message_mpeg4_asp_vld_t; typedef struct rvcn_dec_message_hevc_s { - unsigned int sps_info_flags; - unsigned int pps_info_flags; - unsigned char chroma_format; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - unsigned char log2_max_pic_order_cnt_lsb_minus4; - - unsigned char sps_max_dec_pic_buffering_minus1; - unsigned char log2_min_luma_coding_block_size_minus3; - unsigned char log2_diff_max_min_luma_coding_block_size; - unsigned char log2_min_transform_block_size_minus2; - - unsigned char log2_diff_max_min_transform_block_size; - unsigned char max_transform_hierarchy_depth_inter; - unsigned char max_transform_hierarchy_depth_intra; - unsigned char pcm_sample_bit_depth_luma_minus1; - - unsigned char pcm_sample_bit_depth_chroma_minus1; - unsigned char log2_min_pcm_luma_coding_block_size_minus3; - unsigned char log2_diff_max_min_pcm_luma_coding_block_size; - unsigned char num_extra_slice_header_bits; - - unsigned char num_short_term_ref_pic_sets; - unsigned char num_long_term_ref_pic_sps; - unsigned char num_ref_idx_l0_default_active_minus1; - unsigned char num_ref_idx_l1_default_active_minus1; - - signed char pps_cb_qp_offset; - signed char pps_cr_qp_offset; - signed char pps_beta_offset_div2; - signed char pps_tc_offset_div2; - - unsigned char diff_cu_qp_delta_depth; - unsigned char num_tile_columns_minus1; - unsigned char num_tile_rows_minus1; - unsigned char log2_parallel_merge_level_minus2; - - unsigned short column_width_minus1[19]; - unsigned short row_height_minus1[21]; - - signed char init_qp_minus26; - unsigned char num_delta_pocs_ref_rps_idx; - unsigned char curr_idx; - unsigned char reserved[1]; - int curr_poc; - unsigned char ref_pic_list[16]; - int poc_list[16]; - unsigned char ref_pic_set_st_curr_before[8]; - unsigned char ref_pic_set_st_curr_after[8]; - unsigned char ref_pic_set_lt_curr[8]; - - unsigned char ucScalingListDCCoefSizeID2[6]; - unsigned char ucScalingListDCCoefSizeID3[2]; - - unsigned char highestTid; - unsigned char isNonRef; - - unsigned char p010_mode; - unsigned char msb_mode; - unsigned char luma_10to8; - unsigned char chroma_10to8; - - unsigned char hevc_reserved[2]; - - unsigned char direct_reflist[2][15]; + unsigned int sps_info_flags; + unsigned int pps_info_flags; + unsigned char chroma_format; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + + unsigned char sps_max_dec_pic_buffering_minus1; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_transform_block_size_minus2; + + unsigned char log2_diff_max_min_transform_block_size; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char pcm_sample_bit_depth_luma_minus1; + + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char num_extra_slice_header_bits; + + unsigned char num_short_term_ref_pic_sets; + unsigned char num_long_term_ref_pic_sps; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + + signed char pps_cb_qp_offset; + signed char pps_cr_qp_offset; + signed char pps_beta_offset_div2; + signed char pps_tc_offset_div2; + + unsigned char diff_cu_qp_delta_depth; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + unsigned char log2_parallel_merge_level_minus2; + + unsigned short column_width_minus1[19]; + unsigned short row_height_minus1[21]; + + signed char init_qp_minus26; + unsigned char num_delta_pocs_ref_rps_idx; + unsigned char curr_idx; + unsigned char reserved[1]; + int curr_poc; + unsigned char ref_pic_list[16]; + int poc_list[16]; + unsigned char ref_pic_set_st_curr_before[8]; + unsigned char ref_pic_set_st_curr_after[8]; + unsigned char ref_pic_set_lt_curr[8]; + + unsigned char ucScalingListDCCoefSizeID2[6]; + unsigned char ucScalingListDCCoefSizeID3[2]; + + unsigned char highestTid; + unsigned char isNonRef; + + unsigned char p010_mode; + unsigned char msb_mode; + unsigned char luma_10to8; + unsigned char chroma_10to8; + + unsigned char hevc_reserved[2]; + + unsigned char direct_reflist[2][15]; } rvcn_dec_message_hevc_t; typedef struct rvcn_dec_message_vp9_s { - unsigned int frame_header_flags; - - unsigned char frame_context_idx; - unsigned char reset_frame_context; - - unsigned char curr_pic_idx; - unsigned char interp_filter; - - unsigned char filter_level; - unsigned char sharpness_level; - unsigned char lf_adj_level[8][4][2]; - unsigned char base_qindex; - signed char y_dc_delta_q; - signed char uv_ac_delta_q; - signed char uv_dc_delta_q; - - unsigned char log2_tile_cols; - unsigned char log2_tile_rows; - unsigned char tx_mode; - unsigned char reference_mode; - unsigned char chroma_format; - - unsigned char ref_frame_map[8]; - - unsigned char frame_refs[3]; - unsigned char ref_frame_sign_bias[3]; - unsigned char frame_to_show; - unsigned char bit_depth_luma_minus8; - unsigned char bit_depth_chroma_minus8; - - unsigned char p010_mode; - unsigned char msb_mode; - unsigned char luma_10to8; - unsigned char chroma_10to8; - - unsigned int vp9_frame_size; - unsigned int compressed_header_size; - unsigned int uncompressed_header_size; + unsigned int frame_header_flags; + + unsigned char frame_context_idx; + unsigned char reset_frame_context; + + unsigned char curr_pic_idx; + unsigned char interp_filter; + + unsigned char filter_level; + unsigned char sharpness_level; + unsigned char lf_adj_level[8][4][2]; + unsigned char base_qindex; + signed char y_dc_delta_q; + signed char uv_ac_delta_q; + signed char uv_dc_delta_q; + + unsigned char log2_tile_cols; + unsigned char log2_tile_rows; + unsigned char tx_mode; + unsigned char reference_mode; + unsigned char chroma_format; + + unsigned char ref_frame_map[8]; + + unsigned char frame_refs[3]; + unsigned char ref_frame_sign_bias[3]; + unsigned char frame_to_show; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + + unsigned char p010_mode; + unsigned char msb_mode; + unsigned char luma_10to8; + unsigned char chroma_10to8; + + unsigned int vp9_frame_size; + unsigned int compressed_header_size; + unsigned int uncompressed_header_size; } rvcn_dec_message_vp9_t; typedef struct rvcn_dec_feature_index_s { - unsigned int feature_id; - unsigned int offset; - unsigned int size; - unsigned int filled; + unsigned int feature_id; + unsigned int offset; + unsigned int size; + unsigned int filled; } rvcn_dec_feature_index_t; typedef struct rvcn_dec_feedback_header_s { - unsigned int header_size; - unsigned int total_size; - unsigned int num_buffers; - unsigned int status_report_feedback_number; - unsigned int status; - unsigned int value; - unsigned int errorBits; - rvcn_dec_feature_index_t index[1]; + unsigned int header_size; + unsigned int total_size; + unsigned int num_buffers; + unsigned int status_report_feedback_number; + unsigned int status; + unsigned int value; + unsigned int errorBits; + rvcn_dec_feature_index_t index[1]; } rvcn_dec_feedback_header_t; typedef struct rvcn_dec_feedback_profiling_s { - unsigned int size; - - unsigned int decodingTime; - unsigned int decodePlusOverhead; - unsigned int masterTimerHits; - unsigned int uvdLBSIREWaitCount; - - unsigned int avgMPCMemLatency; - unsigned int maxMPCMemLatency; - unsigned int uvdMPCLumaHits; - unsigned int uvdMPCLumaHitPend; - unsigned int uvdMPCLumaSearch; - unsigned int uvdMPCChromaHits; - unsigned int uvdMPCChromaHitPend; - unsigned int uvdMPCChromaSearch; - - unsigned int uvdLMIPerfCountLo; - unsigned int uvdLMIPerfCountHi; - unsigned int uvdLMIAvgLatCntrEnvHit; - unsigned int uvdLMILatCntr; - - unsigned int frameCRC0; - unsigned int frameCRC1; - unsigned int frameCRC2; - unsigned int frameCRC3; - - unsigned int uvdLMIPerfMonCtrl; - unsigned int uvdLMILatCtrl; - unsigned int uvdMPCCntl; - unsigned int reserved0[4]; - unsigned int decoderID; - unsigned int codec; - - unsigned int dmaHwCrc32Enable; - unsigned int dmaHwCrc32Value; - unsigned int dmaHwCrc32Value2; + unsigned int size; + + unsigned int decodingTime; + unsigned int decodePlusOverhead; + unsigned int masterTimerHits; + unsigned int uvdLBSIREWaitCount; + + unsigned int avgMPCMemLatency; + unsigned int maxMPCMemLatency; + unsigned int uvdMPCLumaHits; + unsigned int uvdMPCLumaHitPend; + unsigned int uvdMPCLumaSearch; + unsigned int uvdMPCChromaHits; + unsigned int uvdMPCChromaHitPend; + unsigned int uvdMPCChromaSearch; + + unsigned int uvdLMIPerfCountLo; + unsigned int uvdLMIPerfCountHi; + unsigned int uvdLMIAvgLatCntrEnvHit; + unsigned int uvdLMILatCntr; + + unsigned int frameCRC0; + unsigned int frameCRC1; + unsigned int frameCRC2; + unsigned int frameCRC3; + + unsigned int uvdLMIPerfMonCtrl; + unsigned int uvdLMILatCtrl; + unsigned int uvdMPCCntl; + unsigned int reserved0[4]; + unsigned int decoderID; + unsigned int codec; + + unsigned int dmaHwCrc32Enable; + unsigned int dmaHwCrc32Value; + unsigned int dmaHwCrc32Value2; } rvcn_dec_feedback_profiling_t; typedef struct rvcn_dec_vp9_nmv_ctx_mask_s { - unsigned short classes_mask[2]; - unsigned short bits_mask[2]; - unsigned char joints_mask; - unsigned char sign_mask[2]; - unsigned char class0_mask[2]; - unsigned char class0_fp_mask[2]; - unsigned char fp_mask[2]; - unsigned char class0_hp_mask[2]; - unsigned char hp_mask[2]; - unsigned char reserve[11]; + unsigned short classes_mask[2]; + unsigned short bits_mask[2]; + unsigned char joints_mask; + unsigned char sign_mask[2]; + unsigned char class0_mask[2]; + unsigned char class0_fp_mask[2]; + unsigned char fp_mask[2]; + unsigned char class0_hp_mask[2]; + unsigned char hp_mask[2]; + unsigned char reserve[11]; } rvcn_dec_vp9_nmv_ctx_mask_t; -typedef struct rvcn_dec_vp9_nmv_component_s{ - unsigned char sign; - unsigned char classes[10]; - unsigned char class0[1]; - unsigned char bits[10]; - unsigned char class0_fp[2][3]; - unsigned char fp[3]; - unsigned char class0_hp; - unsigned char hp; +typedef struct rvcn_dec_vp9_nmv_component_s { + unsigned char sign; + unsigned char classes[10]; + unsigned char class0[1]; + unsigned char bits[10]; + unsigned char class0_fp[2][3]; + unsigned char fp[3]; + unsigned char class0_hp; + unsigned char hp; } rvcn_dec_vp9_nmv_component_t; typedef struct rvcn_dec_vp9_probs_s { - rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask; - unsigned char coef_probs[4][2][2][6][6][3]; - unsigned char y_mode_prob[4][9]; - unsigned char uv_mode_prob[10][9]; - unsigned char single_ref_prob[5][2]; - unsigned char switchable_interp_prob[4][2]; - unsigned char partition_prob[16][3]; - unsigned char inter_mode_probs[7][3]; - unsigned char mbskip_probs[3]; - unsigned char intra_inter_prob[4]; - unsigned char comp_inter_prob[5]; - unsigned char comp_ref_prob[5]; - unsigned char tx_probs_32x32[2][3]; - unsigned char tx_probs_16x16[2][2]; - unsigned char tx_probs_8x8[2][1]; - unsigned char mv_joints[3]; - rvcn_dec_vp9_nmv_component_t mv_comps[2]; + rvcn_dec_vp9_nmv_ctx_mask_t nmvc_mask; + unsigned char coef_probs[4][2][2][6][6][3]; + unsigned char y_mode_prob[4][9]; + unsigned char uv_mode_prob[10][9]; + unsigned char single_ref_prob[5][2]; + unsigned char switchable_interp_prob[4][2]; + unsigned char partition_prob[16][3]; + unsigned char inter_mode_probs[7][3]; + unsigned char mbskip_probs[3]; + unsigned char intra_inter_prob[4]; + unsigned char comp_inter_prob[5]; + unsigned char comp_ref_prob[5]; + unsigned char tx_probs_32x32[2][3]; + unsigned char tx_probs_16x16[2][2]; + unsigned char tx_probs_8x8[2][1]; + unsigned char mv_joints[3]; + rvcn_dec_vp9_nmv_component_t mv_comps[2]; } rvcn_dec_vp9_probs_t; typedef struct rvcn_dec_vp9_probs_segment_s { - union { - rvcn_dec_vp9_probs_t probs; - unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; - }; - - union { - struct { - unsigned int feature_data[8]; - unsigned char tree_probs[7]; - unsigned char pred_probs[3]; - unsigned char abs_delta; - unsigned char feature_mask[8]; - } seg; - unsigned char segment_data[256]; - }; + union { + rvcn_dec_vp9_probs_t probs; + unsigned char probs_data[RDECODE_VP9_PROBS_DATA_SIZE]; + }; + + union { + struct { + unsigned int feature_data[8]; + unsigned char tree_probs[7]; + unsigned char pred_probs[3]; + unsigned char abs_delta; + unsigned char feature_mask[8]; + } seg; + unsigned char segment_data[256]; + }; } rvcn_dec_vp9_probs_segment_t; struct jpeg_params { - unsigned bsd_size; - unsigned dt_pitch; - unsigned dt_uv_pitch; - unsigned dt_luma_top_offset; - unsigned dt_chroma_top_offset; - bool direct_reg; + unsigned bsd_size; + unsigned dt_pitch; + unsigned dt_uv_pitch; + unsigned dt_luma_top_offset; + unsigned dt_chroma_top_offset; + bool direct_reg; }; struct radeon_decoder { - struct pipe_video_codec base; - - unsigned stream_handle; - unsigned stream_type; - unsigned frame_number; - - struct pipe_screen *screen; - struct radeon_winsys *ws; - struct radeon_cmdbuf *cs; - - void *msg; - uint32_t *fb; - uint8_t *it; - uint8_t *probs; - void *bs_ptr; - - struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; - struct rvid_buffer bs_buffers[NUM_BUFFERS]; - struct rvid_buffer dpb; - struct rvid_buffer ctx; - struct rvid_buffer sessionctx; - - unsigned bs_size; - unsigned cur_buffer; - void *render_pic_list[16]; - bool show_frame; - unsigned ref_idx; - struct { - unsigned data0; - unsigned data1; - unsigned cmd; - unsigned cntl; - } reg; - struct jpeg_params jpg; - void (*send_cmd)(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); + struct pipe_video_codec base; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys *ws; + struct radeon_cmdbuf *cs; + + void *msg; + uint32_t *fb; + uint8_t *it; + uint8_t *probs; + void *bs_ptr; + + struct rvid_buffer msg_fb_it_probs_buffers[NUM_BUFFERS]; + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + struct rvid_buffer dpb; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + + unsigned bs_size; + unsigned cur_buffer; + void *render_pic_list[16]; + bool show_frame; + unsigned ref_idx; + struct { + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned cntl; + } reg; + struct jpeg_params jpg; + void (*send_cmd)(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); }; -void send_cmd_dec(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); +void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); -void send_cmd_jpeg(struct radeon_decoder *dec, - struct pipe_video_buffer *target, - struct pipe_picture_desc *picture); +void send_cmd_jpeg(struct radeon_decoder *dec, struct pipe_video_buffer *target, + struct pipe_picture_desc *picture); struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templat); + const struct pipe_video_codec *templat); #endif diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c index 31b28f76d..8e2b1a3c8 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.c +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.c @@ -25,189 +25,99 @@ * **************************************************************************/ -#include <unistd.h> +#include "radeon_video.h" +#include "radeon_vce.h" +#include "radeonsi/si_pipe.h" #include "util/u_memory.h" #include "util/u_video.h" - #include "vl/vl_defines.h" #include "vl/vl_video_buffer.h" -#include "radeonsi/si_pipe.h" -#include "radeon_video.h" -#include "radeon_vce.h" +#include <unistd.h> /* generate an stream handle */ unsigned si_vid_alloc_stream_handle() { - static unsigned counter = 0; - unsigned stream_handle = 0; - unsigned pid = getpid(); - int i; + static unsigned counter = 0; + unsigned stream_handle = 0; + unsigned pid = getpid(); + int i; - for (i = 0; i < 32; ++i) - stream_handle |= ((pid >> i) & 1) << (31 - i); + for (i = 0; i < 32; ++i) + stream_handle |= ((pid >> i) & 1) << (31 - i); - stream_handle ^= ++counter; - return stream_handle; + stream_handle ^= ++counter; + return stream_handle; } /* create a buffer in the winsys */ -bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, - unsigned size, unsigned usage) +bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, unsigned size, + unsigned usage) { - memset(buffer, 0, sizeof(*buffer)); - buffer->usage = usage; + memset(buffer, 0, sizeof(*buffer)); + buffer->usage = usage; - /* Hardware buffer placement restrictions require the kernel to be - * able to move buffers around individually, so request a - * non-sub-allocated buffer. - */ - buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, - usage, size)); + /* Hardware buffer placement restrictions require the kernel to be + * able to move buffers around individually, so request a + * non-sub-allocated buffer. + */ + buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, usage, size)); - return buffer->res != NULL; + return buffer->res != NULL; } /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer) { - si_resource_reference(&buffer->res, NULL); + si_resource_reference(&buffer->res, NULL); } /* reallocate a buffer, preserving its content */ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, - struct rvid_buffer *new_buf, unsigned new_size) + struct rvid_buffer *new_buf, unsigned new_size) { - struct si_screen *sscreen = (struct si_screen *)screen; - struct radeon_winsys* ws = sscreen->ws; - unsigned bytes = MIN2(new_buf->res->buf->size, new_size); - struct rvid_buffer old_buf = *new_buf; - void *src = NULL, *dst = NULL; - - if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) - goto error; - - src = ws->buffer_map(old_buf.res->buf, cs, - PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); - if (!src) - goto error; - - dst = ws->buffer_map(new_buf->res->buf, cs, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); - if (!dst) - goto error; - - memcpy(dst, src, bytes); - if (new_size > bytes) { - new_size -= bytes; - dst += bytes; - memset(dst, 0, new_size); - } - ws->buffer_unmap(new_buf->res->buf); - ws->buffer_unmap(old_buf.res->buf); - si_vid_destroy_buffer(&old_buf); - return true; + struct si_screen *sscreen = (struct si_screen *)screen; + struct radeon_winsys *ws = sscreen->ws; + unsigned bytes = MIN2(new_buf->res->buf->size, new_size); + struct rvid_buffer old_buf = *new_buf; + void *src = NULL, *dst = NULL; + + if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) + goto error; + + src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); + if (!src) + goto error; + + dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + if (!dst) + goto error; + + memcpy(dst, src, bytes); + if (new_size > bytes) { + new_size -= bytes; + dst += bytes; + memset(dst, 0, new_size); + } + ws->buffer_unmap(new_buf->res->buf); + ws->buffer_unmap(old_buf.res->buf); + si_vid_destroy_buffer(&old_buf); + return true; error: - if (src) - ws->buffer_unmap(old_buf.res->buf); - si_vid_destroy_buffer(new_buf); - *new_buf = old_buf; - return false; + if (src) + ws->buffer_unmap(old_buf.res->buf); + si_vid_destroy_buffer(new_buf); + *new_buf = old_buf; + return false; } /* clear the buffer with zeros */ -void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) +void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer *buffer) { - struct si_context *sctx = (struct si_context*)context; + struct si_context *sctx = (struct si_context *)context; - si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->b.b.width0, 0); - context->flush(context, NULL, 0); -} - -/** - * join surfaces into the same buffer with identical tiling params - * sumup their sizes and replace the backend buffers with a single bo - */ -void si_vid_join_surfaces(struct si_context *sctx, - struct pb_buffer** buffers[VL_NUM_COMPONENTS], - struct radeon_surf *surfaces[VL_NUM_COMPONENTS]) -{ - struct radeon_winsys *ws = sctx->ws;; - unsigned best_tiling, best_wh, off; - unsigned size, alignment; - struct pb_buffer *pb; - unsigned i, j; - - for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) { - unsigned wh; - - if (!surfaces[i]) - continue; - - if (sctx->chip_class < GFX9) { - /* choose the smallest bank w/h for now */ - wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh; - if (wh < best_wh) { - best_wh = wh; - best_tiling = i; - } - } - } - - for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!surfaces[i]) - continue; - - /* adjust the texture layer offsets */ - off = align(off, surfaces[i]->surf_alignment); - - if (sctx->chip_class < GFX9) { - /* copy the tiling parameters */ - surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw; - surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh; - surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea; - surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split; - - for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j) - surfaces[i]->u.legacy.level[j].offset += off; - } else { - surfaces[i]->u.gfx9.surf_offset += off; - for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.gfx9.offset); ++j) - surfaces[i]->u.gfx9.offset[j] += off; - } - - surfaces[i]->flags |= RADEON_SURF_IMPORTED; - off += surfaces[i]->surf_size; - } - - for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!buffers[i] || !*buffers[i]) - continue; - - size = align(size, (*buffers[i])->alignment); - size += (*buffers[i])->size; - alignment = MAX2(alignment, (*buffers[i])->alignment * 1); - } - - if (!size) - return; - - /* TODO: 2D tiling workaround */ - alignment *= 2; - - pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM, - RADEON_FLAG_GTT_WC); - if (!pb) - return; - - for (i = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!buffers[i] || !*buffers[i]) - continue; - - pb_reference(buffers[i], pb); - } - - pb_reference(&pb, NULL); + si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->b.b.width0, 0); + context->flush(context, NULL, 0); } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h index b7797c05d..5ef0a2134 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_video.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_video.h @@ -31,39 +31,32 @@ #include "radeon/radeon_winsys.h" #include "vl/vl_video_buffer.h" -#define RVID_ERR(fmt, args...) \ - fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args) +#define RVID_ERR(fmt, args...) \ + fprintf(stderr, "EE %s:%d %s UVD - " fmt, __FILE__, __LINE__, __func__, ##args) #define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) /* video buffer representation */ -struct rvid_buffer -{ - unsigned usage; - struct si_resource *res; +struct rvid_buffer { + unsigned usage; + struct si_resource *res; }; /* generate an stream handle */ unsigned si_vid_alloc_stream_handle(void); /* create a buffer in the winsys */ -bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, - unsigned size, unsigned usage); +bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, unsigned size, + unsigned usage); /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer); /* reallocate a buffer, preserving its content */ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, - struct rvid_buffer *new_buf, unsigned new_size); + struct rvid_buffer *new_buf, unsigned new_size); /* clear the buffer with zeros */ -void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer); - -/* join surfaces into the same buffer with identical tiling params - sumup their sizes and replace the backend buffers with a single bo */ -void si_vid_join_surfaces(struct si_context *sctx, - struct pb_buffer** buffers[VL_NUM_COMPONENTS], - struct radeon_surf *surfaces[VL_NUM_COMPONENTS]); +void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer *buffer); #endif // RADEON_VIDEO_H diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h index 525c28182..e3bb9c3f3 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_winsys.h @@ -30,65 +30,70 @@ /* Whether the next IB can start immediately and not wait for draws and * dispatches from the current IB to finish. */ -#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31) +#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31) -#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \ - (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW) - -#include "pipebuffer/pb_buffer.h" +#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \ + (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW) #include "amd/common/ac_gpu_info.h" #include "amd/common/ac_surface.h" +#include "pipebuffer/pb_buffer.h" /* Tiling flags. */ -enum radeon_bo_layout { - RADEON_LAYOUT_LINEAR = 0, - RADEON_LAYOUT_TILED, - RADEON_LAYOUT_SQUARETILED, +enum radeon_bo_layout +{ + RADEON_LAYOUT_LINEAR = 0, + RADEON_LAYOUT_TILED, + RADEON_LAYOUT_SQUARETILED, - RADEON_LAYOUT_UNKNOWN + RADEON_LAYOUT_UNKNOWN }; -enum radeon_bo_domain { /* bitfield */ - RADEON_DOMAIN_GTT = 2, - RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, - RADEON_DOMAIN_GDS = 8, - RADEON_DOMAIN_OA = 16, +enum radeon_bo_domain +{ /* bitfield */ + RADEON_DOMAIN_GTT = 2, + RADEON_DOMAIN_VRAM = 4, + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; -enum radeon_bo_flag { /* bitfield */ - RADEON_FLAG_GTT_WC = (1 << 0), - RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), - RADEON_FLAG_NO_SUBALLOC = (1 << 2), - RADEON_FLAG_SPARSE = (1 << 3), - RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), - RADEON_FLAG_READ_ONLY = (1 << 5), - RADEON_FLAG_32BIT = (1 << 6), +enum radeon_bo_flag +{ /* bitfield */ + RADEON_FLAG_GTT_WC = (1 << 0), + RADEON_FLAG_NO_CPU_ACCESS = (1 << 1), + RADEON_FLAG_NO_SUBALLOC = (1 << 2), + RADEON_FLAG_SPARSE = (1 << 3), + RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4), + RADEON_FLAG_READ_ONLY = (1 << 5), + RADEON_FLAG_32BIT = (1 << 6), }; -enum radeon_dependency_flag { - /* Add the dependency to the parallel compute IB only. */ - RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0, +enum radeon_dependency_flag +{ + /* Add the dependency to the parallel compute IB only. */ + RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY = 1 << 0, - /* Instead of waiting for a job to finish execution, the dependency will - * be signaled when the job starts execution. - */ - RADEON_DEPENDENCY_START_FENCE = 1 << 1, + /* Instead of waiting for a job to finish execution, the dependency will + * be signaled when the job starts execution. + */ + RADEON_DEPENDENCY_START_FENCE = 1 << 1, }; -enum radeon_bo_usage { /* bitfield */ - RADEON_USAGE_READ = 2, - RADEON_USAGE_WRITE = 4, - RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, +enum radeon_bo_usage +{ /* bitfield */ + RADEON_USAGE_READ = 2, + RADEON_USAGE_WRITE = 4, + RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE, - /* The winsys ensures that the CS submission will be scheduled after - * previously flushed CSs referencing this BO in a conflicting way. - */ - RADEON_USAGE_SYNCHRONIZED = 8 + /* The winsys ensures that the CS submission will be scheduled after + * previously flushed CSs referencing this BO in a conflicting way. + */ + RADEON_USAGE_SYNCHRONIZED = 8 }; -enum radeon_transfer_flags { +enum radeon_transfer_flags +{ /* Indicates that the caller will unmap the buffer. * * Not unmapping buffers is an important performance optimization for @@ -99,787 +104,735 @@ enum radeon_transfer_flags { #define RADEON_SPARSE_PAGE_SIZE (64 * 1024) -enum ring_type { - RING_GFX = 0, - RING_COMPUTE, - RING_DMA, - RING_UVD, - RING_VCE, - RING_UVD_ENC, - RING_VCN_DEC, - RING_VCN_ENC, - RING_VCN_JPEG, - RING_LAST, -}; - -enum radeon_value_id { - RADEON_REQUESTED_VRAM_MEMORY, - RADEON_REQUESTED_GTT_MEMORY, - RADEON_MAPPED_VRAM, - RADEON_MAPPED_GTT, - RADEON_BUFFER_WAIT_TIME_NS, - RADEON_NUM_MAPPED_BUFFERS, - RADEON_TIMESTAMP, - RADEON_NUM_GFX_IBS, - RADEON_NUM_SDMA_IBS, - RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */ - RADEON_GFX_IB_SIZE_COUNTER, - RADEON_NUM_BYTES_MOVED, - RADEON_NUM_EVICTIONS, - RADEON_NUM_VRAM_CPU_PAGE_FAULTS, - RADEON_VRAM_USAGE, - RADEON_VRAM_VIS_USAGE, - RADEON_GTT_USAGE, - RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ - RADEON_CURRENT_SCLK, - RADEON_CURRENT_MCLK, - RADEON_CS_THREAD_TIME, +enum radeon_value_id +{ + RADEON_REQUESTED_VRAM_MEMORY, + RADEON_REQUESTED_GTT_MEMORY, + RADEON_MAPPED_VRAM, + RADEON_MAPPED_GTT, + RADEON_BUFFER_WAIT_TIME_NS, + RADEON_NUM_MAPPED_BUFFERS, + RADEON_TIMESTAMP, + RADEON_NUM_GFX_IBS, + RADEON_NUM_SDMA_IBS, + RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */ + RADEON_GFX_IB_SIZE_COUNTER, + RADEON_NUM_BYTES_MOVED, + RADEON_NUM_EVICTIONS, + RADEON_NUM_VRAM_CPU_PAGE_FAULTS, + RADEON_VRAM_USAGE, + RADEON_VRAM_VIS_USAGE, + RADEON_GTT_USAGE, + RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ + RADEON_CURRENT_SCLK, + RADEON_CURRENT_MCLK, + RADEON_CS_THREAD_TIME, }; -enum radeon_bo_priority { - /* Each group of two has the same priority. */ - RADEON_PRIO_FENCE = 0, - RADEON_PRIO_TRACE, +enum radeon_bo_priority +{ + /* Each group of two has the same priority. */ + RADEON_PRIO_FENCE = 0, + RADEON_PRIO_TRACE, - RADEON_PRIO_SO_FILLED_SIZE = 2, - RADEON_PRIO_QUERY, + RADEON_PRIO_SO_FILLED_SIZE = 2, + RADEON_PRIO_QUERY, - RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ - RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ + RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ + RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ - RADEON_PRIO_DRAW_INDIRECT = 6, - RADEON_PRIO_INDEX_BUFFER, + RADEON_PRIO_DRAW_INDIRECT = 6, + RADEON_PRIO_INDEX_BUFFER, - RADEON_PRIO_CP_DMA = 8, - RADEON_PRIO_BORDER_COLORS, + RADEON_PRIO_CP_DMA = 8, + RADEON_PRIO_BORDER_COLORS, - RADEON_PRIO_CONST_BUFFER = 10, - RADEON_PRIO_DESCRIPTORS, + RADEON_PRIO_CONST_BUFFER = 10, + RADEON_PRIO_DESCRIPTORS, - RADEON_PRIO_SAMPLER_BUFFER = 12, - RADEON_PRIO_VERTEX_BUFFER, + RADEON_PRIO_SAMPLER_BUFFER = 12, + RADEON_PRIO_VERTEX_BUFFER, - RADEON_PRIO_SHADER_RW_BUFFER = 14, - RADEON_PRIO_COMPUTE_GLOBAL, + RADEON_PRIO_SHADER_RW_BUFFER = 14, + RADEON_PRIO_COMPUTE_GLOBAL, - RADEON_PRIO_SAMPLER_TEXTURE = 16, - RADEON_PRIO_SHADER_RW_IMAGE, + RADEON_PRIO_SAMPLER_TEXTURE = 16, + RADEON_PRIO_SHADER_RW_IMAGE, - RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, - RADEON_PRIO_COLOR_BUFFER, + RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18, + RADEON_PRIO_COLOR_BUFFER, - RADEON_PRIO_DEPTH_BUFFER = 20, + RADEON_PRIO_DEPTH_BUFFER = 20, - RADEON_PRIO_COLOR_BUFFER_MSAA = 22, + RADEON_PRIO_COLOR_BUFFER_MSAA = 22, - RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, + RADEON_PRIO_DEPTH_BUFFER_MSAA = 24, - RADEON_PRIO_SEPARATE_META = 26, - RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ + RADEON_PRIO_SEPARATE_META = 26, + RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */ - RADEON_PRIO_SHADER_RINGS = 28, + RADEON_PRIO_SHADER_RINGS = 28, - RADEON_PRIO_SCRATCH_BUFFER = 30, - /* 31 is the maximum value */ + RADEON_PRIO_SCRATCH_BUFFER = 30, + /* 31 is the maximum value */ }; struct winsys_handle; struct radeon_winsys_ctx; struct radeon_cmdbuf_chunk { - unsigned cdw; /* Number of used dwords. */ - unsigned max_dw; /* Maximum number of dwords. */ - uint32_t *buf; /* The base pointer of the chunk. */ + unsigned cdw; /* Number of used dwords. */ + unsigned max_dw; /* Maximum number of dwords. */ + uint32_t *buf; /* The base pointer of the chunk. */ }; struct radeon_cmdbuf { - struct radeon_cmdbuf_chunk current; - struct radeon_cmdbuf_chunk *prev; - unsigned num_prev; /* Number of previous chunks. */ - unsigned max_prev; /* Space in array pointed to by prev. */ - unsigned prev_dw; /* Total number of dwords in previous chunks. */ - - /* Memory usage of the buffer list. These are always 0 for preamble IBs. */ - uint64_t used_vram; - uint64_t used_gart; - uint64_t gpu_address; + struct radeon_cmdbuf_chunk current; + struct radeon_cmdbuf_chunk *prev; + unsigned num_prev; /* Number of previous chunks. */ + unsigned max_prev; /* Space in array pointed to by prev. */ + unsigned prev_dw; /* Total number of dwords in previous chunks. */ + + /* Memory usage of the buffer list. These are always 0 for preamble IBs. */ + uint64_t used_vram; + uint64_t used_gart; + uint64_t gpu_address; }; /* Tiling info for display code, DRI sharing, and other data. */ struct radeon_bo_metadata { - /* Tiling flags describing the texture layout for display code - * and DRI sharing. - */ - union { - struct { - enum radeon_bo_layout microtile; - enum radeon_bo_layout macrotile; - unsigned pipe_config; - unsigned bankw; - unsigned bankh; - unsigned tile_split; - unsigned mtilea; - unsigned num_banks; - unsigned stride; - bool scanout; - } legacy; - - struct { - /* surface flags */ - unsigned swizzle_mode:5; - - /* DCC flags */ - /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ - unsigned dcc_offset_256B:24; - unsigned dcc_pitch_max:14; /* (mip chain pitch - 1) for DCN */ - unsigned dcc_independent_64B:1; - } gfx9; - } u; - - /* Additional metadata associated with the buffer, in bytes. - * The maximum size is 64 * 4. This is opaque for the winsys & kernel. - * Supported by amdgpu only. - */ - uint32_t size_metadata; - uint32_t metadata[64]; + /* Tiling flags describing the texture layout for display code + * and DRI sharing. + */ + union { + struct { + enum radeon_bo_layout microtile; + enum radeon_bo_layout macrotile; + unsigned pipe_config; + unsigned bankw; + unsigned bankh; + unsigned tile_split; + unsigned mtilea; + unsigned num_banks; + unsigned stride; + bool scanout; + } legacy; + + struct { + /* surface flags */ + unsigned swizzle_mode : 5; + + /* DCC flags */ + /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */ + unsigned dcc_offset_256B : 24; + unsigned dcc_pitch_max : 14; /* (mip chain pitch - 1) for DCN */ + unsigned dcc_independent_64B : 1; + unsigned dcc_independent_128B : 1; + unsigned dcc_max_compressed_block_size : 2; + + bool scanout; + } gfx9; + } u; + + /* Additional metadata associated with the buffer, in bytes. + * The maximum size is 64 * 4. This is opaque for the winsys & kernel. + * Supported by amdgpu only. + */ + uint32_t size_metadata; + uint32_t metadata[64]; }; -enum radeon_feature_id { - RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ - RADEON_FID_R300_CMASK_ACCESS, +enum radeon_feature_id +{ + RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */ + RADEON_FID_R300_CMASK_ACCESS, }; struct radeon_bo_list_item { - uint64_t bo_size; - uint64_t vm_address; - uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ + uint64_t bo_size; + uint64_t vm_address; + uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */ }; struct radeon_winsys { - /** - * The screen object this winsys was created for - */ - struct pipe_screen *screen; - - /** - * Decrement the winsys reference count. - * - * \param ws The winsys this function is called for. - * \return True if the winsys and screen should be destroyed. - */ - bool (*unref)(struct radeon_winsys *ws); - - /** - * Destroy this winsys. - * - * \param ws The winsys this function is called from. - */ - void (*destroy)(struct radeon_winsys *ws); - - /** - * Query an info structure from winsys. - * - * \param ws The winsys this function is called from. - * \param info Return structure - */ - void (*query_info)(struct radeon_winsys *ws, - struct radeon_info *info); - - /** - * A hint for the winsys that it should pin its execution threads to - * a group of cores sharing a specific L3 cache if the CPU has multiple - * L3 caches. This is needed for good multithreading performance on - * AMD Zen CPUs. - */ - void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache); - - /************************************************************************** - * Buffer management. Buffer attributes are mostly fixed over its lifetime. - * - * Remember that gallium gets to choose the interface it needs, and the - * window systems must then implement that interface (rather than the - * other way around...). - *************************************************************************/ - - /** - * Create a buffer object. - * - * \param ws The winsys this function is called from. - * \param size The size to allocate. - * \param alignment An alignment of the buffer in memory. - * \param use_reusable_pool Whether the cache buffer manager should be used. - * \param domain A bitmask of the RADEON_DOMAIN_* flags. - * \return The created buffer object. - */ - struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, - uint64_t size, - unsigned alignment, - enum radeon_bo_domain domain, - enum radeon_bo_flag flags); - - /** - * Map the entire data store of a buffer object into the client's address - * space. - * - * Callers are expected to unmap buffers again if and only if the - * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. - * - * \param buf A winsys buffer object to map. - * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. - * \return The pointer at the beginning of the buffer. - */ - void *(*buffer_map)(struct pb_buffer *buf, - struct radeon_cmdbuf *cs, - enum pipe_transfer_usage usage); - - /** - * Unmap a buffer object from the client's address space. - * - * \param buf A winsys buffer object to unmap. - */ - void (*buffer_unmap)(struct pb_buffer *buf); - - /** - * Wait for the buffer and return true if the buffer is not used - * by the device. - * - * The timeout of 0 will only return the status. - * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer - * is idle. - */ - bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout, - enum radeon_bo_usage usage); - - /** - * Return buffer metadata. - * (tiling info for display code, DRI sharing, and other data) - * - * \param buf A winsys buffer object to get the flags from. - * \param md Metadata - */ - void (*buffer_get_metadata)(struct pb_buffer *buf, - struct radeon_bo_metadata *md); - - /** - * Set buffer metadata. - * (tiling info for display code, DRI sharing, and other data) - * - * \param buf A winsys buffer object to set the flags for. - * \param md Metadata - */ - void (*buffer_set_metadata)(struct pb_buffer *buf, - struct radeon_bo_metadata *md); - - /** - * Get a winsys buffer from a winsys handle. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param ws The winsys this function is called from. - * \param whandle A winsys handle pointer as was received from a state - * tracker. - * \param stride The returned buffer stride in bytes. - */ - struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, - struct winsys_handle *whandle, - unsigned vm_alignment, - unsigned *stride, unsigned *offset); - - /** - * Get a winsys buffer from a user pointer. The resulting buffer can't - * be exported. Both pointer and size must be page aligned. - * - * \param ws The winsys this function is called from. - * \param pointer User pointer to turn into a buffer object. - * \param Size Size in bytes for the new buffer. - */ - struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, - void *pointer, uint64_t size); - - /** - * Whether the buffer was created from a user pointer. - * - * \param buf A winsys buffer object - * \return whether \p buf was created via buffer_from_ptr - */ - bool (*buffer_is_user_ptr)(struct pb_buffer *buf); - - /** Whether the buffer was suballocated. */ - bool (*buffer_is_suballocated)(struct pb_buffer *buf); - - /** - * Get a winsys handle from a winsys buffer. The internal structure - * of the handle is platform-specific and only a winsys should access it. - * - * \param ws The winsys instance for which the handle is to be valid - * \param buf A winsys buffer object to get the handle from. - * \param whandle A winsys handle pointer. - * \param stride A stride of the buffer in bytes, for texturing. - * \return true on success. - */ - bool (*buffer_get_handle)(struct radeon_winsys *ws, - struct pb_buffer *buf, - unsigned stride, unsigned offset, - unsigned slice_size, - struct winsys_handle *whandle); - - /** - * Change the commitment of a (64KB-page aligned) region of the given - * sparse buffer. - * - * \warning There is no automatic synchronization with command submission. - * - * \note Only implemented by the amdgpu winsys. - * - * \return false on out of memory or other failure, true on success. - */ - bool (*buffer_commit)(struct pb_buffer *buf, - uint64_t offset, uint64_t size, - bool commit); - - /** - * Return the virtual address of a buffer. - * - * When virtual memory is not in use, this is the offset relative to the - * relocation base (non-zero for sub-allocated buffers). - * - * \param buf A winsys buffer object - * \return virtual address - */ - uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); - - /** - * Return the offset of this buffer relative to the relocation base. - * This is only non-zero for sub-allocated buffers. - * - * This is only supported in the radeon winsys, since amdgpu uses virtual - * addresses in submissions even for the video engines. - * - * \param buf A winsys buffer object - * \return the offset for relocations - */ - unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf); - - /** - * Query the initial placement of the buffer from the kernel driver. - */ - enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf); - - /************************************************************************** - * Command submission. - * - * Each pipe context should create its own command stream and submit - * commands independently of other contexts. - *************************************************************************/ - - /** - * Create a command submission context. - * Various command streams can be submitted to the same context. - */ - struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); - - /** - * Destroy a context. - */ - void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); - - /** - * Query a GPU reset status. - */ - enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx); - - /** - * Create a command stream. - * - * \param ctx The submission context - * \param ring_type The ring type (GFX, DMA, UVD) - * \param flush Flush callback function associated with the command stream. - * \param user User pointer that will be passed to the flush callback. - */ - struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, - enum ring_type ring_type, - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence), - void *flush_ctx, - bool stop_exec_on_failure); - - /** - * Add a parallel compute IB to a gfx IB. It will share the buffer list - * and fence dependencies with the gfx IB. The gfx flush call will submit - * both IBs at the same time. - * - * The compute IB doesn't have an output fence, so the primary IB has - * to use a wait packet for synchronization. - * - * The returned IB is only a stream for writing packets to the new - * IB. Calling other winsys functions with it is not allowed, not even - * "cs_destroy". Use the gfx IB instead. - * - * \param cs Gfx IB - */ - struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, - bool uses_gds_ordered_append); - - /** - * Destroy a command stream. - * - * \param cs A command stream to destroy. - */ - void (*cs_destroy)(struct radeon_cmdbuf *cs); - - /** - * Add a buffer. Each buffer used by a CS must be added using this function. - * - * \param cs Command stream - * \param buf Buffer - * \param usage Whether the buffer is used for read and/or write. - * \param domain Bitmask of the RADEON_DOMAIN_* flags. - * \param priority A higher number means a greater chance of being - * placed in the requested domain. 15 is the maximum. - * \return Buffer index. - */ - unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf, - enum radeon_bo_usage usage, - enum radeon_bo_domain domain, + /** + * The screen object this winsys was created for + */ + struct pipe_screen *screen; + + /** + * Decrement the winsys reference count. + * + * \param ws The winsys this function is called for. + * \return True if the winsys and screen should be destroyed. + */ + bool (*unref)(struct radeon_winsys *ws); + + /** + * Destroy this winsys. + * + * \param ws The winsys this function is called from. + */ + void (*destroy)(struct radeon_winsys *ws); + + /** + * Query an info structure from winsys. + * + * \param ws The winsys this function is called from. + * \param info Return structure + */ + void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info); + + /** + * A hint for the winsys that it should pin its execution threads to + * a group of cores sharing a specific L3 cache if the CPU has multiple + * L3 caches. This is needed for good multithreading performance on + * AMD Zen CPUs. + */ + void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache); + + /************************************************************************** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + *************************************************************************/ + + /** + * Create a buffer object. + * + * \param ws The winsys this function is called from. + * \param size The size to allocate. + * \param alignment An alignment of the buffer in memory. + * \param use_reusable_pool Whether the cache buffer manager should be used. + * \param domain A bitmask of the RADEON_DOMAIN_* flags. + * \return The created buffer object. + */ + struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, + enum radeon_bo_domain domain, enum radeon_bo_flag flags); + + /** + * Map the entire data store of a buffer object into the client's address + * space. + * + * Callers are expected to unmap buffers again if and only if the + * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. + * + * \param buf A winsys buffer object to map. + * \param cs A command stream to flush if the buffer is referenced by it. + * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. + * \return The pointer at the beginning of the buffer. + */ + void *(*buffer_map)(struct pb_buffer *buf, struct radeon_cmdbuf *cs, + enum pipe_transfer_usage usage); + + /** + * Unmap a buffer object from the client's address space. + * + * \param buf A winsys buffer object to unmap. + */ + void (*buffer_unmap)(struct pb_buffer *buf); + + /** + * Wait for the buffer and return true if the buffer is not used + * by the device. + * + * The timeout of 0 will only return the status. + * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the buffer + * is idle. + */ + bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout, enum radeon_bo_usage usage); + + /** + * Return buffer metadata. + * (tiling info for display code, DRI sharing, and other data) + * + * \param buf A winsys buffer object to get the flags from. + * \param md Metadata + */ + void (*buffer_get_metadata)(struct pb_buffer *buf, struct radeon_bo_metadata *md); + + /** + * Set buffer metadata. + * (tiling info for display code, DRI sharing, and other data) + * + * \param buf A winsys buffer object to set the flags for. + * \param md Metadata + */ + void (*buffer_set_metadata)(struct pb_buffer *buf, struct radeon_bo_metadata *md); + + /** + * Get a winsys buffer from a winsys handle. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys this function is called from. + * \param whandle A winsys handle pointer as was received from a state + * tracker. + */ + struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle, + unsigned vm_alignment); + + /** + * Get a winsys buffer from a user pointer. The resulting buffer can't + * be exported. Both pointer and size must be page aligned. + * + * \param ws The winsys this function is called from. + * \param pointer User pointer to turn into a buffer object. + * \param Size Size in bytes for the new buffer. + */ + struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, uint64_t size); + + /** + * Whether the buffer was created from a user pointer. + * + * \param buf A winsys buffer object + * \return whether \p buf was created via buffer_from_ptr + */ + bool (*buffer_is_user_ptr)(struct pb_buffer *buf); + + /** Whether the buffer was suballocated. */ + bool (*buffer_is_suballocated)(struct pb_buffer *buf); + + /** + * Get a winsys handle from a winsys buffer. The internal structure + * of the handle is platform-specific and only a winsys should access it. + * + * \param ws The winsys instance for which the handle is to be valid + * \param buf A winsys buffer object to get the handle from. + * \param whandle A winsys handle pointer. + * \return true on success. + */ + bool (*buffer_get_handle)(struct radeon_winsys *ws, struct pb_buffer *buf, + struct winsys_handle *whandle); + + /** + * Change the commitment of a (64KB-page aligned) region of the given + * sparse buffer. + * + * \warning There is no automatic synchronization with command submission. + * + * \note Only implemented by the amdgpu winsys. + * + * \return false on out of memory or other failure, true on success. + */ + bool (*buffer_commit)(struct pb_buffer *buf, uint64_t offset, uint64_t size, bool commit); + + /** + * Return the virtual address of a buffer. + * + * When virtual memory is not in use, this is the offset relative to the + * relocation base (non-zero for sub-allocated buffers). + * + * \param buf A winsys buffer object + * \return virtual address + */ + uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); + + /** + * Return the offset of this buffer relative to the relocation base. + * This is only non-zero for sub-allocated buffers. + * + * This is only supported in the radeon winsys, since amdgpu uses virtual + * addresses in submissions even for the video engines. + * + * \param buf A winsys buffer object + * \return the offset for relocations + */ + unsigned (*buffer_get_reloc_offset)(struct pb_buffer *buf); + + /** + * Query the initial placement of the buffer from the kernel driver. + */ + enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf); + + /** + * Query the flags used for creation of this buffer. + * + * Note that for imported buffer this may be lossy since not all flags + * are passed 1:1. + */ + enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer *buf); + + /************************************************************************** + * Command submission. + * + * Each pipe context should create its own command stream and submit + * commands independently of other contexts. + *************************************************************************/ + + /** + * Create a command submission context. + * Various command streams can be submitted to the same context. + */ + struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); + + /** + * Destroy a context. + */ + void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); + + /** + * Query a GPU reset status. + */ + enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx); + + /** + * Create a command stream. + * + * \param ctx The submission context + * \param ring_type The ring type (GFX, DMA, UVD) + * \param flush Flush callback function associated with the command stream. + * \param user User pointer that will be passed to the flush callback. + */ + struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence), + void *flush_ctx, bool stop_exec_on_failure); + + /** + * Add a parallel compute IB to a gfx IB. It will share the buffer list + * and fence dependencies with the gfx IB. The gfx flush call will submit + * both IBs at the same time. + * + * The compute IB doesn't have an output fence, so the primary IB has + * to use a wait packet for synchronization. + * + * The returned IB is only a stream for writing packets to the new + * IB. Calling other winsys functions with it is not allowed, not even + * "cs_destroy". Use the gfx IB instead. + * + * \param cs Gfx IB + */ + struct radeon_cmdbuf *(*cs_add_parallel_compute_ib)(struct radeon_cmdbuf *cs, + bool uses_gds_ordered_append); + + /** + * Destroy a command stream. + * + * \param cs A command stream to destroy. + */ + void (*cs_destroy)(struct radeon_cmdbuf *cs); + + /** + * Add a buffer. Each buffer used by a CS must be added using this function. + * + * \param cs Command stream + * \param buf Buffer + * \param usage Whether the buffer is used for read and/or write. + * \param domain Bitmask of the RADEON_DOMAIN_* flags. + * \param priority A higher number means a greater chance of being + * placed in the requested domain. 15 is the maximum. + * \return Buffer index. + */ + unsigned (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, enum radeon_bo_priority priority); - /** - * Return the index of an already-added buffer. - * - * Not supported on amdgpu. Drivers with GPUVM should not care about - * buffer indices. - * - * \param cs Command stream - * \param buf Buffer - * \return The buffer index, or -1 if the buffer has not been added. - */ - int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf); - - /** - * Return true if there is enough memory in VRAM and GTT for the buffers - * added so far. If the validation fails, all buffers which have - * been added since the last call of cs_validate will be removed and - * the CS will be flushed (provided there are still any buffers). - * - * \param cs A command stream to validate. - */ - bool (*cs_validate)(struct radeon_cmdbuf *cs); - - /** - * Check whether the given number of dwords is available in the IB. - * Optionally chain a new chunk of the IB if necessary and supported. - * - * \param cs A command stream. - * \param dw Number of CS dwords requested by the caller. - * \param force_chaining Chain the IB into a new buffer now to discard - * the CP prefetch cache (to emulate PKT3_REWIND) - * \return true if there is enough space - */ - bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw, - bool force_chaining); - - /** - * Return the buffer list. - * - * This is the buffer list as passed to the kernel, i.e. it only contains - * the parent buffers of sub-allocated buffers. - * - * \param cs Command stream - * \param list Returned buffer list. Set to NULL to query the count only. - * \return The buffer count. - */ - unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, - struct radeon_bo_list_item *list); - - /** - * Flush a command stream. - * - * \param cs A command stream to flush. - * \param flags, PIPE_FLUSH_* flags. - * \param fence Pointer to a fence. If non-NULL, a fence is inserted - * after the CS and is returned through this parameter. - * \return Negative POSIX error code or 0 for success. - * Asynchronous submissions never return an error. - */ - int (*cs_flush)(struct radeon_cmdbuf *cs, - unsigned flags, - struct pipe_fence_handle **fence); - - /** - * Create a fence before the CS is flushed. - * The user must flush manually to complete the initializaton of the fence. - * - * The fence must not be used for anything except \ref cs_add_fence_dependency - * before the flush. - */ - struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs); - - /** - * Return true if a buffer is referenced by a command stream. - * - * \param cs A command stream. - * \param buf A winsys buffer. - */ - bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, - struct pb_buffer *buf, - enum radeon_bo_usage usage); - - /** - * Request access to a feature for a command stream. - * - * \param cs A command stream. - * \param fid Feature ID, one of RADEON_FID_* - * \param enable Whether to enable or disable the feature. - */ - bool (*cs_request_feature)(struct radeon_cmdbuf *cs, - enum radeon_feature_id fid, - bool enable); - /** - * Make sure all asynchronous flush of the cs have completed - * - * \param cs A command stream. - */ - void (*cs_sync_flush)(struct radeon_cmdbuf *cs); - - /** - * Add a fence dependency to the CS, so that the CS will wait for - * the fence before execution. - * - * \param dependency_flags Bitmask of RADEON_DEPENDENCY_* - */ - void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence, - unsigned dependency_flags); - - /** - * Signal a syncobj when the CS finishes execution. - */ - void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, - struct pipe_fence_handle *fence); - - /** - * Wait for the fence and return true if the fence has been signalled. - * The timeout of 0 will only return the status. - * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence - * is signalled. - */ - bool (*fence_wait)(struct radeon_winsys *ws, - struct pipe_fence_handle *fence, - uint64_t timeout); - - /** - * Reference counting for fences. - */ - void (*fence_reference)(struct pipe_fence_handle **dst, - struct pipe_fence_handle *src); - - /** - * Create a new fence object corresponding to the given syncobj fd. - */ - struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, - int fd); - - /** - * Create a new fence object corresponding to the given sync_file. - */ - struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, - int fd); - - /** - * Return a sync_file FD corresponding to the given fence object. - */ - int (*fence_export_sync_file)(struct radeon_winsys *ws, - struct pipe_fence_handle *fence); - - /** - * Return a sync file FD that is already signalled. - */ - int (*export_signalled_sync_file)(struct radeon_winsys *ws); - - /** - * Initialize surface - * - * \param ws The winsys this function is called from. - * \param tex Input texture description - * \param flags Bitmask of RADEON_SURF_* flags - * \param bpe Bytes per pixel, it can be different for Z buffers. - * \param mode Preferred tile mode. (linear, 1D, or 2D) - * \param surf Output structure - */ - int (*surface_init)(struct radeon_winsys *ws, - const struct pipe_resource *tex, - unsigned flags, unsigned bpe, - enum radeon_surf_mode mode, - struct radeon_surf *surf); - - uint64_t (*query_value)(struct radeon_winsys *ws, - enum radeon_value_id value); - - bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, - unsigned num_registers, uint32_t *out); + /** + * Return the index of an already-added buffer. + * + * Not supported on amdgpu. Drivers with GPUVM should not care about + * buffer indices. + * + * \param cs Command stream + * \param buf Buffer + * \return The buffer index, or -1 if the buffer has not been added. + */ + int (*cs_lookup_buffer)(struct radeon_cmdbuf *cs, struct pb_buffer *buf); + + /** + * Return true if there is enough memory in VRAM and GTT for the buffers + * added so far. If the validation fails, all buffers which have + * been added since the last call of cs_validate will be removed and + * the CS will be flushed (provided there are still any buffers). + * + * \param cs A command stream to validate. + */ + bool (*cs_validate)(struct radeon_cmdbuf *cs); + + /** + * Check whether the given number of dwords is available in the IB. + * Optionally chain a new chunk of the IB if necessary and supported. + * + * \param cs A command stream. + * \param dw Number of CS dwords requested by the caller. + * \param force_chaining Chain the IB into a new buffer now to discard + * the CP prefetch cache (to emulate PKT3_REWIND) + * \return true if there is enough space + */ + bool (*cs_check_space)(struct radeon_cmdbuf *cs, unsigned dw, bool force_chaining); + + /** + * Return the buffer list. + * + * This is the buffer list as passed to the kernel, i.e. it only contains + * the parent buffers of sub-allocated buffers. + * + * \param cs Command stream + * \param list Returned buffer list. Set to NULL to query the count only. + * \return The buffer count. + */ + unsigned (*cs_get_buffer_list)(struct radeon_cmdbuf *cs, struct radeon_bo_list_item *list); + + /** + * Flush a command stream. + * + * \param cs A command stream to flush. + * \param flags, PIPE_FLUSH_* flags. + * \param fence Pointer to a fence. If non-NULL, a fence is inserted + * after the CS and is returned through this parameter. + * \return Negative POSIX error code or 0 for success. + * Asynchronous submissions never return an error. + */ + int (*cs_flush)(struct radeon_cmdbuf *cs, unsigned flags, struct pipe_fence_handle **fence); + + /** + * Create a fence before the CS is flushed. + * The user must flush manually to complete the initializaton of the fence. + * + * The fence must not be used for anything except \ref cs_add_fence_dependency + * before the flush. + */ + struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_cmdbuf *cs); + + /** + * Return true if a buffer is referenced by a command stream. + * + * \param cs A command stream. + * \param buf A winsys buffer. + */ + bool (*cs_is_buffer_referenced)(struct radeon_cmdbuf *cs, struct pb_buffer *buf, + enum radeon_bo_usage usage); + + /** + * Request access to a feature for a command stream. + * + * \param cs A command stream. + * \param fid Feature ID, one of RADEON_FID_* + * \param enable Whether to enable or disable the feature. + */ + bool (*cs_request_feature)(struct radeon_cmdbuf *cs, enum radeon_feature_id fid, bool enable); + /** + * Make sure all asynchronous flush of the cs have completed + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct radeon_cmdbuf *cs); + + /** + * Add a fence dependency to the CS, so that the CS will wait for + * the fence before execution. + * + * \param dependency_flags Bitmask of RADEON_DEPENDENCY_* + */ + void (*cs_add_fence_dependency)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence, + unsigned dependency_flags); + + /** + * Signal a syncobj when the CS finishes execution. + */ + void (*cs_add_syncobj_signal)(struct radeon_cmdbuf *cs, struct pipe_fence_handle *fence); + + /** + * Wait for the fence and return true if the fence has been signalled. + * The timeout of 0 will only return the status. + * The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence + * is signalled. + */ + bool (*fence_wait)(struct radeon_winsys *ws, struct pipe_fence_handle *fence, uint64_t timeout); + + /** + * Reference counting for fences. + */ + void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src); + + /** + * Create a new fence object corresponding to the given syncobj fd. + */ + struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, int fd); + + /** + * Create a new fence object corresponding to the given sync_file. + */ + struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws, int fd); + + /** + * Return a sync_file FD corresponding to the given fence object. + */ + int (*fence_export_sync_file)(struct radeon_winsys *ws, struct pipe_fence_handle *fence); + + /** + * Return a sync file FD that is already signalled. + */ + int (*export_signalled_sync_file)(struct radeon_winsys *ws); + + /** + * Initialize surface + * + * \param ws The winsys this function is called from. + * \param tex Input texture description + * \param flags Bitmask of RADEON_SURF_* flags + * \param bpe Bytes per pixel, it can be different for Z buffers. + * \param mode Preferred tile mode. (linear, 1D, or 2D) + * \param surf Output structure + */ + int (*surface_init)(struct radeon_winsys *ws, const struct pipe_resource *tex, unsigned flags, + unsigned bpe, enum radeon_surf_mode mode, struct radeon_surf *surf); + + uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); + + bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers, + uint32_t *out); }; static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw) { - return cs && (cs->prev_dw + cs->current.cdw > num_dw); + return cs && (cs->prev_dw + cs->current.cdw > num_dw); } static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value) { - cs->current.buf[cs->current.cdw++] = value; + cs->current.buf[cs->current.cdw++] = value; } -static inline void radeon_emit_array(struct radeon_cmdbuf *cs, - const uint32_t *values, unsigned count) +static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, + unsigned count) { - memcpy(cs->current.buf + cs->current.cdw, values, count * 4); - cs->current.cdw += count; + memcpy(cs->current.buf + cs->current.cdw, values, count * 4); + cs->current.cdw += count; } -enum radeon_heap { - RADEON_HEAP_VRAM_NO_CPU_ACCESS, - RADEON_HEAP_VRAM_READ_ONLY, - RADEON_HEAP_VRAM_READ_ONLY_32BIT, - RADEON_HEAP_VRAM_32BIT, - RADEON_HEAP_VRAM, - RADEON_HEAP_GTT_WC, - RADEON_HEAP_GTT_WC_READ_ONLY, - RADEON_HEAP_GTT_WC_READ_ONLY_32BIT, - RADEON_HEAP_GTT_WC_32BIT, - RADEON_HEAP_GTT, - RADEON_MAX_SLAB_HEAPS, - RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS, +enum radeon_heap +{ + RADEON_HEAP_VRAM_NO_CPU_ACCESS, + RADEON_HEAP_VRAM_READ_ONLY, + RADEON_HEAP_VRAM_READ_ONLY_32BIT, + RADEON_HEAP_VRAM_32BIT, + RADEON_HEAP_VRAM, + RADEON_HEAP_GTT_WC, + RADEON_HEAP_GTT_WC_READ_ONLY, + RADEON_HEAP_GTT_WC_READ_ONLY_32BIT, + RADEON_HEAP_GTT_WC_32BIT, + RADEON_HEAP_GTT, + RADEON_MAX_SLAB_HEAPS, + RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS, }; static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap heap) { - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_VRAM: - return RADEON_DOMAIN_VRAM; - case RADEON_HEAP_GTT_WC: - case RADEON_HEAP_GTT_WC_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - case RADEON_HEAP_GTT: - return RADEON_DOMAIN_GTT; - default: - assert(0); - return (enum radeon_bo_domain)0; - } + switch (heap) { + case RADEON_HEAP_VRAM_NO_CPU_ACCESS: + case RADEON_HEAP_VRAM_READ_ONLY: + case RADEON_HEAP_VRAM_READ_ONLY_32BIT: + case RADEON_HEAP_VRAM_32BIT: + case RADEON_HEAP_VRAM: + return RADEON_DOMAIN_VRAM; + case RADEON_HEAP_GTT_WC: + case RADEON_HEAP_GTT_WC_READ_ONLY: + case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: + case RADEON_HEAP_GTT_WC_32BIT: + case RADEON_HEAP_GTT: + return RADEON_DOMAIN_GTT; + default: + assert(0); + return (enum radeon_bo_domain)0; + } } static inline unsigned radeon_flags_from_heap(enum radeon_heap heap) { - unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | - (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0); - - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - return flags | - RADEON_FLAG_NO_CPU_ACCESS; - - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY: - return flags | - RADEON_FLAG_READ_ONLY; - - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - return flags | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT; - - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - return flags | - RADEON_FLAG_32BIT; - - case RADEON_HEAP_VRAM: - case RADEON_HEAP_GTT_WC: - case RADEON_HEAP_GTT: - default: - return flags; - } + unsigned flags = + RADEON_FLAG_NO_INTERPROCESS_SHARING | (heap != RADEON_HEAP_GTT ? RADEON_FLAG_GTT_WC : 0); + + switch (heap) { + case RADEON_HEAP_VRAM_NO_CPU_ACCESS: + return flags | RADEON_FLAG_NO_CPU_ACCESS; + + case RADEON_HEAP_VRAM_READ_ONLY: + case RADEON_HEAP_GTT_WC_READ_ONLY: + return flags | RADEON_FLAG_READ_ONLY; + + case RADEON_HEAP_VRAM_READ_ONLY_32BIT: + case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: + return flags | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT; + + case RADEON_HEAP_VRAM_32BIT: + case RADEON_HEAP_GTT_WC_32BIT: + return flags | RADEON_FLAG_32BIT; + + case RADEON_HEAP_VRAM: + case RADEON_HEAP_GTT_WC: + case RADEON_HEAP_GTT: + default: + return flags; + } } /* Return the heap index for winsys allocators, or -1 on failure. */ -static inline int radeon_get_heap_index(enum radeon_bo_domain domain, - enum radeon_bo_flag flags) +static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags) { - /* VRAM implies WC (write combining) */ - assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); - /* NO_CPU_ACCESS implies VRAM only. */ - assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); - - /* Resources with interprocess sharing don't use any winsys allocators. */ - if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) - return -1; - - /* Unsupported flags: NO_SUBALLOC, SPARSE. */ - if (flags & ~(RADEON_FLAG_GTT_WC | - RADEON_FLAG_NO_CPU_ACCESS | - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) - return -1; - - switch (domain) { - case RADEON_DOMAIN_VRAM: - switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) { - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY: - assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT: - assert(!"NO_CPU_ACCESS with 32BIT is disallowed"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS: - return RADEON_HEAP_VRAM_NO_CPU_ACCESS; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_READ_ONLY_32BIT; - case RADEON_FLAG_READ_ONLY: - return RADEON_HEAP_VRAM_READ_ONLY; - case RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_32BIT; - case 0: - return RADEON_HEAP_VRAM; - } - break; - case RADEON_DOMAIN_GTT: - switch (flags & (RADEON_FLAG_GTT_WC | - RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT)) { - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY: - return RADEON_HEAP_GTT_WC_READ_ONLY; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: - return RADEON_HEAP_GTT_WC_32BIT; - case RADEON_FLAG_GTT_WC: - return RADEON_HEAP_GTT_WC; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_READ_ONLY: - assert(!"READ_ONLY without WC is disallowed"); - return -1; - case RADEON_FLAG_32BIT: - assert(!"32BIT without WC is disallowed"); - return -1; - case 0: - return RADEON_HEAP_GTT; - } - break; - default: - break; - } - return -1; + /* VRAM implies WC (write combining) */ + assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); + /* NO_CPU_ACCESS implies VRAM only. */ + assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); + + /* Resources with interprocess sharing don't use any winsys allocators. */ + if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) + return -1; + + /* Unsupported flags: NO_SUBALLOC, SPARSE. */ + if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) + return -1; + + switch (domain) { + case RADEON_DOMAIN_VRAM: + switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY: + assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense"); + return -1; + case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT: + assert(!"NO_CPU_ACCESS with 32BIT is disallowed"); + return -1; + case RADEON_FLAG_NO_CPU_ACCESS: + return RADEON_HEAP_VRAM_NO_CPU_ACCESS; + case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + return RADEON_HEAP_VRAM_READ_ONLY_32BIT; + case RADEON_FLAG_READ_ONLY: + return RADEON_HEAP_VRAM_READ_ONLY; + case RADEON_FLAG_32BIT: + return RADEON_HEAP_VRAM_32BIT; + case 0: + return RADEON_HEAP_VRAM; + } + break; + case RADEON_DOMAIN_GTT: + switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { + case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + return RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; + case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY: + return RADEON_HEAP_GTT_WC_READ_ONLY; + case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: + return RADEON_HEAP_GTT_WC_32BIT; + case RADEON_FLAG_GTT_WC: + return RADEON_HEAP_GTT_WC; + case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: + case RADEON_FLAG_READ_ONLY: + assert(!"READ_ONLY without WC is disallowed"); + return -1; + case RADEON_FLAG_32BIT: + assert(!"32BIT without WC is disallowed"); + return -1; + case 0: + return RADEON_HEAP_GTT; + } + break; + default: + break; + } + return -1; } #endif |