diff options
Diffstat (limited to 'lib/mesa/src')
162 files changed, 38768 insertions, 399 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 9ec051a34..c00997b89 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -82,7 +82,8 @@ enum lp_sampler_lod_control { enum lp_sampler_op_type { LP_SAMPLER_OP_TEXTURE, LP_SAMPLER_OP_FETCH, - LP_SAMPLER_OP_GATHER + LP_SAMPLER_OP_GATHER, + LP_SAMPLER_OP_LODQ }; @@ -165,6 +166,7 @@ struct lp_static_sampler_state unsigned normalized_coords:1; unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */ unsigned lod_bias_non_zero:1; + unsigned max_lod_pos:1; unsigned apply_min_lod:1; /**< min_lod > 0 ? */ unsigned apply_max_lod:1; /**< max_lod < last_level ? */ unsigned seamless_cube_map:1; @@ -321,6 +323,10 @@ struct lp_build_sample_context /** number of lod values (valid are 1, length/4, length) */ unsigned num_lods; + boolean no_quad_lod; + boolean no_brilinear; + boolean no_rho_approx; + /** regular scalar float type */ struct lp_type float_type; struct lp_build_context float_bld; @@ -486,6 +492,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, void lp_build_lod_selector(struct lp_build_sample_context *bld, + boolean is_lodq, unsigned texture_index, unsigned sampler_index, LLVMValueRef s, @@ -496,6 +503,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ unsigned mip_filter, + LLVMValueRef *out_lod, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart, LLVMValueRef *out_lod_positive); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index 463d44eb4..c92517fee 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -82,6 +82,11 @@ struct lp_build_emit_data { LLVMValueRef output[4]; /** + * Secondary output for instruction that have a second destination register. + */ + LLVMValueRef output1[4]; + + /** * The current instruction that is being 'executed'. */ const struct tgsi_full_instruction * inst; diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am index 8039a957b..878159f23 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am @@ -5,6 +5,7 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CFLAGS = \ -I$(top_srcdir)/src/loader \ -I$(top_srcdir)/src/gallium/winsys \ + -I$(top_builddir)/src/util \ $(GALLIUM_PIPE_LOADER_DEFINES) \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) @@ -40,9 +41,11 @@ libpipe_loader_dynamic_la_SOURCES += \ endif libpipe_loader_static_la_LIBADD = \ - $(top_builddir)/src/loader/libloader.la + $(top_builddir)/src/loader/libloader.la \ + $(top_builddir)/src/util/libxmlconfig.la libpipe_loader_dynamic_la_LIBADD = \ - $(top_builddir)/src/loader/libloader.la + $(top_builddir)/src/loader/libloader.la \ + $(top_builddir)/src/util/libxmlconfig.la EXTRA_DIST = SConscript diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources index d6e3c2c06..66dd22ccc 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources @@ -2,7 +2,8 @@ COMMON_SOURCES := \ pipe_loader.c \ pipe_loader.h \ pipe_loader_priv.h \ - pipe_loader_sw.c + pipe_loader_sw.c \ + driinfo_gallium.h DRM_SOURCES := \ pipe_loader_drm.c diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript b/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript index 14e1b350a..0f72195b2 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript @@ -7,6 +7,7 @@ env.MSVC2013Compat() env.Append(CPPPATH = [ '#/src/loader', '#/src/gallium/winsys', + xmlpool_options.dir.dir, ]) env.Append(CPPDEFINES = [ diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h new file mode 100644 index 000000000..003a3d708 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -0,0 +1,35 @@ +// DriConf options supported by all Gallium DRI drivers. +DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_MESA_GLTHREAD("false") + DRI_CONF_MESA_NO_ERROR("false") + DRI_CONF_DISABLE_EXT_BUFFER_AGE("false") + DRI_CONF_DISABLE_OML_SYNC_CONTROL("false") +DRI_CONF_SECTION_END + +DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_CELSHADE(0) + DRI_CONF_PP_NORED(0) + DRI_CONF_PP_NOGREEN(0) + DRI_CONF_PP_NOBLUE(0) + DRI_CONF_PP_JIMENEZMLAA(0, 0, 32) + DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32) +DRI_CONF_SECTION_END + +DRI_CONF_SECTION_DEBUG + DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false") + DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") + DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false") + DRI_CONF_FORCE_GLSL_VERSION(0) + DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") + DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false") + DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false") + DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false") + DRI_CONF_FORCE_GLSL_ABS_SQRT("false") + DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false") +DRI_CONF_SECTION_END + +DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false") + DRI_CONF_GLSL_ZERO_INIT("false") +DRI_CONF_SECTION_END diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 250f739c8..d22c24e67 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -30,7 +30,7 @@ * Buffer cache. * * \author Jose Fonseca <jfonseca-at-vmware-dot-com> - * \author Thomas Hellström <thellstom-at-vmware-dot-com> + * \author Thomas Hellström <thellstrom-at-vmware-dot-com> */ diff --git a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h index 0b2c363e1..85c14a786 100644 --- a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h +++ b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h @@ -164,12 +164,12 @@ static const char offsetvs[] = "VERT\n" "DCL OUT[1], GENERIC[0]\n" "DCL OUT[2], GENERIC[10]\n" "DCL OUT[3], GENERIC[11]\n" - "DCL CONST[0]\n" + "DCL CONST[0][0]\n" "IMM FLT32 { 1.0000, 0.0000, -1.0000, 0.0000}\n" " 0: MOV OUT[0], IN[0]\n" " 1: MOV OUT[1], IN[1]\n" - " 2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n" - " 3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n" + " 2: MAD OUT[2], CONST[0][0].xyxy, IMM[0].zyyz, IN[1].xyxy\n" + " 3: MAD OUT[3], CONST[0][0].xyxy, IMM[0].xyyx, IN[1].xyxy\n" " 4: END\n"; @@ -183,7 +183,7 @@ static const char blend2fs_1[] = "FRAG\n" "DCL SVIEW[1], 2D, FLOAT\n" "DCL SAMP[2]\n" "DCL SVIEW[2], 2D, FLOAT\n" - "DCL CONST[0]\n" + "DCL CONST[0][0]\n" "DCL TEMP[0..6]\n" "IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n" "IMM FLT32 { -1.5000, -2.0000, 0.9000, 1.5000}\n" @@ -204,7 +204,7 @@ static const char blend2fs_2[] = " 11: BRK\n" " 12: ENDIF\n" " 13: MOV TEMP[4].y, IMM[0].xxxx\n" - " 14: MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n" + " 14: MAD TEMP[3].xyz, CONST[0][0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n" " 15: MOV TEMP[3].w, IMM[0].xxxx\n" " 16: TXL TEMP[5], TEMP[3], SAMP[2], 2D\n" " 17: MOV TEMP[3].x, TEMP[5].yyyy\n" @@ -229,7 +229,7 @@ static const char blend2fs_2[] = " 36: BRK\n" " 37: ENDIF\n" " 38: MOV TEMP[5].y, IMM[0].xxxx\n" - " 39: MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n" + " 39: MAD TEMP[4].xyz, CONST[0][0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n" " 40: MOV TEMP[4].w, IMM[0].xxxx\n" " 41: TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n" " 42: MOV TEMP[4].x, TEMP[6].yyyy\n" @@ -250,7 +250,7 @@ static const char blend2fs_2[] = " 57: MOV TEMP[5].x, TEMP[1].xxxx\n" " 58: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n" " 59: MOV TEMP[5].z, TEMP[1].xxxx\n" - " 60: MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n" + " 60: MAD TEMP[1], TEMP[5], CONST[0][0].xyxy, IN[0].xyxy\n" " 61: MOV TEMP[4], TEMP[1].xyyy\n" " 62: MOV TEMP[4].w, IMM[0].xxxx\n" " 63: TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n" @@ -278,7 +278,7 @@ static const char blend2fs_2[] = " 85: BRK\n" " 86: ENDIF\n" " 87: MOV TEMP[3].y, IMM[0].xxxx\n" - " 88: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n" + " 88: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n" " 89: MOV TEMP[5].w, IMM[0].xxxx\n" " 90: TXL TEMP[4], TEMP[5], SAMP[2], 2D\n" " 91: MOV TEMP[2].x, TEMP[4].xxxx\n" @@ -303,7 +303,7 @@ static const char blend2fs_2[] = "110: BRK\n" "111: ENDIF\n" "112: MOV TEMP[4].y, IMM[0].xxxx\n" - "113: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n" + "113: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n" "114: MOV TEMP[5].w, IMM[0].xxxx\n" "115: TXL TEMP[6], TEMP[5], SAMP[2], 2D\n" "116: MOV TEMP[3].x, TEMP[6].xxxx\n" @@ -324,7 +324,7 @@ static const char blend2fs_2[] = "131: MOV TEMP[4].y, TEMP[1].xxxx\n" "132: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n" "133: MOV TEMP[4].w, TEMP[1].xxxx\n" - "134: MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n" + "134: MAD TEMP[1], TEMP[4], CONST[0][0].xyxy, IN[0].xyxy\n" "135: MOV TEMP[3], TEMP[1].xyyy\n" "136: MOV TEMP[3].w, IMM[0].xxxx\n" "137: TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h index e60888fec..8d32f4774 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -76,11 +76,10 @@ struct tgsi_opcode_info unsigned is_tex:1; unsigned is_store:1; unsigned is_branch:1; - int pre_dedent:2; - int post_indent:2; + unsigned pre_dedent:1; + unsigned post_indent:1; enum tgsi_output_mode output_mode:3; - const char *mnemonic; - uint opcode; + unsigned opcode:8; }; const struct tgsi_opcode_info * @@ -112,10 +111,10 @@ static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type) } enum tgsi_opcode_type -tgsi_opcode_infer_src_type( uint opcode ); +tgsi_opcode_infer_src_type( uint opcode, uint src_idx ); enum tgsi_opcode_type -tgsi_opcode_infer_dst_type( uint opcode ); +tgsi_opcode_infer_dst_type( uint opcode, uint dst_idx ); #if defined __cplusplus } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h new file mode 100644 index 000000000..1b2803cf3 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h @@ -0,0 +1,252 @@ +OPCODE(1, 1, COMP, ARL) +OPCODE(1, 1, COMP, MOV) +OPCODE(1, 1, CHAN, LIT) +OPCODE(1, 1, REPL, RCP) +OPCODE(1, 1, REPL, RSQ) +OPCODE(1, 1, CHAN, EXP) +OPCODE(1, 1, CHAN, LOG) +OPCODE(1, 2, COMP, MUL) +OPCODE(1, 2, COMP, ADD) +OPCODE(1, 2, REPL, DP3) +OPCODE(1, 2, REPL, DP4) +OPCODE(1, 2, CHAN, DST) +OPCODE(1, 2, COMP, MIN) +OPCODE(1, 2, COMP, MAX) +OPCODE(1, 2, COMP, SLT) +OPCODE(1, 2, COMP, SGE) +OPCODE(1, 3, COMP, MAD) +OPCODE(1, 2, OTHR, TEX_LZ, .is_tex = 1) +OPCODE(1, 3, COMP, LRP) +OPCODE(1, 3, COMP, FMA) +OPCODE(1, 1, REPL, SQRT) +OPCODE(1, 2, COMP, LDEXP) +OPCODE(1, 1, COMP, F2U64) +OPCODE(1, 1, COMP, F2I64) +OPCODE(1, 1, COMP, FRC) +OPCODE(1, 2, OTHR, TXF_LZ, .is_tex = 1) +OPCODE(1, 1, COMP, FLR) +OPCODE(1, 1, COMP, ROUND) +OPCODE(1, 1, REPL, EX2) +OPCODE(1, 1, REPL, LG2) +OPCODE(1, 2, REPL, POW) +OPCODE_GAP(31) /* removed */ +OPCODE(1, 1, COMP, U2I64) +OPCODE(1, 0, OTHR, CLOCK) +OPCODE(1, 1, COMP, I2I64) +OPCODE_GAP(35) /* removed */ +OPCODE(1, 1, REPL, COS) +OPCODE(1, 1, COMP, DDX) +OPCODE(1, 1, COMP, DDY) +OPCODE(0, 0, NONE, KILL) +OPCODE(1, 1, REPL, PK2H) +OPCODE(1, 1, REPL, PK2US) +OPCODE(1, 1, REPL, PK4B) +OPCODE(1, 1, REPL, PK4UB) +OPCODE(1, 1, COMP, D2U64) +OPCODE(1, 2, COMP, SEQ) +OPCODE(1, 1, COMP, D2I64) +OPCODE(1, 2, COMP, SGT) +OPCODE(1, 1, REPL, SIN) +OPCODE(1, 2, COMP, SLE) +OPCODE(1, 2, COMP, SNE) +OPCODE(1, 1, COMP, U642D) +OPCODE(1, 2, OTHR, TEX, .is_tex = 1) +OPCODE(1, 4, OTHR, TXD, .is_tex = 1) +OPCODE(1, 2, OTHR, TXP, .is_tex = 1) +OPCODE(1, 1, CHAN, UP2H) +OPCODE(1, 1, CHAN, UP2US) +OPCODE(1, 1, CHAN, UP4B) +OPCODE(1, 1, CHAN, UP4UB) +OPCODE(1, 1, COMP, U642F) +OPCODE(1, 1, COMP, I642F) +OPCODE(1, 1, COMP, ARR) +OPCODE(1, 1, COMP, I642D) +OPCODE(0, 0, NONE, CAL, .is_branch = 1) +OPCODE(0, 0, NONE, RET) +OPCODE(1, 1, COMP, SSG) +OPCODE(1, 3, COMP, CMP) +OPCODE_GAP(67) /* removed */ +OPCODE(1, 2, OTHR, TXB, .is_tex = 1) +OPCODE(1, 1, OTHR, FBFETCH) +OPCODE(1, 2, COMP, DIV) +OPCODE(1, 2, REPL, DP2) +OPCODE(1, 2, OTHR, TXL, .is_tex = 1) +OPCODE(0, 0, NONE, BRK) +OPCODE(0, 1, NONE, IF, .is_branch = 1, .post_indent = 1) +OPCODE(0, 1, NONE, UIF, .is_branch = 1, .post_indent = 1) +OPCODE(1, 2, COMP, READ_INVOC) +OPCODE(0, 0, NONE, ELSE, .is_branch = 1, .pre_dedent = 1, .post_indent = 1) +OPCODE(0, 0, NONE, ENDIF, .pre_dedent = 1) +OPCODE(1, 1, COMP, DDX_FINE) +OPCODE(1, 1, COMP, DDY_FINE) +OPCODE_GAP(81) /* removed */ +OPCODE_GAP(82) /* removed */ +OPCODE(1, 1, COMP, CEIL) +OPCODE(1, 1, COMP, I2F) +OPCODE(1, 1, COMP, NOT) +OPCODE(1, 1, COMP, TRUNC) +OPCODE(1, 2, COMP, SHL) +OPCODE(1, 1, OTHR, BALLOT) +OPCODE(1, 2, COMP, AND) +OPCODE(1, 2, COMP, OR) +OPCODE(1, 2, COMP, MOD) +OPCODE(1, 2, COMP, XOR) +OPCODE_GAP(93) /* removed */ +OPCODE(1, 2, OTHR, TXF, .is_tex = 1) +OPCODE(1, 2, OTHR, TXQ, .is_tex = 1) +OPCODE(0, 0, NONE, CONT) +OPCODE(0, 1, NONE, EMIT) +OPCODE(0, 1, NONE, ENDPRIM) +OPCODE(0, 0, NONE, BGNLOOP, .is_branch = 1, .post_indent = 1) +OPCODE(0, 0, NONE, BGNSUB, .post_indent = 1) +OPCODE(0, 0, NONE, ENDLOOP, .is_branch = 1, .pre_dedent = 1) +OPCODE(0, 0, NONE, ENDSUB, .pre_dedent = 1) +OPCODE_GAP(103) /* removed */ +OPCODE(1, 1, OTHR, TXQS, .is_tex = 1) +OPCODE(1, 1, OTHR, RESQ) +OPCODE(1, 1, COMP, READ_FIRST) +OPCODE(0, 0, NONE, NOP) +OPCODE(1, 2, COMP, FSEQ) +OPCODE(1, 2, COMP, FSGE) +OPCODE(1, 2, COMP, FSLT) +OPCODE(1, 2, COMP, FSNE) +OPCODE(0, 1, OTHR, MEMBAR) +OPCODE_GAP(113) /* removed */ +OPCODE_GAP(114) /* removed */ +OPCODE_GAP(115) /* removed */ +OPCODE(0, 1, NONE, KILL_IF) +OPCODE(0, 0, NONE, END) +OPCODE(1, 3, COMP, DFMA) +OPCODE(1, 1, COMP, F2I) +OPCODE(1, 2, COMP, IDIV) +OPCODE(1, 2, COMP, IMAX) +OPCODE(1, 2, COMP, IMIN) +OPCODE(1, 1, COMP, INEG) +OPCODE(1, 2, COMP, ISGE) +OPCODE(1, 2, COMP, ISHR) +OPCODE(1, 2, COMP, ISLT) +OPCODE(1, 1, COMP, F2U) +OPCODE(1, 1, COMP, U2F) +OPCODE(1, 2, COMP, UADD) +OPCODE(1, 2, COMP, UDIV) +OPCODE(1, 3, COMP, UMAD) +OPCODE(1, 2, COMP, UMAX) +OPCODE(1, 2, COMP, UMIN) +OPCODE(1, 2, COMP, UMOD) +OPCODE(1, 2, COMP, UMUL) +OPCODE(1, 2, COMP, USEQ) +OPCODE(1, 2, COMP, USGE) +OPCODE(1, 2, COMP, USHR) +OPCODE(1, 2, COMP, USLT) +OPCODE(1, 2, COMP, USNE) +OPCODE(0, 1, NONE, SWITCH) +OPCODE(0, 1, NONE, CASE) +OPCODE(0, 0, NONE, DEFAULT) +OPCODE(0, 0, NONE, ENDSWITCH) + +OPCODE(1, 3, OTHR, SAMPLE) +OPCODE(1, 2, OTHR, SAMPLE_I) +OPCODE(1, 3, OTHR, SAMPLE_I_MS) +OPCODE(1, 4, OTHR, SAMPLE_B) +OPCODE(1, 4, OTHR, SAMPLE_C) +OPCODE(1, 4, OTHR, SAMPLE_C_LZ) +OPCODE(1, 5, OTHR, SAMPLE_D) +OPCODE(1, 4, OTHR, SAMPLE_L) +OPCODE(1, 3, OTHR, GATHER4) +OPCODE(1, 2, OTHR, SVIEWINFO) +OPCODE(1, 2, OTHR, SAMPLE_POS) +OPCODE(1, 2, OTHR, SAMPLE_INFO) +OPCODE(1, 1, COMP, UARL) +OPCODE(1, 3, COMP, UCMP) +OPCODE(1, 1, COMP, IABS) +OPCODE(1, 1, COMP, ISSG) +OPCODE(1, 2, OTHR, LOAD) +OPCODE(1, 2, OTHR, STORE, .is_store = 1) +OPCODE_GAP(163) /* removed */ +OPCODE_GAP(164) /* removed */ +OPCODE_GAP(165) /* removed */ +OPCODE(0, 0, OTHR, BARRIER) + +OPCODE(1, 3, OTHR, ATOMUADD, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMXCHG, .is_store = 1) +OPCODE(1, 4, OTHR, ATOMCAS, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMAND, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMOR, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMXOR, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMUMIN, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMUMAX, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMIMIN, .is_store = 1) +OPCODE(1, 3, OTHR, ATOMIMAX, .is_store = 1) +OPCODE(1, 3, OTHR, TEX2, .is_tex = 1) +OPCODE(1, 3, OTHR, TXB2, .is_tex = 1) +OPCODE(1, 3, OTHR, TXL2, .is_tex = 1) +OPCODE(1, 2, COMP, IMUL_HI) +OPCODE(1, 2, COMP, UMUL_HI) +OPCODE(1, 3, OTHR, TG4, .is_tex = 1) +OPCODE(1, 2, OTHR, LODQ, .is_tex = 1) +OPCODE(1, 3, COMP, IBFE) +OPCODE(1, 3, COMP, UBFE) +OPCODE(1, 4, COMP, BFI) +OPCODE(1, 1, COMP, BREV) +OPCODE(1, 1, COMP, POPC) +OPCODE(1, 1, COMP, LSB) +OPCODE(1, 1, COMP, IMSB) +OPCODE(1, 1, COMP, UMSB) +OPCODE(1, 1, OTHR, INTERP_CENTROID) +OPCODE(1, 2, OTHR, INTERP_SAMPLE) +OPCODE(1, 2, OTHR, INTERP_OFFSET) +OPCODE(1, 1, COMP, F2D) +OPCODE(1, 1, COMP, D2F) +OPCODE(1, 1, COMP, DABS) +OPCODE(1, 1, COMP, DNEG) +OPCODE(1, 2, COMP, DADD) +OPCODE(1, 2, COMP, DMUL) +OPCODE(1, 2, COMP, DMAX) +OPCODE(1, 2, COMP, DMIN) +OPCODE(1, 2, COMP, DSLT) +OPCODE(1, 2, COMP, DSGE) +OPCODE(1, 2, COMP, DSEQ) +OPCODE(1, 2, COMP, DSNE) +OPCODE(1, 1, COMP, DRCP) +OPCODE(1, 1, COMP, DSQRT) +OPCODE(1, 3, COMP, DMAD) +OPCODE(1, 1, COMP, DFRAC) +OPCODE(1, 2, COMP, DLDEXP) +OPCODE(2, 1, REPL, DFRACEXP) +OPCODE(1, 1, COMP, D2I) +OPCODE(1, 1, COMP, I2D) +OPCODE(1, 1, COMP, D2U) +OPCODE(1, 1, COMP, U2D) +OPCODE(1, 1, COMP, DRSQ) +OPCODE(1, 1, COMP, DTRUNC) +OPCODE(1, 1, COMP, DCEIL) +OPCODE(1, 1, COMP, DFLR) +OPCODE(1, 1, COMP, DROUND) +OPCODE(1, 1, COMP, DSSG) +OPCODE(1, 1, COMP, VOTE_ANY) +OPCODE(1, 1, COMP, VOTE_ALL) +OPCODE(1, 1, COMP, VOTE_EQ) +OPCODE(1, 2, COMP, U64SEQ) +OPCODE(1, 2, COMP, U64SNE) +OPCODE(1, 2, COMP, I64SLT) +OPCODE(1, 2, COMP, U64SLT) +OPCODE(1, 2, COMP, I64SGE) +OPCODE(1, 2, COMP, U64SGE) +OPCODE(1, 2, COMP, I64MIN) +OPCODE(1, 2, COMP, U64MIN) +OPCODE(1, 2, COMP, I64MAX) +OPCODE(1, 2, COMP, U64MAX) +OPCODE(1, 1, COMP, I64ABS) +OPCODE(1, 1, COMP, I64SSG) +OPCODE(1, 1, COMP, I64NEG) +OPCODE(1, 2, COMP, U64ADD) +OPCODE(1, 2, COMP, U64MUL) +OPCODE(1, 2, COMP, U64SHL) +OPCODE(1, 2, COMP, I64SHR) +OPCODE(1, 2, COMP, U64SHR) +OPCODE(1, 2, COMP, I64DIV) +OPCODE(1, 2, COMP, U64DIV) +OPCODE(1, 2, COMP, I64MOD) +OPCODE(1, 2, COMP, U64MOD) +OPCODE(1, 2, COMP, DDIV) +OPCODE(1, 3, OTHR, LOD) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h index 20e4f843a..fd4c38f01 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h @@ -55,8 +55,6 @@ struct tgsi_lowering_config * enable lowering of TGSI_OPCODE_<opc> */ unsigned lower_DST:1; - unsigned lower_XPD:1; - unsigned lower_SCS:1; unsigned lower_LRP:1; unsigned lower_FRC:1; unsigned lower_POW:1; @@ -65,9 +63,7 @@ struct tgsi_lowering_config unsigned lower_LOG:1; unsigned lower_DP4:1; unsigned lower_DP3:1; - unsigned lower_DPH:1; unsigned lower_DP2:1; - unsigned lower_DP2A:1; unsigned lower_FLR:1; unsigned lower_CEIL:1; unsigned lower_TRUNC:1; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c b/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c index 9d831efcf..e87e53016 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c @@ -62,36 +62,36 @@ util_dump_enum_continuous(unsigned value, } -#define DEFINE_UTIL_DUMP_CONTINUOUS(_name) \ +#define DEFINE_UTIL_STR_CONTINUOUS(_name) \ const char * \ - util_dump_##_name(unsigned value, boolean shortened) \ + util_str_##_name(unsigned value, boolean shortened) \ { \ if(shortened) \ - return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \ + return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \ else \ - return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \ + return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \ } /** - * Same as DEFINE_UTIL_DUMP_CONTINUOUS but with static assertions to detect + * Same as DEFINE_UTIL_STR_CONTINUOUS but with static assertions to detect * failures to update lists. */ -#define DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(_name, _count) \ +#define DEFINE_UTIL_STR_CONTINUOUS_COUNT(_name, _count) \ const char * \ - util_dump_##_name(unsigned value, boolean shortened) \ + util_str_##_name(unsigned value, boolean shortened) \ { \ - STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_names) == _count); \ - STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_short_names) == _count); \ + STATIC_ASSERT(ARRAY_SIZE(util_##_name##_names) == _count); \ + STATIC_ASSERT(ARRAY_SIZE(util_##_name##_short_names) == _count); \ if(shortened) \ - return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \ + return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \ else \ - return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \ + return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \ } static const char * -util_dump_blend_factor_names[] = { +util_blend_factor_names[] = { UTIL_DUMP_INVALID_NAME, /* 0x0 */ "PIPE_BLENDFACTOR_ONE", "PIPE_BLENDFACTOR_SRC_COLOR", @@ -122,7 +122,7 @@ util_dump_blend_factor_names[] = { }; static const char * -util_dump_blend_factor_short_names[] = { +util_blend_factor_short_names[] = { UTIL_DUMP_INVALID_NAME, /* 0x0 */ "one", "src_color", @@ -152,11 +152,11 @@ util_dump_blend_factor_short_names[] = { "inv_src1_alpha" }; -DEFINE_UTIL_DUMP_CONTINUOUS(blend_factor) +DEFINE_UTIL_STR_CONTINUOUS(blend_factor) static const char * -util_dump_blend_func_names[] = { +util_blend_func_names[] = { "PIPE_BLEND_ADD", "PIPE_BLEND_SUBTRACT", "PIPE_BLEND_REVERSE_SUBTRACT", @@ -165,7 +165,7 @@ util_dump_blend_func_names[] = { }; static const char * -util_dump_blend_func_short_names[] = { +util_blend_func_short_names[] = { "add", "sub", "rev_sub", @@ -173,11 +173,11 @@ util_dump_blend_func_short_names[] = { "max" }; -DEFINE_UTIL_DUMP_CONTINUOUS(blend_func) +DEFINE_UTIL_STR_CONTINUOUS(blend_func) static const char * -util_dump_logicop_names[] = { +util_logicop_names[] = { "PIPE_LOGICOP_CLEAR", "PIPE_LOGICOP_NOR", "PIPE_LOGICOP_AND_INVERTED", @@ -197,7 +197,7 @@ util_dump_logicop_names[] = { }; static const char * -util_dump_logicop_short_names[] = { +util_logicop_short_names[] = { "clear", "nor", "and_inverted", @@ -216,11 +216,11 @@ util_dump_logicop_short_names[] = { "set" }; -DEFINE_UTIL_DUMP_CONTINUOUS(logicop) +DEFINE_UTIL_STR_CONTINUOUS(logicop) static const char * -util_dump_func_names[] = { +util_func_names[] = { "PIPE_FUNC_NEVER", "PIPE_FUNC_LESS", "PIPE_FUNC_EQUAL", @@ -232,7 +232,7 @@ util_dump_func_names[] = { }; static const char * -util_dump_func_short_names[] = { +util_func_short_names[] = { "never", "less", "equal", @@ -243,11 +243,11 @@ util_dump_func_short_names[] = { "always" }; -DEFINE_UTIL_DUMP_CONTINUOUS(func) +DEFINE_UTIL_STR_CONTINUOUS(func) static const char * -util_dump_stencil_op_names[] = { +util_stencil_op_names[] = { "PIPE_STENCIL_OP_KEEP", "PIPE_STENCIL_OP_ZERO", "PIPE_STENCIL_OP_REPLACE", @@ -259,7 +259,7 @@ util_dump_stencil_op_names[] = { }; static const char * -util_dump_stencil_op_short_names[] = { +util_stencil_op_short_names[] = { "keep", "zero", "replace", @@ -270,11 +270,11 @@ util_dump_stencil_op_short_names[] = { "invert" }; -DEFINE_UTIL_DUMP_CONTINUOUS(stencil_op) +DEFINE_UTIL_STR_CONTINUOUS(stencil_op) static const char * -util_dump_tex_target_names[] = { +util_tex_target_names[] = { "PIPE_BUFFER", "PIPE_TEXTURE_1D", "PIPE_TEXTURE_2D", @@ -287,7 +287,7 @@ util_dump_tex_target_names[] = { }; static const char * -util_dump_tex_target_short_names[] = { +util_tex_target_short_names[] = { "buffer", "1d", "2d", @@ -299,11 +299,11 @@ util_dump_tex_target_short_names[] = { "cube_array", }; -DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES) +DEFINE_UTIL_STR_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES) static const char * -util_dump_tex_wrap_names[] = { +util_tex_wrap_names[] = { "PIPE_TEX_WRAP_REPEAT", "PIPE_TEX_WRAP_CLAMP", "PIPE_TEX_WRAP_CLAMP_TO_EDGE", @@ -315,7 +315,7 @@ util_dump_tex_wrap_names[] = { }; static const char * -util_dump_tex_wrap_short_names[] = { +util_tex_wrap_short_names[] = { "repeat", "clamp", "clamp_to_edge", @@ -326,45 +326,46 @@ util_dump_tex_wrap_short_names[] = { "mirror_clamp_to_border" }; -DEFINE_UTIL_DUMP_CONTINUOUS(tex_wrap) +DEFINE_UTIL_STR_CONTINUOUS(tex_wrap) static const char * -util_dump_tex_mipfilter_names[] = { +util_tex_mipfilter_names[] = { "PIPE_TEX_MIPFILTER_NEAREST", "PIPE_TEX_MIPFILTER_LINEAR", "PIPE_TEX_MIPFILTER_NONE" }; static const char * -util_dump_tex_mipfilter_short_names[] = { +util_tex_mipfilter_short_names[] = { "nearest", "linear", "none" }; -DEFINE_UTIL_DUMP_CONTINUOUS(tex_mipfilter) +DEFINE_UTIL_STR_CONTINUOUS(tex_mipfilter) static const char * -util_dump_tex_filter_names[] = { +util_tex_filter_names[] = { "PIPE_TEX_FILTER_NEAREST", "PIPE_TEX_FILTER_LINEAR" }; static const char * -util_dump_tex_filter_short_names[] = { +util_tex_filter_short_names[] = { "nearest", "linear" }; -DEFINE_UTIL_DUMP_CONTINUOUS(tex_filter) +DEFINE_UTIL_STR_CONTINUOUS(tex_filter) static const char * -util_dump_query_type_names[] = { +util_query_type_names[] = { "PIPE_QUERY_OCCLUSION_COUNTER", "PIPE_QUERY_OCCLUSION_PREDICATE", + "PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE", "PIPE_QUERY_TIMESTAMP", "PIPE_QUERY_TIMESTAMP_DISJOINT", "PIPE_QUERY_TIME_ELAPSED", @@ -372,12 +373,13 @@ util_dump_query_type_names[] = { "PIPE_QUERY_PRIMITIVES_EMITTED", "PIPE_QUERY_SO_STATISTICS", "PIPE_QUERY_SO_OVERFLOW_PREDICATE", + "PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE", "PIPE_QUERY_GPU_FINISHED", "PIPE_QUERY_PIPELINE_STATISTICS", }; static const char * -util_dump_query_type_short_names[] = { +util_query_type_short_names[] = { "occlusion_counter", "occlusion_predicate", "timestamp", @@ -391,11 +393,30 @@ util_dump_query_type_short_names[] = { "pipeline_statistics", }; -DEFINE_UTIL_DUMP_CONTINUOUS(query_type) +DEFINE_UTIL_STR_CONTINUOUS(query_type) static const char * -util_dump_prim_mode_names[] = { +util_query_value_type_names[] = { + "PIPE_QUERY_TYPE_I32", + "PIPE_QUERY_TYPE_U32", + "PIPE_QUERY_TYPE_I64", + "PIPE_QUERY_TYPE_U64", +}; + +static const char * +util_query_value_type_short_names[] = { + "i32", + "u32", + "i64", + "u64", +}; + +DEFINE_UTIL_STR_CONTINUOUS(query_value_type) + + +static const char * +util_prim_mode_names[] = { "PIPE_PRIM_POINTS", "PIPE_PRIM_LINES", "PIPE_PRIM_LINE_LOOP", @@ -414,7 +435,7 @@ util_dump_prim_mode_names[] = { }; static const char * -util_dump_prim_mode_short_names[] = { +util_prim_mode_short_names[] = { "points", "lines", "line_loop", @@ -432,4 +453,20 @@ util_dump_prim_mode_short_names[] = { "patches", }; -DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode) +DEFINE_UTIL_STR_CONTINUOUS(prim_mode) + +void +util_dump_query_type(FILE *stream, unsigned value) +{ + if (value >= PIPE_QUERY_DRIVER_SPECIFIC) + fprintf(stream, "PIPE_QUERY_DRIVER_SPECIFIC + %i", + value - PIPE_QUERY_DRIVER_SPECIFIC); + else + fprintf(stream, "%s", util_str_query_type(value, false)); +} + +void +util_dump_query_value_type(FILE *stream, unsigned value) +{ + fprintf(stream, "%s", util_str_query_value_type(value, false)); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c index 8c4f2150b..3f755e536 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c @@ -28,136 +28,15 @@ #include "u_format.h" #include "u_format_s3tc.h" #include "util/format_srgb.h" +#include "../../../mesa/main/texcompress_s3tc_tmp.h" -#if defined(_WIN32) || defined(WIN32) -#define DXTN_LIBNAME "dxtn.dll" -#elif defined(__CYGWIN__) -#define DXTN_LIBNAME "cygtxc_dxtn.dll" -#elif defined(__APPLE__) -#define DXTN_LIBNAME "libtxc_dxtn.dylib" -#else -#define DXTN_LIBNAME "libtxc_dxtn.so" -#endif +util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1; +util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1; +util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3; +util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5; - -static void -util_format_dxt1_rgb_fetch_stub(int src_stride, - const uint8_t *src, - int col, int row, - uint8_t *dst) -{ - assert(0); -} - - -static void -util_format_dxt1_rgba_fetch_stub(int src_stride, - const uint8_t *src, - int col, int row, - uint8_t *dst ) -{ - assert(0); -} - - -static void -util_format_dxt3_rgba_fetch_stub(int src_stride, - const uint8_t *src, - int col, int row, - uint8_t *dst ) -{ - assert(0); -} - - -static void -util_format_dxt5_rgba_fetch_stub(int src_stride, - const uint8_t *src, - int col, int row, - uint8_t *dst ) -{ - assert(0); -} - - -static void -util_format_dxtn_pack_stub(int src_comps, - int width, int height, - const uint8_t *src, - enum util_format_dxtn dst_format, - uint8_t *dst, - int dst_stride) -{ - assert(0); -} - - -boolean util_format_s3tc_enabled = FALSE; - -util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub; -util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub; -util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub; -util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub; - -util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub; - - -void -util_format_s3tc_init(void) -{ - static boolean first_time = TRUE; - struct util_dl_library *library = NULL; - util_dl_proc fetch_2d_texel_rgb_dxt1; - util_dl_proc fetch_2d_texel_rgba_dxt1; - util_dl_proc fetch_2d_texel_rgba_dxt3; - util_dl_proc fetch_2d_texel_rgba_dxt5; - util_dl_proc tx_compress_dxtn; - - if (!first_time) - return; - first_time = FALSE; - - if (util_format_s3tc_enabled) - return; - - library = util_dl_open(DXTN_LIBNAME); - if (!library) { - debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " - "compression/decompression unavailable\n"); - return; - } - - fetch_2d_texel_rgb_dxt1 = - util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1"); - fetch_2d_texel_rgba_dxt1 = - util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1"); - fetch_2d_texel_rgba_dxt3 = - util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3"); - fetch_2d_texel_rgba_dxt5 = - util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5"); - tx_compress_dxtn = - util_dl_get_proc_address(library, "tx_compress_dxtn"); - - if (!util_format_dxt1_rgb_fetch || - !util_format_dxt1_rgba_fetch || - !util_format_dxt3_rgba_fetch || - !util_format_dxt5_rgba_fetch || - !util_format_dxtn_pack) { - debug_printf("couldn't reference all symbols in " DXTN_LIBNAME - ", software DXTn compression/decompression " - "unavailable\n"); - util_dl_close(library); - return; - } - - util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1; - util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1; - util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3; - util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5; - util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn; - util_format_s3tc_enabled = TRUE; -} +util_format_dxtn_pack_t util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn; /* diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h index ae20010cd..6f188c67f 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h @@ -58,8 +58,6 @@ typedef void uint8_t *dst, int dst_stride); -extern boolean util_format_s3tc_enabled; - extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch; extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch; extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch; @@ -69,10 +67,6 @@ extern util_format_dxtn_pack_t util_format_dxtn_pack; void -util_format_s3tc_init(void); - - -void util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); void diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c index 3075ea0ab..9c9a5838d 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c @@ -140,6 +140,12 @@ util_format_test_cases[] = {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3fffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, @@ -164,6 +170,19 @@ util_format_test_cases[] = {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xfffe), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c new file mode 100644 index 000000000..26104552e --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2017 Valve Corporation + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A simple allocator that allocates and release "numbers". + * + * @author Samuel Pitoiset <samuel.pitoiset@gmail.com> + */ + +#include "util/u_idalloc.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +void +util_idalloc_init(struct util_idalloc *buf) +{ + memset(buf, 0, sizeof(*buf)); +} + +void +util_idalloc_fini(struct util_idalloc *buf) +{ + if (buf->data) + free(buf->data); +} + +void +util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements) +{ + new_num_elements = align(new_num_elements, 32); + + if (new_num_elements > buf->num_elements) { + unsigned i; + + buf->data = realloc(buf->data, + (new_num_elements / 32) * sizeof(*buf->data)); + + for (i = buf->num_elements / 32; i < new_num_elements / 32; i++) + buf->data[i] = 0; + buf->num_elements = new_num_elements; + } +} + +unsigned +util_idalloc_alloc(struct util_idalloc *buf) +{ + unsigned num_elements = buf->num_elements; + + for (unsigned i = 0; i < num_elements / 32; i++) { + if (buf->data[i] == 0xffffffff) + continue; + + unsigned bit = ffs(~buf->data[i]) - 1; + buf->data[i] |= 1u << bit; + return i * 32 + bit; + } + + /* No slots available, resize and return the first free. */ + util_idalloc_resize(buf, num_elements * 2); + + buf->data[num_elements / 32] |= 1 << (num_elements % 32); + + return num_elements; +} + +void +util_idalloc_free(struct util_idalloc *buf, unsigned id) +{ + assert(id < buf->num_elements); + buf->data[id / 32] &= ~(1 << (id % 32)); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h new file mode 100644 index 000000000..82469e94d --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2017 Valve Corporation + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_IDALLOC_H +#define U_IDALLOC_H + +#include <inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct util_idalloc +{ + uint32_t *data; + unsigned num_elements; +}; + +void +util_idalloc_init(struct util_idalloc *buf); + +void +util_idalloc_fini(struct util_idalloc *buf); + +void +util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements); + +unsigned +util_idalloc_alloc(struct util_idalloc *buf); + +void +util_idalloc_free(struct util_idalloc *buf, unsigned id); + +#ifdef __cplusplus +} +#endif + +#endif /* U_IDALLOC_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_log.c b/lib/mesa/src/gallium/auxiliary/util/u_log.c new file mode 100644 index 000000000..dacbe0505 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_log.c @@ -0,0 +1,235 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "u_log.h" + +#include "u_memory.h" +#include "util/u_string.h" + +struct page_entry { + const struct u_log_chunk_type *type; + void *data; +}; + +struct u_log_page { + struct page_entry *entries; + unsigned num_entries; + unsigned max_entries; +}; + +struct u_log_auto_logger { + u_auto_log_fn *callback; + void *data; +}; + +/** + * Initialize the given logging context. + */ +void +u_log_context_init(struct u_log_context *ctx) +{ + memset(ctx, 0, sizeof(*ctx)); +} + +/** + * Free all resources associated with the given logging context. + * + * Pages taken from the context via \ref u_log_new_page must be destroyed + * separately. + */ +void +u_log_context_destroy(struct u_log_context *ctx) +{ + u_log_page_destroy(ctx->cur); + FREE(ctx->auto_loggers); + memset(ctx, 0, sizeof(*ctx)); +} + +/** + * Add an auto logger. + * + * Auto loggers are called each time a chunk is added to the log. + */ +void +u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback, + void *data) +{ + struct u_log_auto_logger *new_auto_loggers = + REALLOC(ctx->auto_loggers, + sizeof(*new_auto_loggers) * ctx->num_auto_loggers, + sizeof(*new_auto_loggers) * (ctx->num_auto_loggers + 1)); + if (!new_auto_loggers) { + fprintf(stderr, "Gallium u_log: out of memory\n"); + return; + } + + unsigned idx = ctx->num_auto_loggers++; + ctx->auto_loggers = new_auto_loggers; + ctx->auto_loggers[idx].callback = callback; + ctx->auto_loggers[idx].data = data; +} + +/** + * Make sure that auto loggers have run. + */ +void +u_log_flush(struct u_log_context *ctx) +{ + if (!ctx->num_auto_loggers) + return; + + struct u_log_auto_logger *auto_loggers = ctx->auto_loggers; + unsigned num_auto_loggers = ctx->num_auto_loggers; + + /* Prevent recursion. */ + ctx->num_auto_loggers = 0; + ctx->auto_loggers = NULL; + + for (unsigned i = 0; i < num_auto_loggers; ++i) + auto_loggers[i].callback(auto_loggers[i].data, ctx); + + assert(!ctx->num_auto_loggers); + ctx->num_auto_loggers = num_auto_loggers; + ctx->auto_loggers = auto_loggers; +} + +static void str_print(void *data, FILE *stream) +{ + fputs((char *)data, stream); +} + +static const struct u_log_chunk_type str_chunk_type = { + .destroy = free, + .print = str_print, +}; + +void +u_log_printf(struct u_log_context *ctx, const char *fmt, ...) +{ + va_list va; + char *str = NULL; + + va_start(va, fmt); + int ret = util_vasprintf(&str, fmt, va); + va_end(va); + + if (ret >= 0) { + u_log_chunk(ctx, &str_chunk_type, str); + } else { + fprintf(stderr, "Gallium u_log_printf: out of memory\n"); + } +} + +/** + * Add a custom chunk to the log. + * + * type->destroy will be called as soon as \p data is no longer needed. + */ +void +u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type, + void *data) +{ + struct u_log_page *page = ctx->cur; + + u_log_flush(ctx); + + if (!page) { + ctx->cur = CALLOC_STRUCT(u_log_page); + page = ctx->cur; + if (!page) + goto out_of_memory; + } + + if (page->num_entries >= page->max_entries) { + unsigned new_max_entries = MAX2(16, page->num_entries * 2); + struct page_entry *new_entries = REALLOC(page->entries, + page->max_entries * sizeof(*page->entries), + new_max_entries * sizeof(*page->entries)); + if (!new_entries) + goto out_of_memory; + + page->entries = new_entries; + page->max_entries = new_max_entries; + } + + page->entries[page->num_entries].type = type; + page->entries[page->num_entries].data = data; + page->num_entries++; + return; + +out_of_memory: + fprintf(stderr, "Gallium: u_log: out of memory\n"); +} + +/** + * Convenience helper that starts a new page and prints the previous one. + */ +void +u_log_new_page_print(struct u_log_context *ctx, FILE *stream) +{ + if (ctx->cur) { + u_log_page_print(ctx->cur, stream); + u_log_page_destroy(ctx->cur); + ctx->cur = NULL; + } +} + +/** + * Return the current page from the logging context and start a new one. + * + * The caller is responsible for destroying the returned page. + */ +struct u_log_page * +u_log_new_page(struct u_log_context *ctx) +{ + struct u_log_page *page = ctx->cur; + ctx->cur = NULL; + return page; +} + +/** + * Free all data associated with \p page. + */ +void +u_log_page_destroy(struct u_log_page *page) +{ + if (!page) + return; + + for (unsigned i = 0; i < page->num_entries; ++i) { + if (page->entries[i].type->destroy) + page->entries[i].type->destroy(page->entries[i].data); + } + FREE(page->entries); + FREE(page); +} + +/** + * Print the given page to \p stream. + */ +void +u_log_page_print(struct u_log_page *page, FILE *stream) +{ + for (unsigned i = 0; i < page->num_entries; ++i) + page->entries[i].type->print(page->entries[i].data, stream); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_log.h b/lib/mesa/src/gallium/auxiliary/util/u_log.h new file mode 100644 index 000000000..09c47caee --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_log.h @@ -0,0 +1,100 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file u_log.h + * @brief Context logging facilities + * + * Provides a means of logging context events (draw calls, command streams, ...) + * into files. + * + * Log entries start their life cycle as "chunks". Chunks can be plain text + * written by \ref u_log_printf or custom internal representations added by + * \ref u_log_chunk that are only converted to text on-demand (e.g. for higher + * performance pipelined hang-debugging). + * + * Chunks are accumulated into "pages". The manager of the log can periodically + * take out the current page using \ref u_log_new_page and dump it to a file. + * + * Furthermore, "auto loggers" can be added to a context, which are callbacks + * that are given the opportunity to add their own logging each time a chunk is + * added. Drivers can use this to lazily log chunks of their command stream. + * Lazy loggers don't need to be re-entrant. + */ + +#ifndef U_LOG_H +#define U_LOG_H + +#include <stdio.h> + +#include "u_debug.h" + +struct u_log_page; +struct u_log_auto_logger; + +struct u_log_chunk_type { + void (*destroy)(void *data); + void (*print)(void *data, FILE *stream); +}; + +struct u_log_context { + struct u_log_page *cur; + struct u_log_auto_logger *auto_loggers; + unsigned num_auto_loggers; +}; + +typedef void (u_auto_log_fn)(void *data, struct u_log_context *ctx); + +void +u_log_context_init(struct u_log_context *ctx); + +void +u_log_context_destroy(struct u_log_context *ctx); + +void +u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback, + void *data); + +void +u_log_flush(struct u_log_context *ctx); + +void +u_log_printf(struct u_log_context *ctx, const char *fmt, ...) _util_printf_format(2,3); + +void +u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type, + void *data); + +void +u_log_new_page_print(struct u_log_context *ctx, FILE *stream); + +struct u_log_page * +u_log_new_page(struct u_log_context *ctx); + +void +u_log_page_destroy(struct u_log_page *page); + +void +u_log_page_print(struct u_log_page *page, FILE *stream); + +#endif /* U_LOG_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_mm.c b/lib/mesa/src/gallium/auxiliary/util/u_mm.c index bd4c4e1b1..7a45e2919 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_mm.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_mm.c @@ -183,7 +183,10 @@ u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) assert(size >= 0); assert(align2 >= 0); - assert(align2 <= 12); /* sanity check, 2^12 (4KB) enough? */ + /* Make sure that a byte alignment isn't getting passed for our + * power-of-two alignment arg. + */ + assert(align2 < 32); if (!heap || align2 < 0 || size <= 0) return NULL; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h b/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h index 4927470e4..8623e1b3b 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h @@ -45,7 +45,8 @@ enum VL_CSC_COLOR_STANDARD VL_CSC_COLOR_STANDARD_IDENTITY, VL_CSC_COLOR_STANDARD_BT_601, VL_CSC_COLOR_STANDARD_BT_709, - VL_CSC_COLOR_STANDARD_SMPTE_240M + VL_CSC_COLOR_STANDARD_SMPTE_240M, + VL_CSC_COLOR_STANDARD_BT_709_REV }; extern const struct vl_procamp vl_default_procamp; diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c new file mode 100644 index 000000000..0f3cd7257 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2017 Etnaviv Project + * Copyright (C) 2017 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "etnaviv_context.h" +#include "etnaviv_debug.h" +#include "etnaviv_emit.h" +#include "etnaviv_query_hw.h" +#include "etnaviv_screen.h" + +/* + * Occlusion Query: + * + * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they + * interpret results + */ + +static void +occlusion_start(struct etna_hw_query *hq, struct etna_context *ctx) +{ + struct etna_resource *rsc = etna_resource(hq->prsc); + struct etna_reloc r = { + .bo = rsc->bo, + .flags = ETNA_RELOC_WRITE + }; + + if (hq->samples > 63) { + hq->samples = 63; + BUG("samples overflow"); + } + + r.offset = hq->samples * 8; /* 64bit value */ + + etna_set_state_reloc(ctx->stream, VIVS_GL_OCCLUSION_QUERY_ADDR, &r); +} + +static void +occlusion_stop(struct etna_hw_query *hq, struct etna_context *ctx) +{ + /* 0x1DF5E76 is the value used by blob - but any random value will work */ + etna_set_state(ctx->stream, VIVS_GL_OCCLUSION_QUERY_CONTROL, 0x1DF5E76); +} + +static void +occlusion_suspend(struct etna_hw_query *hq, struct etna_context *ctx) +{ + occlusion_stop(hq, ctx); +} + +static void +occlusion_resume(struct etna_hw_query *hq, struct etna_context *ctx) +{ + hq->samples++; + occlusion_start(hq, ctx); +} + +static void +occlusion_result(struct etna_hw_query *hq, void *buf, + union pipe_query_result *result) +{ + uint64_t sum = 0; + uint64_t *ptr = (uint64_t *)buf; + + for (unsigned i = 0; i <= hq->samples; i++) + sum += *(ptr + i); + + if (hq->base.type == PIPE_QUERY_OCCLUSION_COUNTER) + result->u64 = sum; + else + result->b = !!sum; +} + +static void +etna_hw_destroy_query(struct etna_context *ctx, struct etna_query *q) +{ + struct etna_hw_query *hq = etna_hw_query(q); + + pipe_resource_reference(&hq->prsc, NULL); + list_del(&hq->node); + + FREE(hq); +} + +static const struct etna_hw_sample_provider occlusion_provider = { + .start = occlusion_start, + .stop = occlusion_stop, + .suspend = occlusion_suspend, + .resume = occlusion_resume, + .result = occlusion_result, +}; + +static void +realloc_query_bo(struct etna_context *ctx, struct etna_hw_query *hq) +{ + struct etna_resource *rsc; + void *map; + + pipe_resource_reference(&hq->prsc, NULL); + + /* allocate resource with space for 64 * 64bit values */ + hq->prsc = pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER, + 0, 0x1000); + + /* don't assume the buffer is zero-initialized */ + rsc = etna_resource(hq->prsc); + + etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_WRITE); + + map = etna_bo_map(rsc->bo); + memset(map, 0, 0x1000); + etna_bo_cpu_fini(rsc->bo); +} + +static boolean +etna_hw_begin_query(struct etna_context *ctx, struct etna_query *q) +{ + struct etna_hw_query *hq = etna_hw_query(q); + const struct etna_hw_sample_provider *p = hq->provider; + + /* ->begin_query() discards previous results, so realloc bo */ + realloc_query_bo(ctx, hq); + + p->start(hq, ctx); + + /* add to active list */ + assert(list_empty(&hq->node)); + list_addtail(&hq->node, &ctx->active_hw_queries); + + return true; +} + +static void +etna_hw_end_query(struct etna_context *ctx, struct etna_query *q) +{ + struct etna_hw_query *hq = etna_hw_query(q); + const struct etna_hw_sample_provider *p = hq->provider; + + p->stop(hq, ctx); + + /* remove from active list */ + list_delinit(&hq->node); +} + +static boolean +etna_hw_get_query_result(struct etna_context *ctx, struct etna_query *q, + boolean wait, union pipe_query_result *result) +{ + struct etna_hw_query *hq = etna_hw_query(q); + struct etna_resource *rsc = etna_resource(hq->prsc); + const struct etna_hw_sample_provider *p = hq->provider; + + assert(LIST_IS_EMPTY(&hq->node)); + + if (!wait) { + int ret; + + if (rsc->status & ETNA_PENDING_WRITE) { + /* piglit spec@arb_occlusion_query@occlusion_query_conform + * test, and silly apps perhaps, get stuck in a loop trying + * to get query result forever with wait==false.. we don't + * wait to flush unnecessarily but we also don't want to + * spin forever. + */ + if (hq->no_wait_cnt++ > 5) + ctx->base.flush(&ctx->base, NULL, 0); + return false; + } + + ret = etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ | DRM_ETNA_PREP_NOSYNC); + if (ret) + return false; + + etna_bo_cpu_fini(rsc->bo); + } + + /* flush that GPU executes all query related actions */ + ctx->base.flush(&ctx->base, NULL, 0); + + /* get the result */ + etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ); + + void *ptr = etna_bo_map(rsc->bo); + p->result(hq, ptr, result); + + etna_bo_cpu_fini(rsc->bo); + + return true; +} + +static const struct etna_query_funcs hw_query_funcs = { + .destroy_query = etna_hw_destroy_query, + .begin_query = etna_hw_begin_query, + .end_query = etna_hw_end_query, + .get_query_result = etna_hw_get_query_result, +}; + +static inline const struct etna_hw_sample_provider * +query_sample_provider(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + /* fallthrough */ + case PIPE_QUERY_OCCLUSION_PREDICATE: + /* fallthrough */ + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + return &occlusion_provider; + default: + return NULL; + } +} + +struct etna_query * +etna_hw_create_query(struct etna_context *ctx, unsigned query_type) +{ + struct etna_hw_query *hq; + struct etna_query *q; + const struct etna_hw_sample_provider *p; + + p = query_sample_provider(query_type); + if (!p) + return NULL; + + hq = CALLOC_STRUCT(etna_hw_query); + if (!hq) + return NULL; + + hq->provider = p; + + list_inithead(&hq->node); + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = query_type; + + return q; +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h new file mode 100644 index 000000000..73f3c851e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 Etnaviv Project + * Copyright (C) 2017 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#ifndef H_ETNAVIV_QUERY_HW +#define H_ETNAVIV_QUERY_HW + +#include "etnaviv_query.h" + +struct etna_hw_query; + +struct etna_hw_sample_provider { + void (*start)(struct etna_hw_query *hq, struct etna_context *ctx); + void (*stop)(struct etna_hw_query *hq, struct etna_context *ctx); + void (*suspend)(struct etna_hw_query *hq, struct etna_context *ctx); + void (*resume)(struct etna_hw_query *hq, struct etna_context *ctx); + + void (*result)(struct etna_hw_query *hq, void *buf, + union pipe_query_result *result); +}; + +struct etna_hw_query { + struct etna_query base; + + struct pipe_resource *prsc; + unsigned samples; /* number of samples stored in resource */ + unsigned no_wait_cnt; /* see etna_hw_get_query_result() */ + struct list_head node; /* list-node in ctx->active_hw_queries */ + + const struct etna_hw_sample_provider *provider; +}; + +static inline struct etna_hw_query * +etna_hw_query(struct etna_query *q) +{ + return (struct etna_hw_query *)q; +} + +struct etna_query * +etna_hw_create_query(struct etna_context *ctx, unsigned query_type); + +static inline void +etna_hw_query_suspend(struct etna_hw_query *hq, struct etna_context *ctx) +{ + const struct etna_hw_sample_provider *p = hq->provider; + + if (!hq->base.active) + return; + + p->suspend(hq, ctx); +} + +static inline void +etna_hw_query_resume(struct etna_hw_query *hq, struct etna_context *ctx) +{ + const struct etna_hw_sample_provider *p = hq->provider; + + if (!hq->base.active) + return; + + p->resume(hq, ctx); +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h index 3c8d8f7c0..1d3d2fb76 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h @@ -34,7 +34,9 @@ struct fd2_blend_stateobj { struct pipe_blend_state base; - uint32_t rb_blendcontrol; + uint32_t rb_blendcontrol_rgb; + uint32_t rb_blendcontrol_alpha; + uint32_t rb_blendcontrol_no_alpha_rgb; uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */ uint32_t rb_colormask; }; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c index 6dc639670..0905ab6f6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c @@ -46,6 +46,12 @@ static uint32_t fmt2swap(enum pipe_format format) { switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B5G5R5X1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B4G4R4X4_UNORM: /* TODO probably some more.. */ return 1; default: diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c index 12ab5b410..c2a60c683 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c @@ -52,8 +52,15 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen, /* TODO figure out how to render to other formats.. */ if ((usage & PIPE_BIND_RENDER_TARGET) && - ((format != PIPE_FORMAT_B8G8R8A8_UNORM) && - (format != PIPE_FORMAT_B8G8R8X8_UNORM))) { + ((format != PIPE_FORMAT_B5G6R5_UNORM) && + (format != PIPE_FORMAT_B5G5R5A1_UNORM) && + (format != PIPE_FORMAT_B5G5R5X1_UNORM) && + (format != PIPE_FORMAT_B4G4R4A4_UNORM) && + (format != PIPE_FORMAT_B4G4R4X4_UNORM) && + (format != PIPE_FORMAT_B8G8R8A8_UNORM) && + (format != PIPE_FORMAT_B8G8R8X8_UNORM) && + (format != PIPE_FORMAT_R8G8B8A8_UNORM) && + (format != PIPE_FORMAT_R8G8B8X8_UNORM))) { DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x", util_format_name(format), target, sample_count, usage); return FALSE; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h index 5294ced3c..c2808162c 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h @@ -78,10 +78,8 @@ /** * Max number of instructions (for all fragment shaders combined per context) * that will be kept around (counted in terms of llvm ir). - * Note: the definition looks odd, but there's branches which use a different - * number of max shader variants. */ -#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS) +#define LP_MAX_SHADER_INSTRUCTIONS (2048 * LP_MAX_SHADER_VARIANTS) /** * Max number of setup variants that will be kept around. diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c index d5ed6561b..7b81903b4 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c @@ -125,6 +125,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, } break; case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: for (i = 0; i < num_threads; i++) { /* safer (still not guaranteed) when there's an overflow */ vresult->b = vresult->b || pq->end[i]; @@ -155,6 +156,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, *result = pq->num_primitives_written; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: vresult->b = pq->num_primitives_generated > pq->num_primitives_written; break; case PIPE_QUERY_SO_STATISTICS: { @@ -215,6 +217,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written; pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; break; @@ -229,6 +232,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: llvmpipe->active_occlusion_queries++; llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY; break; @@ -264,6 +268,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written; pq->num_primitives_generated = @@ -291,6 +296,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: assert(llvmpipe->active_occlusion_queries); llvmpipe->active_occlusion_queries--; llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c index 13bed088b..98d6bbe50 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -95,12 +95,12 @@ write_tsv_row(FILE *fp, fprintf(fp, "%s\t%s\t%s\t%s\t%s\t%s\n", - util_dump_blend_func(blend->rt[0].rgb_func, TRUE), - util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), - util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), - util_dump_blend_func(blend->rt[0].alpha_func, TRUE), - util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), - util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); + util_str_blend_func(blend->rt[0].rgb_func, TRUE), + util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + util_str_blend_func(blend->rt[0].alpha_func, TRUE), + util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); fflush(fp); } @@ -119,12 +119,12 @@ dump_blend_type(FILE *fp, fprintf(fp, " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", - "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE), - "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), - "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), - "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE), - "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), - "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); + "rgb_func", util_str_blend_func(blend->rt[0].rgb_func, TRUE), + "rgb_src_factor", util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + "rgb_dst_factor", util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + "alpha_func", util_str_blend_func(blend->rt[0].alpha_func, TRUE), + "alpha_src_factor", util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + "alpha_dst_factor", util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); fprintf(fp, " ...\n"); fflush(fp); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c index 9b1616213..e9a6e01fd 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -357,8 +357,6 @@ test_all(unsigned verbose, FILE *fp) enum pipe_format format; boolean success = TRUE; - util_format_s3tc_init(); - #if USE_TEXTURE_CACHE cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); #endif @@ -383,11 +381,6 @@ test_all(unsigned verbose, FILE *fp) if (util_format_is_pure_integer(format)) continue; - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && - !util_format_s3tc_enabled) { - continue; - } - /* only have util fetch func for etc1 */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && format != PIPE_FORMAT_ETC1_RGB8) { diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c index 733253b0b..162c74ad7 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c @@ -303,6 +303,8 @@ llvmpipe_resource_create_front(struct pipe_screen *_screen, FREE(lpr); return NULL; } + + static struct pipe_resource * llvmpipe_resource_create(struct pipe_screen *_screen, const struct pipe_resource *templat) @@ -310,6 +312,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen, return llvmpipe_resource_create_front(_screen, templat, NULL); } + static void llvmpipe_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt) diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h index 5556e0c77..89d5b935e 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h @@ -10,7 +10,6 @@ * POW - EX2 + MUL + LG2 * SUB - ADD, second source negated * SWZ - MOV - * XPD - * * Register access * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h index e66d8af76..907ca17b0 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h +++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h @@ -163,8 +163,6 @@ * SUB - ADD, negate second source * RSQ - LG2 + EX2 * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD * * NV40 Looping * Loops appear to be fairly expensive on NV40 at least, the proprietary diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index 727b50937..ac3e409b2 100644 --- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -157,6 +157,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: hq->nesting = nv50->screen->num_occlusion_queries_active++; if (hq->nesting) { nv50_hw_query_get(push, q, 0x10, 0x0100f002); @@ -215,6 +216,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: nv50_hw_query_get(push, q, 0, 0x0100f002); if (--nv50->screen->num_occlusion_queries_active == 0) { PUSH_SPACE(push, 2); @@ -307,6 +309,7 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, res64[0] = hq->data[1] - hq->data[5]; break; case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: res8[0] = hq->data[1] != hq->data[5]; break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ @@ -378,6 +381,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: hq->rotate = 32; break; case PIPE_QUERY_PRIMITIVES_GENERATED: diff --git a/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c index 075eac50f..3ec5a42c7 100644 --- a/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c +++ b/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c @@ -35,6 +35,8 @@ #include <stdio.h> +#include "util/macros.h" + #include "r300_reg.h" #include "radeon_compiler.h" @@ -61,7 +63,7 @@ static const struct swizzle_data native_swizzles[] = { {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0} }; -static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); +static const int num_native_swizzles = ARRAY_SIZE(native_swizzles); /** * Find a native RGB swizzle that matches the given swizzle. diff --git a/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c b/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c index 0f021e9f4..d86819afa 100644 --- a/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c +++ b/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c @@ -26,7 +26,6 @@ #include "r300_fs.h" #include "util/u_format.h" -#include "util/u_mm.h" /* HiZ rules - taken from various docs diff --git a/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c b/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c new file mode 100644 index 000000000..6bc307a4b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c @@ -0,0 +1,270 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +#include "r600_cs.h" +#include "evergreend.h" + +/* 2xMSAA + * There are two locations (4, 4), (-4, -4). */ +const uint32_t eg_sample_locs_2x[4] = { + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), +}; +const unsigned eg_max_dist_2x = 4; +/* 4xMSAA + * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */ +const uint32_t eg_sample_locs_4x[4] = { + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), +}; +const unsigned eg_max_dist_4x = 6; + +/* Cayman 8xMSAA */ +static const uint32_t cm_sample_locs_8x[] = { + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), +}; +static const unsigned cm_max_dist_8x = 8; +/* Cayman 16xMSAA */ +static const uint32_t cm_sample_locs_16x[] = { + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), +}; +static const unsigned cm_max_dist_16x = 8; + +void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, + unsigned sample_index, float *out_value) +{ + int offset, index; + struct { + int idx:4; + } val; + switch (sample_count) { + case 1: + default: + out_value[0] = out_value[1] = 0.5; + break; + case 2: + offset = 4 * (sample_index * 2); + val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 4: + offset = 4 * (sample_index * 2); + val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 8: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 16: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + } +} + +void cayman_init_msaa(struct pipe_context *ctx) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + int i; + + cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]); + + for (i = 0; i < 2; i++) + cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]); + for (i = 0; i < 4; i++) + cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]); + for (i = 0; i < 8; i++) + cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]); + for (i = 0; i < 16; i++) + cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]); +} + +void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) +{ + switch (nr_samples) { + default: + case 1: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); + break; + case 2: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]); + break; + case 4: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]); + break; + case 8: + radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); + radeon_emit(cs, cm_sample_locs_8x[0]); + radeon_emit(cs, cm_sample_locs_8x[4]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, cm_sample_locs_8x[1]); + radeon_emit(cs, cm_sample_locs_8x[5]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, cm_sample_locs_8x[2]); + radeon_emit(cs, cm_sample_locs_8x[6]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, cm_sample_locs_8x[3]); + radeon_emit(cs, cm_sample_locs_8x[7]); + break; + case 16: + radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16); + radeon_emit(cs, cm_sample_locs_16x[0]); + radeon_emit(cs, cm_sample_locs_16x[4]); + radeon_emit(cs, cm_sample_locs_16x[8]); + radeon_emit(cs, cm_sample_locs_16x[12]); + radeon_emit(cs, cm_sample_locs_16x[1]); + radeon_emit(cs, cm_sample_locs_16x[5]); + radeon_emit(cs, cm_sample_locs_16x[9]); + radeon_emit(cs, cm_sample_locs_16x[13]); + radeon_emit(cs, cm_sample_locs_16x[2]); + radeon_emit(cs, cm_sample_locs_16x[6]); + radeon_emit(cs, cm_sample_locs_16x[10]); + radeon_emit(cs, cm_sample_locs_16x[14]); + radeon_emit(cs, cm_sample_locs_16x[3]); + radeon_emit(cs, cm_sample_locs_16x[7]); + radeon_emit(cs, cm_sample_locs_16x[11]); + radeon_emit(cs, cm_sample_locs_16x[15]); + break; + } +} + +void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, + int ps_iter_samples, int overrast_samples, + unsigned sc_mode_cntl_1) +{ + int setup_samples = nr_samples > 1 ? nr_samples : + overrast_samples > 1 ? overrast_samples : 0; + /* Required by OpenGL line rasterization. + * + * TODO: We should also enable perpendicular endcaps for AA lines, + * but that requires implementing line stippling in the pixel + * shader. SC can only do line stippling with axis-aligned + * endcaps. + */ + unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); + + if (setup_samples > 1) { + /* indexed by log2(nr_samples) */ + unsigned max_dist[] = { + 0, + eg_max_dist_2x, + eg_max_dist_4x, + cm_max_dist_8x, + cm_max_dist_16x + }; + unsigned log_samples = util_logbase2(setup_samples); + unsigned log_ps_iter_samples = + util_logbase2(util_next_power_of_two(ps_iter_samples)); + + radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); + radeon_emit(cs, sc_line_cntl | + S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ + radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | + S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | + S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ + + if (nr_samples > 1) { + radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, + S_028804_MAX_ANCHOR_SAMPLES(log_samples) | + S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | + S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | + S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); + radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, + EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | + sc_mode_cntl_1); + } else if (overrast_samples > 1) { + radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | + S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); + radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, + sc_mode_cntl_1); + } + } else { + radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2); + radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */ + radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */ + + radeon_set_context_reg(cs, CM_R_028804_DB_EQAA, + S_028804_HIGH_QUALITY_INTERSECTIONS(1) | + S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); + radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, + sc_mode_cntl_1); + } +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c new file mode 100644 index 000000000..a6e3b7fcf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c @@ -0,0 +1,685 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Marek Olšák + */ + +#include "r600_cs.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include <inttypes.h> +#include <stdio.h> + +bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, + struct pb_buffer *buf, + enum radeon_bo_usage usage) +{ + if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) { + return true; + } + if (radeon_emitted(ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) { + return true; + } + return false; +} + +void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, + struct r600_resource *resource, + unsigned usage) +{ + enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; + bool busy = false; + + assert(!(resource->flags & RADEON_FLAG_SPARSE)); + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + return ctx->ws->buffer_map(resource->buf, NULL, usage); + } + + if (!(usage & PIPE_TRANSFER_WRITE)) { + /* have to wait for the last write */ + rusage = RADEON_USAGE_WRITE; + } + + if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && + ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, + resource->buf, rusage)) { + if (usage & PIPE_TRANSFER_DONTBLOCK) { + ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + return NULL; + } else { + ctx->gfx.flush(ctx, 0, NULL); + busy = true; + } + } + if (radeon_emitted(ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, + resource->buf, rusage)) { + if (usage & PIPE_TRANSFER_DONTBLOCK) { + ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + return NULL; + } else { + ctx->dma.flush(ctx, 0, NULL); + busy = true; + } + } + + if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) { + if (usage & PIPE_TRANSFER_DONTBLOCK) { + return NULL; + } else { + /* We will be wait for the GPU. Wait for any offloaded + * CS flush to complete to avoid busy-waiting in the winsys. */ + ctx->ws->cs_sync_flush(ctx->gfx.cs); + if (ctx->dma.cs) + ctx->ws->cs_sync_flush(ctx->dma.cs); + } + } + + /* Setting the CS to NULL will prevent doing checks we have done already. */ + return ctx->ws->buffer_map(resource->buf, NULL, usage); +} + +void r600_init_resource_fields(struct r600_common_screen *rscreen, + struct r600_resource *res, + uint64_t size, unsigned alignment) +{ + struct r600_texture *rtex = (struct r600_texture*)res; + + res->bo_size = size; + res->bo_alignment = alignment; + res->flags = 0; + res->texture_handle_allocated = false; + res->image_handle_allocated = false; + + switch (res->b.b.usage) { + case PIPE_USAGE_STREAM: + res->flags = RADEON_FLAG_GTT_WC; + /* fall through */ + case PIPE_USAGE_STAGING: + /* Transfers are likely to occur more often with these + * resources. */ + res->domains = RADEON_DOMAIN_GTT; + break; + case PIPE_USAGE_DYNAMIC: + /* Older kernels didn't always flush the HDP cache before + * CS execution + */ + if (rscreen->info.drm_major == 2 && + rscreen->info.drm_minor < 40) { + res->domains = RADEON_DOMAIN_GTT; + res->flags |= RADEON_FLAG_GTT_WC; + break; + } + /* fall through */ + case PIPE_USAGE_DEFAULT: + case PIPE_USAGE_IMMUTABLE: + default: + /* Not listing GTT here improves performance in some + * apps. */ + res->domains = RADEON_DOMAIN_VRAM; + res->flags |= RADEON_FLAG_GTT_WC; + break; + } + + if (res->b.b.target == PIPE_BUFFER && + res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | + PIPE_RESOURCE_FLAG_MAP_COHERENT)) { + /* Use GTT for all persistent mappings with older + * kernels, because they didn't always flush the HDP + * cache before CS execution. + * + * Write-combined CPU mappings are fine, the kernel + * ensures all CPU writes finish before the GPU + * executes a command stream. + */ + if (rscreen->info.drm_major == 2 && + rscreen->info.drm_minor < 40) + res->domains = RADEON_DOMAIN_GTT; + } + + /* Tiled textures are unmappable. Always put them in VRAM. */ + if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) || + res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) { + res->domains = RADEON_DOMAIN_VRAM; + res->flags |= RADEON_FLAG_NO_CPU_ACCESS | + RADEON_FLAG_GTT_WC; + } + + /* Only displayable single-sample textures can be shared between + * processes. */ + if (res->b.b.target == PIPE_BUFFER || + res->b.b.nr_samples >= 2 || + (rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY && + /* Raven doesn't use display micro mode for 32bpp, so check this: */ + !(res->b.b.bind & PIPE_BIND_SCANOUT))) + res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; + + /* If VRAM is just stolen system memory, allow both VRAM and + * GTT, whichever has free space. If a buffer is evicted from + * VRAM to GTT, it will stay there. + * + * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only + * placements even with a low amount of stolen VRAM. + */ + if (!rscreen->info.has_dedicated_vram && + (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) && + res->domains == RADEON_DOMAIN_VRAM) { + res->domains = RADEON_DOMAIN_VRAM_GTT; + res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */ + } + + if (rscreen->debug_flags & DBG_NO_WC) + res->flags &= ~RADEON_FLAG_GTT_WC; + + if (res->b.b.bind & PIPE_BIND_SHARED) + res->flags |= RADEON_FLAG_NO_SUBALLOC; + + /* Set expected VRAM and GART usage for the buffer. */ + res->vram_usage = 0; + res->gart_usage = 0; + + if (res->domains & RADEON_DOMAIN_VRAM) + res->vram_usage = size; + else if (res->domains & RADEON_DOMAIN_GTT) + res->gart_usage = size; +} + +bool r600_alloc_resource(struct r600_common_screen *rscreen, + struct r600_resource *res) +{ + struct pb_buffer *old_buf, *new_buf; + + /* Allocate a new resource. */ + new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size, + res->bo_alignment, + res->domains, res->flags); + if (!new_buf) { + return false; + } + + /* Replace the pointer such that if res->buf wasn't NULL, it won't be + * NULL. This should prevent crashes with multiple contexts using + * the same buffer where one of the contexts invalidates it while + * the others are using it. */ + old_buf = res->buf; + res->buf = new_buf; /* should be atomic */ + + if (rscreen->info.has_virtual_memory) + res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf); + else + res->gpu_address = 0; + + pb_reference(&old_buf, NULL); + + util_range_set_empty(&res->valid_buffer_range); + + /* Print debug information. */ + if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) { + fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n", + res->gpu_address, res->gpu_address + res->buf->size, + res->buf->size); + } + return true; +} + +static void r600_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct r600_resource *rbuffer = r600_resource(buf); + + threaded_resource_deinit(buf); + util_range_destroy(&rbuffer->valid_buffer_range); + pb_reference(&rbuffer->buf, NULL); + FREE(rbuffer); +} + +static bool +r600_invalidate_buffer(struct r600_common_context *rctx, + struct r600_resource *rbuffer) +{ + /* Shared buffers can't be reallocated. */ + if (rbuffer->b.is_shared) + return false; + + /* Sparse buffers can't be reallocated. */ + if (rbuffer->flags & RADEON_FLAG_SPARSE) + return false; + + /* In AMD_pinned_memory, the user pointer association only gets + * broken when the buffer is explicitly re-allocated. + */ + if (rbuffer->b.is_user_ptr) + return false; + + /* Check if mapping this buffer would cause waiting for the GPU. */ + if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { + rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b); + } else { + util_range_set_empty(&rbuffer->valid_buffer_range); + } + + return true; +} + +/* Replace the storage of dst with src. */ +void r600_replace_buffer_storage(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_resource *rdst = r600_resource(dst); + struct r600_resource *rsrc = r600_resource(src); + uint64_t old_gpu_address = rdst->gpu_address; + + pb_reference(&rdst->buf, rsrc->buf); + rdst->gpu_address = rsrc->gpu_address; + rdst->b.b.bind = rsrc->b.b.bind; + rdst->flags = rsrc->flags; + + assert(rdst->vram_usage == rsrc->vram_usage); + assert(rdst->gart_usage == rsrc->gart_usage); + assert(rdst->bo_size == rsrc->bo_size); + assert(rdst->bo_alignment == rsrc->bo_alignment); + assert(rdst->domains == rsrc->domains); + + rctx->rebind_buffer(ctx, dst, old_gpu_address); +} + +void r600_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_resource *rbuffer = r600_resource(resource); + + /* We currently only do anyting here for buffers */ + if (resource->target == PIPE_BUFFER) + (void)r600_invalidate_buffer(rctx, rbuffer); +} + +static void *r600_buffer_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer, + void *data, struct r600_resource *staging, + unsigned offset) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_transfer *transfer; + + if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC) + transfer = slab_alloc(&rctx->pool_transfers_unsync); + else + transfer = slab_alloc(&rctx->pool_transfers); + + transfer->b.b.resource = NULL; + pipe_resource_reference(&transfer->b.b.resource, resource); + transfer->b.b.level = 0; + transfer->b.b.usage = usage; + transfer->b.b.box = *box; + transfer->b.b.stride = 0; + transfer->b.b.layer_stride = 0; + transfer->b.staging = NULL; + transfer->offset = offset; + transfer->staging = staging; + *ptransfer = &transfer->b.b; + return data; +} + +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx, + unsigned dstx, unsigned srcx, unsigned size) +{ + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4); + + return rctx->screen->has_cp_dma || + (dword_aligned && (rctx->dma.cs || + rctx->screen->has_streamout)); + +} + +static void *r600_buffer_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; + struct r600_resource *rbuffer = r600_resource(resource); + uint8_t *data; + + assert(box->x + box->width <= resource->width0); + + /* From GL_AMD_pinned_memory issues: + * + * 4) Is glMapBuffer on a shared buffer guaranteed to return the + * same system address which was specified at creation time? + * + * RESOLVED: NO. The GL implementation might return a different + * virtual mapping of that memory, although the same physical + * page will be used. + * + * So don't ever use staging buffers. + */ + if (rbuffer->b.is_user_ptr) + usage |= PIPE_TRANSFER_PERSISTENT; + + /* See if the buffer range being mapped has never been initialized, + * in which case it can be mapped unsynchronized. */ + if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | + TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && + usage & PIPE_TRANSFER_WRITE && + !rbuffer->b.is_shared && + !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + } + + /* If discarding the entire range, discard the whole resource instead. */ + if (usage & PIPE_TRANSFER_DISCARD_RANGE && + box->x == 0 && box->width == resource->width0) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && + !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | + TC_TRANSFER_MAP_NO_INVALIDATE))) { + assert(usage & PIPE_TRANSFER_WRITE); + + if (r600_invalidate_buffer(rctx, rbuffer)) { + /* At this point, the buffer is always idle. */ + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + } else { + /* Fall back to a temporary buffer. */ + usage |= PIPE_TRANSFER_DISCARD_RANGE; + } + } + + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && + ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_PERSISTENT)) && + r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) || + (rbuffer->flags & RADEON_FLAG_SPARSE))) { + assert(usage & PIPE_TRANSFER_WRITE); + + /* Check if mapping this buffer would cause waiting for the GPU. + */ + if (rbuffer->flags & RADEON_FLAG_SPARSE || + r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { + /* Do a wait-free write-only transfer using a temporary buffer. */ + unsigned offset; + struct r600_resource *staging = NULL; + + u_upload_alloc(ctx->stream_uploader, 0, + box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT), + rctx->screen->info.tcc_cache_line_size, + &offset, (struct pipe_resource**)&staging, + (void**)&data); + + if (staging) { + data += box->x % R600_MAP_BUFFER_ALIGNMENT; + return r600_buffer_get_transfer(ctx, resource, usage, box, + ptransfer, data, staging, offset); + } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + return NULL; + } + } else { + /* At this point, the buffer is always idle (we checked it above). */ + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + } + } + /* Use a staging buffer in cached GTT for reads. */ + else if (((usage & PIPE_TRANSFER_READ) && + !(usage & PIPE_TRANSFER_PERSISTENT) && + (rbuffer->domains & RADEON_DOMAIN_VRAM || + rbuffer->flags & RADEON_FLAG_GTT_WC) && + r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) || + (rbuffer->flags & RADEON_FLAG_SPARSE)) { + struct r600_resource *staging; + + assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); + staging = (struct r600_resource*) pipe_buffer_create( + ctx->screen, 0, PIPE_USAGE_STAGING, + box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT)); + if (staging) { + /* Copy the VRAM buffer to the staging buffer. */ + rctx->dma_copy(ctx, &staging->b.b, 0, + box->x % R600_MAP_BUFFER_ALIGNMENT, + 0, 0, resource, 0, box); + + data = r600_buffer_map_sync_with_rings(rctx, staging, + usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); + if (!data) { + r600_resource_reference(&staging, NULL); + return NULL; + } + data += box->x % R600_MAP_BUFFER_ALIGNMENT; + + return r600_buffer_get_transfer(ctx, resource, usage, box, + ptransfer, data, staging, 0); + } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + return NULL; + } + } + + data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage); + if (!data) { + return NULL; + } + data += box->x; + + return r600_buffer_get_transfer(ctx, resource, usage, box, + ptransfer, data, NULL, 0); +} + +static void r600_buffer_do_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource *rbuffer = r600_resource(transfer->resource); + + if (rtransfer->staging) { + struct pipe_resource *dst, *src; + unsigned soffset; + struct pipe_box dma_box; + + dst = transfer->resource; + src = &rtransfer->staging->b.b; + soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT; + + u_box_1d(soffset, box->width, &dma_box); + + /* Copy the staging buffer into the original one. */ + ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box); + } + + util_range_add(&rbuffer->valid_buffer_range, box->x, + box->x + box->width); +} + +static void r600_buffer_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *rel_box) +{ + unsigned required_usage = PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_FLUSH_EXPLICIT; + + if ((transfer->usage & required_usage) == required_usage) { + struct pipe_box box; + + u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box); + r600_buffer_do_flush_region(ctx, transfer, &box); + } +} + +static void r600_buffer_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + + if (transfer->usage & PIPE_TRANSFER_WRITE && + !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + r600_buffer_do_flush_region(ctx, transfer, &transfer->box); + + r600_resource_reference(&rtransfer->staging, NULL); + assert(rtransfer->b.staging == NULL); /* for threaded context only */ + pipe_resource_reference(&transfer->resource, NULL); + + /* Don't use pool_transfers_unsync. We are always in the driver + * thread. */ + slab_free(&rctx->pool_transfers, transfer); +} + +void r600_buffer_subdata(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned usage, unsigned offset, + unsigned size, const void *data) +{ + struct pipe_transfer *transfer = NULL; + struct pipe_box box; + uint8_t *map = NULL; + + u_box_1d(offset, size, &box); + map = r600_buffer_transfer_map(ctx, buffer, 0, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE | + usage, + &box, &transfer); + if (!map) + return; + + memcpy(map, data, size); + r600_buffer_transfer_unmap(ctx, transfer); +} + +static const struct u_resource_vtbl r600_buffer_vtbl = +{ + NULL, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + r600_buffer_flush_region, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ +}; + +static struct r600_resource * +r600_alloc_buffer_struct(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_resource *rbuffer; + + rbuffer = MALLOC_STRUCT(r600_resource); + + rbuffer->b.b = *templ; + rbuffer->b.b.next = NULL; + pipe_reference_init(&rbuffer->b.b.reference, 1); + rbuffer->b.b.screen = screen; + + rbuffer->b.vtbl = &r600_buffer_vtbl; + threaded_resource_init(&rbuffer->b.b); + + rbuffer->buf = NULL; + rbuffer->bind_history = 0; + util_range_init(&rbuffer->valid_buffer_range); + return rbuffer; +} + +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ, + unsigned alignment) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); + + r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment); + + if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) + rbuffer->flags |= RADEON_FLAG_SPARSE; + + if (!r600_alloc_resource(rscreen, rbuffer)) { + FREE(rbuffer); + return NULL; + } + return &rbuffer->b.b; +} + +struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned flags, + unsigned usage, + unsigned size, + unsigned alignment) +{ + struct pipe_resource buffer; + + memset(&buffer, 0, sizeof buffer); + buffer.target = PIPE_BUFFER; + buffer.format = PIPE_FORMAT_R8_UNORM; + buffer.bind = 0; + buffer.usage = usage; + buffer.flags = flags; + buffer.width0 = size; + buffer.height0 = 1; + buffer.depth0 = 1; + buffer.array_size = 1; + return r600_buffer_create(screen, &buffer, alignment); +} + +struct pipe_resource * +r600_buffer_from_user_memory(struct pipe_screen *screen, + const struct pipe_resource *templ, + void *user_memory) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_winsys *ws = rscreen->ws; + struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); + + rbuffer->domains = RADEON_DOMAIN_GTT; + rbuffer->flags = 0; + rbuffer->b.is_user_ptr = true; + util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); + util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0); + + /* Convert a user pointer to a buffer. */ + rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); + if (!rbuffer->buf) { + FREE(rbuffer); + return NULL; + } + + if (rscreen->info.has_virtual_memory) + rbuffer->gpu_address = + ws->buffer_get_virtual_address(rbuffer->buf); + else + rbuffer->gpu_address = 0; + + rbuffer->vram_usage = 0; + rbuffer->gart_usage = templ->width0; + + return &rbuffer->b.b; +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_cs.h b/lib/mesa/src/gallium/drivers/r600/r600_cs.h new file mode 100644 index 000000000..0efae09f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_cs.h @@ -0,0 +1,209 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + */ + +/** + * This file contains helpers for writing commands to commands streams. + */ + +#ifndef R600_CS_H +#define R600_CS_H + +#include "r600_pipe_common.h" +#include "r600d_common.h" + +/** + * Return true if there is enough memory in VRAM and GTT for the buffers + * added so far. + * + * \param vram VRAM memory size not added to the buffer list yet + * \param gtt GTT memory size not added to the buffer list yet + */ +static inline bool +radeon_cs_memory_below_limit(struct r600_common_screen *screen, + struct radeon_winsys_cs *cs, + uint64_t vram, uint64_t gtt) +{ + vram += cs->used_vram; + gtt += cs->used_gart; + + /* Anything that goes above the VRAM size should go to GTT. */ + if (vram > screen->info.vram_size) + gtt += vram - screen->info.vram_size; + + /* Now we just need to check if we have enough GTT. */ + return gtt < screen->info.gart_size * 0.7; +} + +/** + * Add a buffer to the buffer list for the given command stream (CS). + * + * All buffers used by a CS must be added to the list. This tells the kernel + * driver which buffers are used by GPU commands. Other buffers can + * be swapped out (not accessible) during execution. + * + * The buffer list becomes empty after every context flush and must be + * rebuilt. + */ +static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx, + struct r600_ring *ring, + struct r600_resource *rbo, + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) +{ + assert(usage); + return rctx->ws->cs_add_buffer( + ring->cs, rbo->buf, + (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), + rbo->domains, priority) * 4; +} + +/** + * Same as above, but also checks memory usage and flushes the context + * accordingly. + * + * When this SHOULD NOT be used: + * + * - if r600_context_add_resource_size has been called for the buffer + * followed by *_need_cs_space for checking the memory usage + * + * - if r600_need_dma_space has been called for the buffer + * + * - when emitting state packets and draw packets (because preceding packets + * can't be re-emitted at that point) + * + * - if shader resource "enabled_mask" is not up-to-date or there is + * a different constraint disallowing a context flush + */ +static inline unsigned +radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx, + struct r600_ring *ring, + struct r600_resource *rbo, + enum radeon_bo_usage usage, + enum radeon_bo_priority priority, + bool check_mem) +{ + if (check_mem && + !radeon_cs_memory_below_limit(rctx->screen, ring->cs, + rctx->vram + rbo->vram_usage, + rctx->gtt + rbo->gart_usage)) + ring->flush(rctx, RADEON_FLUSH_ASYNC, NULL); + + return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); +} + +static inline void r600_emit_reloc(struct r600_common_context *rctx, + struct r600_ring *ring, struct r600_resource *rbo, + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) +{ + struct radeon_winsys_cs *cs = ring->cs; + bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory; + unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); + + if (!has_vm) { + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + } +} + +static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +{ + assert(reg < R600_CONTEXT_REG_OFFSET); + assert(cs->current.cdw + 2 + num <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); + radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); +} + +static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +{ + radeon_set_config_reg_seq(cs, reg, 1); + radeon_emit(cs, value); +} + +static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +{ + assert(reg >= R600_CONTEXT_REG_OFFSET); + assert(cs->current.cdw + 2 + num <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); + radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); +} + +static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +{ + radeon_set_context_reg_seq(cs, reg, 1); + radeon_emit(cs, value); +} + +static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs, + unsigned reg, unsigned idx, + unsigned value) +{ + assert(reg >= R600_CONTEXT_REG_OFFSET); + assert(cs->current.cdw + 3 <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); + radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, value); +} + +static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +{ + assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); + assert(cs->current.cdw + 2 + num <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); +} + +static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +{ + radeon_set_sh_reg_seq(cs, reg, 1); + radeon_emit(cs, value); +} + +static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +{ + assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); + assert(cs->current.cdw + 2 + num <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); + radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); +} + +static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +{ + radeon_set_uconfig_reg_seq(cs, reg, 1); + radeon_emit(cs, value); +} + +static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs, + unsigned reg, unsigned idx, + unsigned value) +{ + assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); + assert(cs->current.cdw + 3 <= cs->current.max_dw); + radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0)); + radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, value); +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c b/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c new file mode 100644 index 000000000..c15fb9dfa --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c @@ -0,0 +1,263 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +/* The GPU load is measured as follows. + * + * There is a thread which samples the GRBM_STATUS register at a certain + * frequency and the "busy" or "idle" counter is incremented based on + * whether the GUI_ACTIVE bit is set or not. + * + * Then, the user can sample the counters twice and calculate the average + * GPU load between the two samples. + */ + +#include "r600_pipe_common.h" +#include "r600_query.h" +#include "os/os_time.h" + +/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher + * fps (there are too few samples per frame). */ +#define SAMPLES_PER_SEC 10000 + +#define GRBM_STATUS 0x8010 +#define TA_BUSY(x) (((x) >> 14) & 0x1) +#define GDS_BUSY(x) (((x) >> 15) & 0x1) +#define VGT_BUSY(x) (((x) >> 17) & 0x1) +#define IA_BUSY(x) (((x) >> 19) & 0x1) +#define SX_BUSY(x) (((x) >> 20) & 0x1) +#define WD_BUSY(x) (((x) >> 21) & 0x1) +#define SPI_BUSY(x) (((x) >> 22) & 0x1) +#define BCI_BUSY(x) (((x) >> 23) & 0x1) +#define SC_BUSY(x) (((x) >> 24) & 0x1) +#define PA_BUSY(x) (((x) >> 25) & 0x1) +#define DB_BUSY(x) (((x) >> 26) & 0x1) +#define CP_BUSY(x) (((x) >> 29) & 0x1) +#define CB_BUSY(x) (((x) >> 30) & 0x1) +#define GUI_ACTIVE(x) (((x) >> 31) & 0x1) + +#define SRBM_STATUS2 0x0e4c +#define SDMA_BUSY(x) (((x) >> 5) & 0x1) + +#define CP_STAT 0x8680 +#define PFP_BUSY(x) (((x) >> 15) & 0x1) +#define MEQ_BUSY(x) (((x) >> 16) & 0x1) +#define ME_BUSY(x) (((x) >> 17) & 0x1) +#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1) +#define DMA_BUSY(x) (((x) >> 22) & 0x1) +#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1) + +#define IDENTITY(x) x + +#define UPDATE_COUNTER(field, mask) \ + do { \ + if (mask(value)) \ + p_atomic_inc(&counters->named.field.busy); \ + else \ + p_atomic_inc(&counters->named.field.idle); \ + } while (0) + +static void r600_update_mmio_counters(struct r600_common_screen *rscreen, + union r600_mmio_counters *counters) +{ + uint32_t value = 0; + bool gui_busy, sdma_busy = false; + + /* GRBM_STATUS */ + rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value); + + UPDATE_COUNTER(ta, TA_BUSY); + UPDATE_COUNTER(gds, GDS_BUSY); + UPDATE_COUNTER(vgt, VGT_BUSY); + UPDATE_COUNTER(ia, IA_BUSY); + UPDATE_COUNTER(sx, SX_BUSY); + UPDATE_COUNTER(wd, WD_BUSY); + UPDATE_COUNTER(spi, SPI_BUSY); + UPDATE_COUNTER(bci, BCI_BUSY); + UPDATE_COUNTER(sc, SC_BUSY); + UPDATE_COUNTER(pa, PA_BUSY); + UPDATE_COUNTER(db, DB_BUSY); + UPDATE_COUNTER(cp, CP_BUSY); + UPDATE_COUNTER(cb, CB_BUSY); + UPDATE_COUNTER(gui, GUI_ACTIVE); + gui_busy = GUI_ACTIVE(value); + + value = gui_busy || sdma_busy; + UPDATE_COUNTER(gpu, IDENTITY); +} + +#undef UPDATE_COUNTER + +static int +r600_gpu_load_thread(void *param) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)param; + const int period_us = 1000000 / SAMPLES_PER_SEC; + int sleep_us = period_us; + int64_t cur_time, last_time = os_time_get(); + + while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) { + if (sleep_us) + os_time_sleep(sleep_us); + + /* Make sure we sleep the ideal amount of time to match + * the expected frequency. */ + cur_time = os_time_get(); + + if (os_time_timeout(last_time, last_time + period_us, + cur_time)) + sleep_us = MAX2(sleep_us - 1, 1); + else + sleep_us += 1; + + /*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/ + last_time = cur_time; + + /* Update the counters. */ + r600_update_mmio_counters(rscreen, &rscreen->mmio_counters); + } + p_atomic_dec(&rscreen->gpu_load_stop_thread); + return 0; +} + +void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen) +{ + if (!rscreen->gpu_load_thread) + return; + + p_atomic_inc(&rscreen->gpu_load_stop_thread); + thrd_join(rscreen->gpu_load_thread, NULL); + rscreen->gpu_load_thread = 0; +} + +static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen, + unsigned busy_index) +{ + /* Start the thread if needed. */ + if (!rscreen->gpu_load_thread) { + mtx_lock(&rscreen->gpu_load_mutex); + /* Check again inside the mutex. */ + if (!rscreen->gpu_load_thread) + rscreen->gpu_load_thread = + u_thread_create(r600_gpu_load_thread, rscreen); + mtx_unlock(&rscreen->gpu_load_mutex); + } + + unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]); + unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]); + + return busy | ((uint64_t)idle << 32); +} + +static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen, + uint64_t begin, unsigned busy_index) +{ + uint64_t end = r600_read_mmio_counter(rscreen, busy_index); + unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff); + unsigned idle = (end >> 32) - (begin >> 32); + + /* Calculate the % of time the busy counter was being incremented. + * + * If no counters were incremented, return the current counter status. + * It's for the case when the load is queried faster than + * the counters are updated. + */ + if (idle || busy) { + return busy*100 / (busy + idle); + } else { + union r600_mmio_counters counters; + + memset(&counters, 0, sizeof(counters)); + r600_update_mmio_counters(rscreen, &counters); + return counters.array[busy_index] ? 100 : 0; + } +} + +#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \ + rscreen->mmio_counters.array) + +static unsigned busy_index_from_type(struct r600_common_screen *rscreen, + unsigned type) +{ + switch (type) { + case R600_QUERY_GPU_LOAD: + return BUSY_INDEX(rscreen, gpu); + case R600_QUERY_GPU_SHADERS_BUSY: + return BUSY_INDEX(rscreen, spi); + case R600_QUERY_GPU_TA_BUSY: + return BUSY_INDEX(rscreen, ta); + case R600_QUERY_GPU_GDS_BUSY: + return BUSY_INDEX(rscreen, gds); + case R600_QUERY_GPU_VGT_BUSY: + return BUSY_INDEX(rscreen, vgt); + case R600_QUERY_GPU_IA_BUSY: + return BUSY_INDEX(rscreen, ia); + case R600_QUERY_GPU_SX_BUSY: + return BUSY_INDEX(rscreen, sx); + case R600_QUERY_GPU_WD_BUSY: + return BUSY_INDEX(rscreen, wd); + case R600_QUERY_GPU_BCI_BUSY: + return BUSY_INDEX(rscreen, bci); + case R600_QUERY_GPU_SC_BUSY: + return BUSY_INDEX(rscreen, sc); + case R600_QUERY_GPU_PA_BUSY: + return BUSY_INDEX(rscreen, pa); + case R600_QUERY_GPU_DB_BUSY: + return BUSY_INDEX(rscreen, db); + case R600_QUERY_GPU_CP_BUSY: + return BUSY_INDEX(rscreen, cp); + case R600_QUERY_GPU_CB_BUSY: + return BUSY_INDEX(rscreen, cb); + case R600_QUERY_GPU_SDMA_BUSY: + return BUSY_INDEX(rscreen, sdma); + case R600_QUERY_GPU_PFP_BUSY: + return BUSY_INDEX(rscreen, pfp); + case R600_QUERY_GPU_MEQ_BUSY: + return BUSY_INDEX(rscreen, meq); + case R600_QUERY_GPU_ME_BUSY: + return BUSY_INDEX(rscreen, me); + case R600_QUERY_GPU_SURF_SYNC_BUSY: + return BUSY_INDEX(rscreen, surf_sync); + case R600_QUERY_GPU_CP_DMA_BUSY: + return BUSY_INDEX(rscreen, cp_dma); + case R600_QUERY_GPU_SCRATCH_RAM_BUSY: + return BUSY_INDEX(rscreen, scratch_ram); + default: + unreachable("invalid query type"); + } +} + +uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type) +{ + unsigned busy_index = busy_index_from_type(rscreen, type); + return r600_read_mmio_counter(rscreen, busy_index); +} + +unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type, + uint64_t begin) +{ + unsigned busy_index = busy_index_from_type(rscreen, type); + return r600_end_mmio_counter(rscreen, begin, busy_index); +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c b/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c new file mode 100644 index 000000000..f186acb05 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c @@ -0,0 +1,649 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Nicolai Hähnle <nicolai.haehnle@amd.com> + * + */ + +#include "util/u_memory.h" +#include "r600_query.h" +#include "r600_pipe_common.h" +#include "r600d_common.h" + +/* Max counters per HW block */ +#define R600_QUERY_MAX_COUNTERS 16 + +static struct r600_perfcounter_block * +lookup_counter(struct r600_perfcounters *pc, unsigned index, + unsigned *base_gid, unsigned *sub_index) +{ + struct r600_perfcounter_block *block = pc->blocks; + unsigned bid; + + *base_gid = 0; + for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { + unsigned total = block->num_groups * block->num_selectors; + + if (index < total) { + *sub_index = index; + return block; + } + + index -= total; + *base_gid += block->num_groups; + } + + return NULL; +} + +static struct r600_perfcounter_block * +lookup_group(struct r600_perfcounters *pc, unsigned *index) +{ + unsigned bid; + struct r600_perfcounter_block *block = pc->blocks; + + for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { + if (*index < block->num_groups) + return block; + *index -= block->num_groups; + } + + return NULL; +} + +struct r600_pc_group { + struct r600_pc_group *next; + struct r600_perfcounter_block *block; + unsigned sub_gid; /* only used during init */ + unsigned result_base; /* only used during init */ + int se; + int instance; + unsigned num_counters; + unsigned selectors[R600_QUERY_MAX_COUNTERS]; +}; + +struct r600_pc_counter { + unsigned base; + unsigned qwords; + unsigned stride; /* in uint64s */ +}; + +#define R600_PC_SHADERS_WINDOWING (1 << 31) + +struct r600_query_pc { + struct r600_query_hw b; + + unsigned shaders; + unsigned num_counters; + struct r600_pc_counter *counters; + struct r600_pc_group *groups; +}; + +static void r600_pc_query_destroy(struct r600_common_screen *rscreen, + struct r600_query *rquery) +{ + struct r600_query_pc *query = (struct r600_query_pc *)rquery; + + while (query->groups) { + struct r600_pc_group *group = query->groups; + query->groups = group->next; + FREE(group); + } + + FREE(query->counters); + + r600_query_hw_destroy(rscreen, rquery); +} + +static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen, + struct r600_query_hw *hwquery, + struct r600_resource *buffer) +{ + /* no-op */ + return true; +} + +static void r600_pc_query_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *hwquery, + struct r600_resource *buffer, uint64_t va) +{ + struct r600_perfcounters *pc = ctx->screen->perfcounters; + struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + struct r600_pc_group *group; + int current_se = -1; + int current_instance = -1; + + if (query->shaders) + pc->emit_shaders(ctx, query->shaders); + + for (group = query->groups; group; group = group->next) { + struct r600_perfcounter_block *block = group->block; + + if (group->se != current_se || group->instance != current_instance) { + current_se = group->se; + current_instance = group->instance; + pc->emit_instance(ctx, group->se, group->instance); + } + + pc->emit_select(ctx, block, group->num_counters, group->selectors); + } + + if (current_se != -1 || current_instance != -1) + pc->emit_instance(ctx, -1, -1); + + pc->emit_start(ctx, buffer, va); +} + +static void r600_pc_query_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *hwquery, + struct r600_resource *buffer, uint64_t va) +{ + struct r600_perfcounters *pc = ctx->screen->perfcounters; + struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + struct r600_pc_group *group; + + pc->emit_stop(ctx, buffer, va); + + for (group = query->groups; group; group = group->next) { + struct r600_perfcounter_block *block = group->block; + unsigned se = group->se >= 0 ? group->se : 0; + unsigned se_end = se + 1; + + if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0)) + se_end = ctx->screen->info.max_se; + + do { + unsigned instance = group->instance >= 0 ? group->instance : 0; + + do { + pc->emit_instance(ctx, se, instance); + pc->emit_read(ctx, block, + group->num_counters, group->selectors, + buffer, va); + va += sizeof(uint64_t) * group->num_counters; + } while (group->instance < 0 && ++instance < block->num_instances); + } while (++se < se_end); + } + + pc->emit_instance(ctx, -1, -1); +} + +static void r600_pc_query_clear_result(struct r600_query_hw *hwquery, + union pipe_query_result *result) +{ + struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + + memset(result, 0, sizeof(result->batch[0]) * query->num_counters); +} + +static void r600_pc_query_add_result(struct r600_common_screen *rscreen, + struct r600_query_hw *hwquery, + void *buffer, + union pipe_query_result *result) +{ + struct r600_query_pc *query = (struct r600_query_pc *)hwquery; + uint64_t *results = buffer; + unsigned i, j; + + for (i = 0; i < query->num_counters; ++i) { + struct r600_pc_counter *counter = &query->counters[i]; + + for (j = 0; j < counter->qwords; ++j) { + uint32_t value = results[counter->base + j * counter->stride]; + result->batch[i].u64 += value; + } + } +} + +static struct r600_query_ops batch_query_ops = { + .destroy = r600_pc_query_destroy, + .begin = r600_query_hw_begin, + .end = r600_query_hw_end, + .get_result = r600_query_hw_get_result +}; + +static struct r600_query_hw_ops batch_query_hw_ops = { + .prepare_buffer = r600_pc_query_prepare_buffer, + .emit_start = r600_pc_query_emit_start, + .emit_stop = r600_pc_query_emit_stop, + .clear_result = r600_pc_query_clear_result, + .add_result = r600_pc_query_add_result, +}; + +static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, + struct r600_query_pc *query, + struct r600_perfcounter_block *block, + unsigned sub_gid) +{ + struct r600_pc_group *group = query->groups; + + while (group) { + if (group->block == block && group->sub_gid == sub_gid) + return group; + group = group->next; + } + + group = CALLOC_STRUCT(r600_pc_group); + if (!group) + return NULL; + + group->block = block; + group->sub_gid = sub_gid; + + if (block->flags & R600_PC_BLOCK_SHADER) { + unsigned sub_gids = block->num_instances; + unsigned shader_id; + unsigned shaders; + unsigned query_shaders; + + if (block->flags & R600_PC_BLOCK_SE_GROUPS) + sub_gids = sub_gids * screen->info.max_se; + shader_id = sub_gid / sub_gids; + sub_gid = sub_gid % sub_gids; + + shaders = screen->perfcounters->shader_type_bits[shader_id]; + + query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; + if (query_shaders && query_shaders != shaders) { + fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); + FREE(group); + return NULL; + } + query->shaders = shaders; + } + + if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { + // A non-zero value in query->shaders ensures that the shader + // masking is reset unless the user explicitly requests one. + query->shaders = R600_PC_SHADERS_WINDOWING; + } + + if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + group->se = sub_gid / block->num_instances; + sub_gid = sub_gid % block->num_instances; + } else { + group->se = -1; + } + + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + group->instance = sub_gid; + } else { + group->instance = -1; + } + + group->next = query->groups; + query->groups = group; + + return group; +} + +struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, + unsigned num_queries, + unsigned *query_types) +{ + struct r600_common_screen *screen = + (struct r600_common_screen *)ctx->screen; + struct r600_perfcounters *pc = screen->perfcounters; + struct r600_perfcounter_block *block; + struct r600_pc_group *group; + struct r600_query_pc *query; + unsigned base_gid, sub_gid, sub_index; + unsigned i, j; + + if (!pc) + return NULL; + + query = CALLOC_STRUCT(r600_query_pc); + if (!query) + return NULL; + + query->b.b.ops = &batch_query_ops; + query->b.ops = &batch_query_hw_ops; + + query->num_counters = num_queries; + + /* Collect selectors per group */ + for (i = 0; i < num_queries; ++i) { + unsigned sub_gid; + + if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER) + goto error; + + block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + if (!block) + goto error; + + sub_gid = sub_index / block->num_selectors; + sub_index = sub_index % block->num_selectors; + + group = get_group_state(screen, query, block, sub_gid); + if (!group) + goto error; + + if (group->num_counters >= block->num_counters) { + fprintf(stderr, + "perfcounter group %s: too many selected\n", + block->basename); + goto error; + } + group->selectors[group->num_counters] = sub_index; + ++group->num_counters; + } + + /* Compute result bases and CS size per group */ + query->b.num_cs_dw_begin = pc->num_start_cs_dwords; + query->b.num_cs_dw_end = pc->num_stop_cs_dwords; + + query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ + query->b.num_cs_dw_end += pc->num_instance_cs_dwords; + + i = 0; + for (group = query->groups; group; group = group->next) { + struct r600_perfcounter_block *block = group->block; + unsigned select_dw, read_dw; + unsigned instances = 1; + + if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) + instances = screen->info.max_se; + if (group->instance < 0) + instances *= block->num_instances; + + group->result_base = i; + query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; + i += instances * group->num_counters; + + pc->get_size(block, group->num_counters, group->selectors, + &select_dw, &read_dw); + query->b.num_cs_dw_begin += select_dw; + query->b.num_cs_dw_end += instances * read_dw; + query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ + query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; + } + + if (query->shaders) { + if (query->shaders == R600_PC_SHADERS_WINDOWING) + query->shaders = 0xffffffff; + query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; + } + + /* Map user-supplied query array to result indices */ + query->counters = CALLOC(num_queries, sizeof(*query->counters)); + for (i = 0; i < num_queries; ++i) { + struct r600_pc_counter *counter = &query->counters[i]; + struct r600_perfcounter_block *block; + + block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + + sub_gid = sub_index / block->num_selectors; + sub_index = sub_index % block->num_selectors; + + group = get_group_state(screen, query, block, sub_gid); + assert(group != NULL); + + for (j = 0; j < group->num_counters; ++j) { + if (group->selectors[j] == sub_index) + break; + } + + counter->base = group->result_base + j; + counter->stride = group->num_counters; + + counter->qwords = 1; + if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) + counter->qwords = screen->info.max_se; + if (group->instance < 0) + counter->qwords *= block->num_instances; + } + + if (!r600_query_hw_init(screen, &query->b)) + goto error; + + return (struct pipe_query *)query; + +error: + r600_pc_query_destroy(screen, &query->b.b); + return NULL; +} + +static bool r600_init_block_names(struct r600_common_screen *screen, + struct r600_perfcounter_block *block) +{ + unsigned i, j, k; + unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; + unsigned namelen; + char *groupname; + char *p; + + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + groups_instance = block->num_instances; + if (block->flags & R600_PC_BLOCK_SE_GROUPS) + groups_se = screen->info.max_se; + if (block->flags & R600_PC_BLOCK_SHADER) + groups_shader = screen->perfcounters->num_shader_types; + + namelen = strlen(block->basename); + block->group_name_stride = namelen + 1; + if (block->flags & R600_PC_BLOCK_SHADER) + block->group_name_stride += 3; + if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + assert(groups_se <= 10); + block->group_name_stride += 1; + + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + block->group_name_stride += 1; + } + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + assert(groups_instance <= 100); + block->group_name_stride += 2; + } + + block->group_names = MALLOC(block->num_groups * block->group_name_stride); + if (!block->group_names) + return false; + + groupname = block->group_names; + for (i = 0; i < groups_shader; ++i) { + const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; + unsigned shaderlen = strlen(shader_suffix); + for (j = 0; j < groups_se; ++j) { + for (k = 0; k < groups_instance; ++k) { + strcpy(groupname, block->basename); + p = groupname + namelen; + + if (block->flags & R600_PC_BLOCK_SHADER) { + strcpy(p, shader_suffix); + p += shaderlen; + } + + if (block->flags & R600_PC_BLOCK_SE_GROUPS) { + p += sprintf(p, "%d", j); + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + *p++ = '_'; + } + + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) + p += sprintf(p, "%d", k); + + groupname += block->group_name_stride; + } + } + } + + assert(block->num_selectors <= 1000); + block->selector_name_stride = block->group_name_stride + 4; + block->selector_names = MALLOC(block->num_groups * block->num_selectors * + block->selector_name_stride); + if (!block->selector_names) + return false; + + groupname = block->group_names; + p = block->selector_names; + for (i = 0; i < block->num_groups; ++i) { + for (j = 0; j < block->num_selectors; ++j) { + sprintf(p, "%s_%03d", groupname, j); + p += block->selector_name_stride; + } + groupname += block->group_name_stride; + } + + return true; +} + +int r600_get_perfcounter_info(struct r600_common_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct r600_perfcounters *pc = screen->perfcounters; + struct r600_perfcounter_block *block; + unsigned base_gid, sub; + + if (!pc) + return 0; + + if (!info) { + unsigned bid, num_queries = 0; + + for (bid = 0; bid < pc->num_blocks; ++bid) { + num_queries += pc->blocks[bid].num_selectors * + pc->blocks[bid].num_groups; + } + + return num_queries; + } + + block = lookup_counter(pc, index, &base_gid, &sub); + if (!block) + return 0; + + if (!block->selector_names) { + if (!r600_init_block_names(screen, block)) + return 0; + } + info->name = block->selector_names + sub * block->selector_name_stride; + info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; + info->group_id = base_gid + sub / block->num_selectors; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups) + info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; + return 1; +} + +int r600_get_perfcounter_group_info(struct r600_common_screen *screen, + unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct r600_perfcounters *pc = screen->perfcounters; + struct r600_perfcounter_block *block; + + if (!pc) + return 0; + + if (!info) + return pc->num_groups; + + block = lookup_group(pc, &index); + if (!block) + return 0; + + if (!block->group_names) { + if (!r600_init_block_names(screen, block)) + return 0; + } + info->name = block->group_names + index * block->group_name_stride; + info->num_queries = block->num_selectors; + info->max_active_queries = block->num_counters; + return 1; +} + +void r600_perfcounters_destroy(struct r600_common_screen *rscreen) +{ + if (rscreen->perfcounters) + rscreen->perfcounters->cleanup(rscreen); +} + +bool r600_perfcounters_init(struct r600_perfcounters *pc, + unsigned num_blocks) +{ + pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block)); + if (!pc->blocks) + return false; + + pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); + pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); + + return true; +} + +void r600_perfcounters_add_block(struct r600_common_screen *rscreen, + struct r600_perfcounters *pc, + const char *name, unsigned flags, + unsigned counters, unsigned selectors, + unsigned instances, void *data) +{ + struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks]; + + assert(counters <= R600_QUERY_MAX_COUNTERS); + + block->basename = name; + block->flags = flags; + block->num_counters = counters; + block->num_selectors = selectors; + block->num_instances = MAX2(instances, 1); + block->data = data; + + if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE)) + block->flags |= R600_PC_BLOCK_SE_GROUPS; + if (pc->separate_instance && block->num_instances > 1) + block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS; + + if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { + block->num_groups = block->num_instances; + } else { + block->num_groups = 1; + } + + if (block->flags & R600_PC_BLOCK_SE_GROUPS) + block->num_groups *= rscreen->info.max_se; + if (block->flags & R600_PC_BLOCK_SHADER) + block->num_groups *= pc->num_shader_types; + + ++pc->num_blocks; + pc->num_groups += block->num_groups; +} + +void r600_perfcounters_do_destroy(struct r600_perfcounters *pc) +{ + unsigned i; + + for (i = 0; i < pc->num_blocks; ++i) { + FREE(pc->blocks[i].group_names); + FREE(pc->blocks[i].selector_names); + } + FREE(pc->blocks); + FREE(pc); +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c new file mode 100644 index 000000000..acad670d6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c @@ -0,0 +1,1433 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +#include "r600_pipe_common.h" +#include "r600_cs.h" +#include "tgsi/tgsi_parse.h" +#include "util/list.h" +#include "util/u_draw_quad.h" +#include "util/u_memory.h" +#include "util/u_format_s3tc.h" +#include "util/u_upload_mgr.h" +#include "os/os_time.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" +#include "radeon_video.h" +#include <inttypes.h> +#include <sys/utsname.h> + +#ifndef HAVE_LLVM +#define HAVE_LLVM 0 +#endif + +#if HAVE_LLVM +#include <llvm-c/TargetMachine.h> +#endif + +#ifndef MESA_LLVM_VERSION_PATCH +#define MESA_LLVM_VERSION_PATCH 0 +#endif + +struct r600_multi_fence { + struct pipe_reference reference; + struct pipe_fence_handle *gfx; + struct pipe_fence_handle *sdma; + + /* If the context wasn't flushed at fence creation, this is non-NULL. */ + struct { + struct r600_common_context *ctx; + unsigned ib_index; + } gfx_unflushed; +}; + +/* + * shader binary helpers. + */ +void radeon_shader_binary_init(struct ac_shader_binary *b) +{ + memset(b, 0, sizeof(*b)); +} + +void radeon_shader_binary_clean(struct ac_shader_binary *b) +{ + if (!b) + return; + FREE(b->code); + FREE(b->config); + FREE(b->rodata); + FREE(b->global_symbol_offsets); + FREE(b->relocs); + FREE(b->disasm_string); + FREE(b->llvm_ir_string); +} + +/* + * pipe_context + */ + +/** + * Write an EOP event. + * + * \param event EVENT_TYPE_* + * \param event_flags Optional cache flush flags (TC) + * \param data_sel 1 = fence, 3 = timestamp + * \param buf Buffer + * \param va GPU address + * \param old_value Previous fence value (for a bug workaround) + * \param new_value Fence value to write for this event. + */ +void r600_gfx_write_event_eop(struct r600_common_context *ctx, + unsigned event, unsigned event_flags, + unsigned data_sel, + struct r600_resource *buf, uint64_t va, + uint32_t new_fence, unsigned query_type) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + unsigned op = EVENT_TYPE(event) | + EVENT_INDEX(5) | + event_flags; + unsigned sel = EOP_DATA_SEL(data_sel); + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, op); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xffff) | sel); + radeon_emit(cs, new_fence); /* immediate data */ + radeon_emit(cs, 0); /* unused */ + + if (buf) + r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, + RADEON_PRIO_QUERY); +} + +unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) +{ + unsigned dwords = 6; + + if (!screen->info.has_virtual_memory) + dwords += 2; + + return dwords; +} + +void r600_gfx_wait_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, ref); /* reference value */ + radeon_emit(cs, mask); /* mask */ + radeon_emit(cs, 4); /* poll interval */ +} + +void r600_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, + int x1, int y1, int x2, int y2, + float depth, unsigned num_instances, + enum blitter_attrib_type type, + const union blitter_attrib *attrib) +{ + struct r600_common_context *rctx = + (struct r600_common_context*)util_blitter_get_pipe(blitter); + struct pipe_viewport_state viewport; + struct pipe_resource *buf = NULL; + unsigned offset = 0; + float *vb; + + rctx->b.bind_vertex_elements_state(&rctx->b, vertex_elements_cso); + rctx->b.bind_vs_state(&rctx->b, get_vs(blitter)); + + /* Some operations (like color resolve on r6xx) don't work + * with the conventional primitive types. + * One that works is PT_RECTLIST, which we use here. */ + + /* setup viewport */ + viewport.scale[0] = 1.0f; + viewport.scale[1] = 1.0f; + viewport.scale[2] = 1.0f; + viewport.translate[0] = 0.0f; + viewport.translate[1] = 0.0f; + viewport.translate[2] = 0.0f; + rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport); + + /* Upload vertices. The hw rectangle has only 3 vertices, + * The 4th one is derived from the first 3. + * The vertex specification should match u_blitter's vertex element state. */ + u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24, + rctx->screen->info.tcc_cache_line_size, + &offset, &buf, (void**)&vb); + if (!buf) + return; + + vb[0] = x1; + vb[1] = y1; + vb[2] = depth; + vb[3] = 1; + + vb[8] = x1; + vb[9] = y2; + vb[10] = depth; + vb[11] = 1; + + vb[16] = x2; + vb[17] = y1; + vb[18] = depth; + vb[19] = 1; + + switch (type) { + case UTIL_BLITTER_ATTRIB_COLOR: + memcpy(vb+4, attrib->color, sizeof(float)*4); + memcpy(vb+12, attrib->color, sizeof(float)*4); + memcpy(vb+20, attrib->color, sizeof(float)*4); + break; + case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW: + case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: + vb[6] = vb[14] = vb[22] = attrib->texcoord.z; + vb[7] = vb[15] = vb[23] = attrib->texcoord.w; + /* fall through */ + vb[4] = attrib->texcoord.x1; + vb[5] = attrib->texcoord.y1; + vb[12] = attrib->texcoord.x1; + vb[13] = attrib->texcoord.y2; + vb[20] = attrib->texcoord.x2; + vb[21] = attrib->texcoord.y1; + break; + default:; /* Nothing to do. */ + } + + /* draw */ + struct pipe_vertex_buffer vbuffer = {}; + vbuffer.buffer.resource = buf; + vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = offset; + + rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, &vbuffer); + util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3, + 0, num_instances); + pipe_resource_reference(&buf, NULL); +} + +static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->dma.cs; + + if (rctx->chip_class >= EVERGREEN) + radeon_emit(cs, 0xf0000000); /* NOP */ + else { + /* TODO: R600-R700 should use the FENCE packet. + * CS checker support is required. */ + } +} + +void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, + struct r600_resource *dst, struct r600_resource *src) +{ + uint64_t vram = ctx->dma.cs->used_vram; + uint64_t gtt = ctx->dma.cs->used_gart; + + if (dst) { + vram += dst->vram_usage; + gtt += dst->gart_usage; + } + if (src) { + vram += src->vram_usage; + gtt += src->gart_usage; + } + + /* Flush the GFX IB if DMA depends on it. */ + if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && + ((dst && + ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf, + RADEON_USAGE_READWRITE)) || + (src && + ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, + RADEON_USAGE_WRITE)))) + ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + + /* Flush if there's not enough space, or if the memory usage per IB + * is too large. + * + * IBs using too little memory are limited by the IB submission overhead. + * IBs using too much memory are limited by the kernel/TTM overhead. + * Too long IBs create CPU-GPU pipeline bubbles and add latency. + * + * This heuristic makes sure that DMA requests are executed + * very soon after the call is made and lowers memory usage. + * It improves texture upload performance by keeping the DMA + * engine busy while uploads are being submitted. + */ + num_dw++; /* for emit_wait_idle below */ + if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) || + ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || + !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { + ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); + } + + /* Wait for idle if either buffer has been used in the IB before to + * prevent read-after-write hazards. + */ + if ((dst && + ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf, + RADEON_USAGE_READWRITE)) || + (src && + ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf, + RADEON_USAGE_WRITE))) + r600_dma_emit_wait_idle(ctx); + + /* If GPUVM is not supported, the CS checker needs 2 entries + * in the buffer list per packet, which has to be done manually. + */ + if (ctx->screen->info.has_virtual_memory) { + if (dst) + radeon_add_to_buffer_list(ctx, &ctx->dma, dst, + RADEON_USAGE_WRITE, + RADEON_PRIO_SDMA_BUFFER); + if (src) + radeon_add_to_buffer_list(ctx, &ctx->dma, src, + RADEON_USAGE_READ, + RADEON_PRIO_SDMA_BUFFER); + } + + /* this function is called before all DMA calls, so increment this. */ + ctx->num_dma_calls++; +} + +static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) +{ +} + +void r600_preflush_suspend_features(struct r600_common_context *ctx) +{ + /* suspend queries */ + if (!LIST_IS_EMPTY(&ctx->active_queries)) + r600_suspend_queries(ctx); + + ctx->streamout.suspended = false; + if (ctx->streamout.begin_emitted) { + r600_emit_streamout_end(ctx); + ctx->streamout.suspended = true; + } +} + +void r600_postflush_resume_features(struct r600_common_context *ctx) +{ + if (ctx->streamout.suspended) { + ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; + r600_streamout_buffers_dirty(ctx); + } + + /* resume queries */ + if (!LIST_IS_EMPTY(&ctx->active_queries)) + r600_resume_queries(ctx); +} + +static void r600_add_fence_dependency(struct r600_common_context *rctx, + struct pipe_fence_handle *fence) +{ + struct radeon_winsys *ws = rctx->ws; + + if (rctx->dma.cs) + ws->cs_add_fence_dependency(rctx->dma.cs, fence); + ws->cs_add_fence_dependency(rctx->gfx.cs, fence); +} + +static void r600_fence_server_sync(struct pipe_context *ctx, + struct pipe_fence_handle *fence) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; + + /* Only amdgpu needs to handle fence dependencies (for fence imports). + * radeon synchronizes all rings by default and will not implement + * fence imports. + */ + if (rctx->screen->info.drm_major == 2) + return; + + /* Only imported fences need to be handled by fence_server_sync, + * because the winsys handles synchronizations automatically for BOs + * within the process. + * + * Simply skip unflushed fences here, and the winsys will drop no-op + * dependencies (i.e. dependencies within the same ring). + */ + if (rfence->gfx_unflushed.ctx) + return; + + /* All unflushed commands will not start execution before + * this fence dependency is signalled. + * + * Should we flush the context to allow more GPU parallelism? + */ + if (rfence->sdma) + r600_add_fence_dependency(rctx, rfence->sdma); + if (rfence->gfx) + r600_add_fence_dependency(rctx, rfence->gfx); +} + +static void r600_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct pipe_screen *screen = ctx->screen; + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct radeon_winsys *ws = rctx->ws; + struct pipe_fence_handle *gfx_fence = NULL; + struct pipe_fence_handle *sdma_fence = NULL; + bool deferred_fence = false; + unsigned rflags = RADEON_FLUSH_ASYNC; + + if (flags & PIPE_FLUSH_END_OF_FRAME) + rflags |= RADEON_FLUSH_END_OF_FRAME; + + /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ + if (rctx->dma.cs) + rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); + + if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { + if (fence) + ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); + if (!(flags & PIPE_FLUSH_DEFERRED)) + ws->cs_sync_flush(rctx->gfx.cs); + } else { + /* Instead of flushing, create a deferred fence. Constraints: + * - The state tracker must allow a deferred flush. + * - The state tracker must request a fence. + * Thread safety in fence_finish must be ensured by the state tracker. + */ + if (flags & PIPE_FLUSH_DEFERRED && fence) { + gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); + deferred_fence = true; + } else { + rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); + } + } + + /* Both engines can signal out of order, so we need to keep both fences. */ + if (fence) { + struct r600_multi_fence *multi_fence = + CALLOC_STRUCT(r600_multi_fence); + if (!multi_fence) { + ws->fence_reference(&sdma_fence, NULL); + ws->fence_reference(&gfx_fence, NULL); + goto finish; + } + + multi_fence->reference.count = 1; + /* If both fences are NULL, fence_finish will always return true. */ + multi_fence->gfx = gfx_fence; + multi_fence->sdma = sdma_fence; + + if (deferred_fence) { + multi_fence->gfx_unflushed.ctx = rctx; + multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; + } + + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle*)multi_fence; + } +finish: + if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (rctx->dma.cs) + ws->cs_sync_flush(rctx->dma.cs); + ws->cs_sync_flush(rctx->gfx.cs); + } +} + +static void r600_flush_dma_ring(void *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct radeon_winsys_cs *cs = rctx->dma.cs; + struct radeon_saved_cs saved; + bool check_vm = + (rctx->screen->debug_flags & DBG_CHECK_VM) && + rctx->check_vm_faults; + + if (!radeon_emitted(cs, 0)) { + if (fence) + rctx->ws->fence_reference(fence, rctx->last_sdma_fence); + return; + } + + if (check_vm) + radeon_save_cs(rctx->ws, cs, &saved, true); + + rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); + if (fence) + rctx->ws->fence_reference(fence, rctx->last_sdma_fence); + + if (check_vm) { + /* Use conservative timeout 800ms, after which we won't wait any + * longer and assume the GPU is hung. + */ + rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000); + + rctx->check_vm_faults(rctx, &saved, RING_DMA); + radeon_clear_saved_cs(&saved); + } +} + +/** + * Store a linearized copy of all chunks of \p cs together with the buffer + * list in \p saved. + */ +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, + struct radeon_saved_cs *saved, bool get_buffer_list) +{ + uint32_t *buf; + unsigned i; + + /* Save the IB chunks. */ + saved->num_dw = cs->prev_dw + cs->current.cdw; + saved->ib = MALLOC(4 * saved->num_dw); + if (!saved->ib) + goto oom; + + buf = saved->ib; + for (i = 0; i < cs->num_prev; ++i) { + memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); + buf += cs->prev[i].cdw; + } + memcpy(buf, cs->current.buf, cs->current.cdw * 4); + + if (!get_buffer_list) + return; + + /* Save the buffer list. */ + saved->bo_count = ws->cs_get_buffer_list(cs, NULL); + saved->bo_list = CALLOC(saved->bo_count, + sizeof(saved->bo_list[0])); + if (!saved->bo_list) { + FREE(saved->ib); + goto oom; + } + ws->cs_get_buffer_list(cs, saved->bo_list); + + return; + +oom: + fprintf(stderr, "%s: out of memory\n", __func__); + memset(saved, 0, sizeof(*saved)); +} + +void radeon_clear_saved_cs(struct radeon_saved_cs *saved) +{ + FREE(saved->ib); + FREE(saved->bo_list); + + memset(saved, 0, sizeof(*saved)); +} + +static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + unsigned latest = rctx->ws->query_value(rctx->ws, + RADEON_GPU_RESET_COUNTER); + + if (rctx->gpu_reset_counter == latest) + return PIPE_NO_RESET; + + rctx->gpu_reset_counter = latest; + return PIPE_UNKNOWN_CONTEXT_RESET; +} + +static void r600_set_debug_callback(struct pipe_context *ctx, + const struct pipe_debug_callback *cb) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + + if (cb) + rctx->debug = *cb; + else + memset(&rctx->debug, 0, sizeof(rctx->debug)); +} + +static void r600_set_device_reset_callback(struct pipe_context *ctx, + const struct pipe_device_reset_callback *cb) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + + if (cb) + rctx->device_reset_callback = *cb; + else + memset(&rctx->device_reset_callback, 0, + sizeof(rctx->device_reset_callback)); +} + +bool r600_check_device_reset(struct r600_common_context *rctx) +{ + enum pipe_reset_status status; + + if (!rctx->device_reset_callback.reset) + return false; + + if (!rctx->b.get_device_reset_status) + return false; + + status = rctx->b.get_device_reset_status(&rctx->b); + if (status == PIPE_NO_RESET) + return false; + + rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status); + return true; +} + +static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx, + struct pipe_resource *dst, + uint64_t offset, uint64_t size, + unsigned value) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + + rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE); +} + +static bool r600_resource_commit(struct pipe_context *pctx, + struct pipe_resource *resource, + unsigned level, struct pipe_box *box, + bool commit) +{ + struct r600_common_context *ctx = (struct r600_common_context *)pctx; + struct r600_resource *res = r600_resource(resource); + + /* + * Since buffer commitment changes cannot be pipelined, we need to + * (a) flush any pending commands that refer to the buffer we're about + * to change, and + * (b) wait for threaded submit to finish, including those that were + * triggered by some other, earlier operation. + */ + if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && + ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, + res->buf, RADEON_USAGE_READWRITE)) { + ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + } + if (radeon_emitted(ctx->dma.cs, 0) && + ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, + res->buf, RADEON_USAGE_READWRITE)) { + ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + } + + ctx->ws->cs_sync_flush(ctx->dma.cs); + ctx->ws->cs_sync_flush(ctx->gfx.cs); + + assert(resource->target == PIPE_BUFFER); + + return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit); +} + +bool r600_common_context_init(struct r600_common_context *rctx, + struct r600_common_screen *rscreen, + unsigned context_flags) +{ + slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers); + slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers); + + rctx->screen = rscreen; + rctx->ws = rscreen->ws; + rctx->family = rscreen->family; + rctx->chip_class = rscreen->chip_class; + + rctx->b.invalidate_resource = r600_invalidate_resource; + rctx->b.resource_commit = r600_resource_commit; + rctx->b.transfer_map = u_transfer_map_vtbl; + rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl; + rctx->b.transfer_unmap = u_transfer_unmap_vtbl; + rctx->b.texture_subdata = u_default_texture_subdata; + rctx->b.memory_barrier = r600_memory_barrier; + rctx->b.flush = r600_flush_from_st; + rctx->b.set_debug_callback = r600_set_debug_callback; + rctx->b.fence_server_sync = r600_fence_server_sync; + rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback; + + /* evergreen_compute.c has a special codepath for global buffers. + * Everything else can use the direct path. + */ + if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) && + (context_flags & PIPE_CONTEXT_COMPUTE_ONLY)) + rctx->b.buffer_subdata = u_default_buffer_subdata; + else + rctx->b.buffer_subdata = r600_buffer_subdata; + + if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { + rctx->b.get_device_reset_status = r600_get_reset_status; + rctx->gpu_reset_counter = + rctx->ws->query_value(rctx->ws, + RADEON_GPU_RESET_COUNTER); + } + + rctx->b.set_device_reset_callback = r600_set_device_reset_callback; + + r600_init_context_texture_functions(rctx); + r600_init_viewport_functions(rctx); + r600_streamout_init(rctx); + r600_query_init(rctx); + cayman_init_msaa(&rctx->b); + + rctx->allocator_zeroed_memory = + u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, + 0, PIPE_USAGE_DEFAULT, 0, true); + if (!rctx->allocator_zeroed_memory) + return false; + + rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024, + 0, PIPE_USAGE_STREAM); + if (!rctx->b.stream_uploader) + return false; + + rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024, + 0, PIPE_USAGE_DEFAULT); + if (!rctx->b.const_uploader) + return false; + + rctx->ctx = rctx->ws->ctx_create(rctx->ws); + if (!rctx->ctx) + return false; + + if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { + rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, + r600_flush_dma_ring, + rctx); + rctx->dma.flush = r600_flush_dma_ring; + } + + return true; +} + +void r600_common_context_cleanup(struct r600_common_context *rctx) +{ + if (rctx->query_result_shader) + rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); + + if (rctx->gfx.cs) + rctx->ws->cs_destroy(rctx->gfx.cs); + if (rctx->dma.cs) + rctx->ws->cs_destroy(rctx->dma.cs); + if (rctx->ctx) + rctx->ws->ctx_destroy(rctx->ctx); + + if (rctx->b.stream_uploader) + u_upload_destroy(rctx->b.stream_uploader); + if (rctx->b.const_uploader) + u_upload_destroy(rctx->b.const_uploader); + + slab_destroy_child(&rctx->pool_transfers); + slab_destroy_child(&rctx->pool_transfers_unsync); + + if (rctx->allocator_zeroed_memory) { + u_suballocator_destroy(rctx->allocator_zeroed_memory); + } + rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); + rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); + r600_resource_reference(&rctx->eop_bug_scratch, NULL); +} + +/* + * pipe_screen + */ + +static const struct debug_named_value common_debug_options[] = { + /* logging */ + { "tex", DBG_TEX, "Print texture info" }, + { "nir", DBG_NIR, "Enable experimental NIR shaders" }, + { "compute", DBG_COMPUTE, "Print compute info" }, + { "vm", DBG_VM, "Print virtual addresses when creating resources" }, + { "info", DBG_INFO, "Print driver information" }, + + /* shaders */ + { "fs", DBG_FS, "Print fetch shaders" }, + { "vs", DBG_VS, "Print vertex shaders" }, + { "gs", DBG_GS, "Print geometry shaders" }, + { "ps", DBG_PS, "Print pixel shaders" }, + { "cs", DBG_CS, "Print compute shaders" }, + { "tcs", DBG_TCS, "Print tessellation control shaders" }, + { "tes", DBG_TES, "Print tessellation evaluation shaders" }, + { "noir", DBG_NO_IR, "Don't print the LLVM IR"}, + { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"}, + { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"}, + { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" }, + { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" }, + { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." }, + + { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." }, + { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." }, + { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." }, + { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." }, + + /* features */ + { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" }, + { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" }, + /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ + { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, + { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, + { "notiling", DBG_NO_TILING, "Disable tiling" }, + { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, + { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, + { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, + { "nowc", DBG_NO_WC, "Disable GTT write combining" }, + { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, + { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, + + DEBUG_NAMED_VALUE_END /* must be last */ +}; + +static const char* r600_get_vendor(struct pipe_screen* pscreen) +{ + return "X.Org"; +} + +static const char* r600_get_device_vendor(struct pipe_screen* pscreen) +{ + return "AMD"; +} + +static const char *r600_get_marketing_name(struct radeon_winsys *ws) +{ + if (!ws->get_chip_name) + return NULL; + return ws->get_chip_name(ws); +} + +static const char *r600_get_family_name(const struct r600_common_screen *rscreen) +{ + switch (rscreen->info.family) { + case CHIP_R600: return "AMD R600"; + case CHIP_RV610: return "AMD RV610"; + case CHIP_RV630: return "AMD RV630"; + case CHIP_RV670: return "AMD RV670"; + case CHIP_RV620: return "AMD RV620"; + case CHIP_RV635: return "AMD RV635"; + case CHIP_RS780: return "AMD RS780"; + case CHIP_RS880: return "AMD RS880"; + case CHIP_RV770: return "AMD RV770"; + case CHIP_RV730: return "AMD RV730"; + case CHIP_RV710: return "AMD RV710"; + case CHIP_RV740: return "AMD RV740"; + case CHIP_CEDAR: return "AMD CEDAR"; + case CHIP_REDWOOD: return "AMD REDWOOD"; + case CHIP_JUNIPER: return "AMD JUNIPER"; + case CHIP_CYPRESS: return "AMD CYPRESS"; + case CHIP_HEMLOCK: return "AMD HEMLOCK"; + case CHIP_PALM: return "AMD PALM"; + case CHIP_SUMO: return "AMD SUMO"; + case CHIP_SUMO2: return "AMD SUMO2"; + case CHIP_BARTS: return "AMD BARTS"; + case CHIP_TURKS: return "AMD TURKS"; + case CHIP_CAICOS: return "AMD CAICOS"; + case CHIP_CAYMAN: return "AMD CAYMAN"; + case CHIP_ARUBA: return "AMD ARUBA"; + default: return "AMD unknown"; + } +} + +static void r600_disk_cache_create(struct r600_common_screen *rscreen) +{ + /* Don't use the cache if shader dumping is enabled. */ + if (rscreen->debug_flags & DBG_ALL_SHADERS) + return; + + uint32_t mesa_timestamp; + if (disk_cache_get_function_timestamp(r600_disk_cache_create, + &mesa_timestamp)) { + char *timestamp_str; + int res = -1; + + res = asprintf(×tamp_str, "%u",mesa_timestamp); + if (res != -1) { + /* These flags affect shader compilation. */ + uint64_t shader_debug_flags = + rscreen->debug_flags & + (DBG_FS_CORRECT_DERIVS_AFTER_KILL | + DBG_UNSAFE_MATH); + + rscreen->disk_shader_cache = + disk_cache_create(r600_get_family_name(rscreen), + timestamp_str, + shader_debug_flags); + free(timestamp_str); + } + } +} + +static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; + return rscreen->disk_shader_cache; +} + +static const char* r600_get_name(struct pipe_screen* pscreen) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; + + return rscreen->renderer_string; +} + +static float r600_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen; + + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + if (rscreen->family >= CHIP_CEDAR) + return 16384.0f; + else + return 8192.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + } + return 0.0f; +} + +static int r600_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile, entrypoint); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + return PIPE_FORMAT_NV12; + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: + return false; + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: + return false; + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: + return true; + case PIPE_VIDEO_CAP_MAX_LEVEL: + return vl_level_supported(screen, profile); + default: + return 0; + } +} + +const char *r600_get_llvm_processor_name(enum radeon_family family) +{ + switch (family) { + case CHIP_R600: + case CHIP_RV630: + case CHIP_RV635: + case CHIP_RV670: + return "r600"; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + return "rs880"; + case CHIP_RV710: + return "rv710"; + case CHIP_RV730: + return "rv730"; + case CHIP_RV740: + case CHIP_RV770: + return "rv770"; + case CHIP_PALM: + case CHIP_CEDAR: + return "cedar"; + case CHIP_SUMO: + case CHIP_SUMO2: + return "sumo"; + case CHIP_REDWOOD: + return "redwood"; + case CHIP_JUNIPER: + return "juniper"; + case CHIP_HEMLOCK: + case CHIP_CYPRESS: + return "cypress"; + case CHIP_BARTS: + return "barts"; + case CHIP_TURKS: + return "turks"; + case CHIP_CAICOS: + return "caicos"; + case CHIP_CAYMAN: + case CHIP_ARUBA: + return "cayman"; + + default: + return ""; + } +} + +static unsigned get_max_threads_per_block(struct r600_common_screen *screen, + enum pipe_shader_ir ir_type) +{ + return 256; +} + +static int r600_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + + //TODO: select these params by asic + switch (param) { + case PIPE_COMPUTE_CAP_IR_TARGET: { + const char *gpu; + const char *triple = "r600--"; + gpu = r600_get_llvm_processor_name(rscreen->family); + if (ret) { + sprintf(ret, "%s-%s", gpu, triple); + } + /* +2 for dash and terminating NIL byte */ + return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); + } + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + if (ret) { + uint64_t *grid_dimension = ret; + grid_dimension[0] = 3; + } + return 1 * sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + if (ret) { + uint64_t *grid_size = ret; + grid_size[0] = 65535; + grid_size[1] = 65535; + grid_size[2] = 65535; + } + return 3 * sizeof(uint64_t) ; + + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + if (ret) { + uint64_t *block_size = ret; + unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); + block_size[0] = threads_per_block; + block_size[1] = threads_per_block; + block_size[2] = threads_per_block; + } + return 3 * sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + if (ret) { + uint64_t *max_threads_per_block = ret; + *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type); + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_ADDRESS_BITS: + if (ret) { + uint32_t *address_bits = ret; + address_bits[0] = 32; + } + return 1 * sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + if (ret) { + uint64_t *max_global_size = ret; + uint64_t max_mem_alloc_size; + + r600_get_compute_param(screen, ir_type, + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + &max_mem_alloc_size); + + /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least + * 1/4 of the MAX_GLOBAL_SIZE. Since the + * MAX_MEM_ALLOC_SIZE is fixed for older kernels, + * make sure we never report more than + * 4 * MAX_MEM_ALLOC_SIZE. + */ + *max_global_size = MIN2(4 * max_mem_alloc_size, + MAX2(rscreen->info.gart_size, + rscreen->info.vram_size)); + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + if (ret) { + uint64_t *max_local_size = ret; + /* Value reported by the closed source driver. */ + *max_local_size = 32768; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + if (ret) { + uint64_t *max_input_size = ret; + /* Value reported by the closed source driver. */ + *max_input_size = 1024; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + if (ret) { + uint64_t *max_mem_alloc_size = ret; + + *max_mem_alloc_size = rscreen->info.max_alloc_size; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + if (ret) { + uint32_t *max_clock_frequency = ret; + *max_clock_frequency = rscreen->info.max_shader_clock; + } + return sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + if (ret) { + uint32_t *max_compute_units = ret; + *max_compute_units = rscreen->info.num_good_compute_units; + } + return sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + if (ret) { + uint32_t *images_supported = ret; + *images_supported = 0; + } + return sizeof(uint32_t); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: + break; /* unused */ + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + if (ret) { + uint32_t *subgroup_size = ret; + *subgroup_size = r600_wavefront_size(rscreen->family); + } + return sizeof(uint32_t); + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + if (ret) { + uint64_t *max_variable_threads_per_block = ret; + *max_variable_threads_per_block = 0; + } + return sizeof(uint64_t); + } + + fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); + return 0; +} + +static uint64_t r600_get_timestamp(struct pipe_screen *screen) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + + return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / + rscreen->info.clock_crystal_freq; +} + +static void r600_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **dst, + struct pipe_fence_handle *src) +{ + struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; + struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst; + struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src; + + if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { + ws->fence_reference(&(*rdst)->gfx, NULL); + ws->fence_reference(&(*rdst)->sdma, NULL); + FREE(*rdst); + } + *rdst = rsrc; +} + +static boolean r600_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; + struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; + struct r600_common_context *rctx; + int64_t abs_timeout = os_time_get_absolute_timeout(timeout); + + ctx = threaded_context_unwrap_sync(ctx); + rctx = ctx ? (struct r600_common_context*)ctx : NULL; + + if (rfence->sdma) { + if (!rws->fence_wait(rws, rfence->sdma, timeout)) + return false; + + /* Recompute the timeout after waiting. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } + } + + if (!rfence->gfx) + return true; + + /* Flush the gfx IB if it hasn't been flushed yet. */ + if (rctx && + rfence->gfx_unflushed.ctx == rctx && + rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { + rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); + rfence->gfx_unflushed.ctx = NULL; + + if (!timeout) + return false; + + /* Recompute the timeout after all that. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } + } + + return rws->fence_wait(rws, rfence->gfx, timeout); +} + +static void r600_query_memory_info(struct pipe_screen *screen, + struct pipe_memory_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_winsys *ws = rscreen->ws; + unsigned vram_usage, gtt_usage; + + info->total_device_memory = rscreen->info.vram_size / 1024; + info->total_staging_memory = rscreen->info.gart_size / 1024; + + /* The real TTM memory usage is somewhat random, because: + * + * 1) TTM delays freeing memory, because it can only free it after + * fences expire. + * + * 2) The memory usage can be really low if big VRAM evictions are + * taking place, but the real usage is well above the size of VRAM. + * + * Instead, return statistics of this process. + */ + vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; + gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; + + info->avail_device_memory = + vram_usage <= info->total_device_memory ? + info->total_device_memory - vram_usage : 0; + info->avail_staging_memory = + gtt_usage <= info->total_staging_memory ? + info->total_staging_memory - gtt_usage : 0; + + info->device_memory_evicted = + ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; + + if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) + info->nr_device_memory_evictions = + ws->query_value(ws, RADEON_NUM_EVICTIONS); + else + /* Just return the number of evicted 64KB pages. */ + info->nr_device_memory_evictions = info->device_memory_evicted / 64; +} + +struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + if (templ->target == PIPE_BUFFER) { + return r600_buffer_create(screen, templ, 256); + } else { + return r600_texture_create(screen, templ); + } +} + +bool r600_common_screen_init(struct r600_common_screen *rscreen, + struct radeon_winsys *ws) +{ + char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; + struct utsname uname_data; + const char *chip_name; + + ws->query_info(ws, &rscreen->info); + rscreen->ws = ws; + + if ((chip_name = r600_get_marketing_name(ws))) + snprintf(family_name, sizeof(family_name), "%s / ", + r600_get_family_name(rscreen) + 4); + else + chip_name = r600_get_family_name(rscreen); + + if (uname(&uname_data) == 0) + snprintf(kernel_version, sizeof(kernel_version), + " / %s", uname_data.release); + + if (HAVE_LLVM > 0) { + snprintf(llvm_string, sizeof(llvm_string), + ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, + HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); + } + + snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), + "%s (%sDRM %i.%i.%i%s%s)", + chip_name, family_name, rscreen->info.drm_major, + rscreen->info.drm_minor, rscreen->info.drm_patchlevel, + kernel_version, llvm_string); + + rscreen->b.get_name = r600_get_name; + rscreen->b.get_vendor = r600_get_vendor; + rscreen->b.get_device_vendor = r600_get_device_vendor; + rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache; + rscreen->b.get_compute_param = r600_get_compute_param; + rscreen->b.get_paramf = r600_get_paramf; + rscreen->b.get_timestamp = r600_get_timestamp; + rscreen->b.fence_finish = r600_fence_finish; + rscreen->b.fence_reference = r600_fence_reference; + rscreen->b.resource_destroy = u_resource_destroy_vtbl; + rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; + rscreen->b.query_memory_info = r600_query_memory_info; + + if (rscreen->info.has_hw_decode) { + rscreen->b.get_video_param = rvid_get_video_param; + rscreen->b.is_video_format_supported = rvid_is_format_supported; + } else { + rscreen->b.get_video_param = r600_get_video_param; + rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; + } + + r600_init_screen_texture_functions(rscreen); + r600_init_screen_query_functions(rscreen); + + rscreen->family = rscreen->info.family; + rscreen->chip_class = rscreen->info.chip_class; + rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); + + r600_disk_cache_create(rscreen); + + slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); + + rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); + if (rscreen->force_aniso >= 0) { + printf("radeon: Forcing anisotropy filter to %ix\n", + /* round down to a power of two */ + 1 << util_logbase2(rscreen->force_aniso)); + } + + (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); + (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); + + if (rscreen->debug_flags & DBG_INFO) { + printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", + rscreen->info.pci_domain, rscreen->info.pci_bus, + rscreen->info.pci_dev, rscreen->info.pci_func); + printf("pci_id = 0x%x\n", rscreen->info.pci_id); + printf("family = %i (%s)\n", rscreen->info.family, + r600_get_family_name(rscreen)); + printf("chip_class = %i\n", rscreen->info.chip_class); + printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size); + printf("gart_page_size = %u\n", rscreen->info.gart_page_size); + printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); + printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); + printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); + printf("max_alloc_size = %i MB\n", + (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); + printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); + printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); + printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); + printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); + printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); + printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); + printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings); + printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); + printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); + printf("me_fw_version = %i\n", rscreen->info.me_fw_version); + printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); + printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); + printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); + printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); + printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size); + printf("drm = %i.%i.%i\n", rscreen->info.drm_major, + rscreen->info.drm_minor, rscreen->info.drm_patchlevel); + printf("has_userptr = %i\n", rscreen->info.has_userptr); + printf("has_syncobj = %u\n", rscreen->info.has_syncobj); + + printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); + printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); + printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); + printf("max_se = %i\n", rscreen->info.max_se); + printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); + + printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); + printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); + printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); + printf("num_render_backends = %i\n", rscreen->info.num_render_backends); + printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); + printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); + printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask); + printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment); + } + return true; +} + +void r600_destroy_common_screen(struct r600_common_screen *rscreen) +{ + r600_perfcounters_destroy(rscreen); + r600_gpu_load_kill_thread(rscreen); + + mtx_destroy(&rscreen->gpu_load_mutex); + mtx_destroy(&rscreen->aux_context_lock); + rscreen->aux_context->destroy(rscreen->aux_context); + + slab_destroy_parent(&rscreen->pool_transfers); + + disk_cache_destroy(rscreen->disk_shader_cache); + rscreen->ws->destroy(rscreen->ws); + FREE(rscreen); +} + +bool r600_can_dump_shader(struct r600_common_screen *rscreen, + unsigned processor) +{ + return rscreen->debug_flags & (1 << processor); +} + +bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor) +{ + return (rscreen->debug_flags & DBG_CHECK_IR) || + r600_can_dump_shader(rscreen, processor); +} + +void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value) +{ + struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; + + mtx_lock(&rscreen->aux_context_lock); + rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value); + rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); + mtx_unlock(&rscreen->aux_context_lock); +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h new file mode 100644 index 000000000..a6406cfdb --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h @@ -0,0 +1,932 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +/** + * This file contains common screen and context structures and functions + * for r600g and radeonsi. + */ + +#ifndef R600_PIPE_COMMON_H +#define R600_PIPE_COMMON_H + +#include <stdio.h> + +#include "amd/common/ac_binary.h" + +#include "radeon/radeon_winsys.h" + +#include "util/disk_cache.h" +#include "util/u_blitter.h" +#include "util/list.h" +#include "util/u_range.h" +#include "util/slab.h" +#include "util/u_suballoc.h" +#include "util/u_transfer.h" +#include "util/u_threaded_context.h" + +struct u_log_context; + +#define ATI_VENDOR_ID 0x1002 + +#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) +#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) +#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) +#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) + +#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) +/* Pipeline & streamout query controls. */ +#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) +#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) +#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3) +#define R600_CONTEXT_PRIVATE_FLAG (1u << 4) + +/* special primitive types */ +#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX + +#define R600_NOT_QUERY 0xffffffff + +/* Debug flags. */ +#define DBG_VS (1 << PIPE_SHADER_VERTEX) +#define DBG_PS (1 << PIPE_SHADER_FRAGMENT) +#define DBG_GS (1 << PIPE_SHADER_GEOMETRY) +#define DBG_TCS (1 << PIPE_SHADER_TESS_CTRL) +#define DBG_TES (1 << PIPE_SHADER_TESS_EVAL) +#define DBG_CS (1 << PIPE_SHADER_COMPUTE) +#define DBG_ALL_SHADERS (DBG_FS - 1) +#define DBG_FS (1 << 6) /* fetch shader */ +#define DBG_TEX (1 << 7) +#define DBG_NIR (1 << 8) +#define DBG_COMPUTE (1 << 9) +/* gap */ +#define DBG_VM (1 << 11) +#define DBG_NO_IR (1 << 12) +#define DBG_NO_TGSI (1 << 13) +#define DBG_NO_ASM (1 << 14) +#define DBG_PREOPT_IR (1 << 15) +#define DBG_CHECK_IR (1 << 16) +#define DBG_NO_OPT_VARIANT (1 << 17) +#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18) +/* gaps */ +#define DBG_TEST_DMA (1 << 20) +/* Bits 21-31 are reserved for the r600g driver. */ +/* features */ +#define DBG_NO_ASYNC_DMA (1ull << 32) +#define DBG_NO_HYPERZ (1ull << 33) +#define DBG_NO_DISCARD_RANGE (1ull << 34) +#define DBG_NO_2D_TILING (1ull << 35) +#define DBG_NO_TILING (1ull << 36) +#define DBG_SWITCH_ON_EOP (1ull << 37) +#define DBG_FORCE_DMA (1ull << 38) +#define DBG_PRECOMPILE (1ull << 39) +#define DBG_INFO (1ull << 40) +#define DBG_NO_WC (1ull << 41) +#define DBG_CHECK_VM (1ull << 42) +/* gap */ +#define DBG_UNSAFE_MATH (1ull << 49) +#define DBG_TEST_VMFAULT_CP (1ull << 51) +#define DBG_TEST_VMFAULT_SDMA (1ull << 52) +#define DBG_TEST_VMFAULT_SHADER (1ull << 53) + +#define R600_MAP_BUFFER_ALIGNMENT 64 +#define R600_MAX_VIEWPORTS 16 + +#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024 + +enum r600_coherency { + R600_COHERENCY_NONE, /* no cache flushes needed */ + R600_COHERENCY_SHADER, + R600_COHERENCY_CB_META, +}; + +#ifdef PIPE_ARCH_BIG_ENDIAN +#define R600_BIG_ENDIAN 1 +#else +#define R600_BIG_ENDIAN 0 +#endif + +struct r600_common_context; +struct r600_perfcounters; +struct tgsi_shader_info; +struct r600_qbo_state; + +void radeon_shader_binary_init(struct ac_shader_binary *b); +void radeon_shader_binary_clean(struct ac_shader_binary *b); + +/* Only 32-bit buffer allocations are supported, gallium doesn't support more + * at the moment. + */ +struct r600_resource { + struct threaded_resource b; + + /* Winsys objects. */ + struct pb_buffer *buf; + uint64_t gpu_address; + /* Memory usage if the buffer placement is optimal. */ + uint64_t vram_usage; + uint64_t gart_usage; + + /* Resource properties. */ + uint64_t bo_size; + unsigned bo_alignment; + enum radeon_bo_domain domains; + enum radeon_bo_flag flags; + unsigned bind_history; + + /* The buffer range which is initialized (with a write transfer, + * streamout, DMA, or as a random access target). The rest of + * the buffer is considered invalid and can be mapped unsynchronized. + * + * This allows unsychronized mapping of a buffer range which hasn't + * been used yet. It's for applications which forget to use + * the unsynchronized map flag and expect the driver to figure it out. + */ + struct util_range valid_buffer_range; + + /* Whether the resource has been exported via resource_get_handle. */ + unsigned external_usage; /* PIPE_HANDLE_USAGE_* */ + + /* Whether this resource is referenced by bindless handles. */ + bool texture_handle_allocated; + bool image_handle_allocated; +}; + +struct r600_transfer { + struct threaded_transfer b; + struct r600_resource *staging; + unsigned offset; +}; + +struct r600_fmask_info { + uint64_t offset; + uint64_t size; + unsigned alignment; + unsigned pitch_in_pixels; + unsigned bank_height; + unsigned slice_tile_max; + unsigned tile_mode_index; + unsigned tile_swizzle; +}; + +struct r600_cmask_info { + uint64_t offset; + uint64_t size; + unsigned alignment; + unsigned slice_tile_max; + uint64_t base_address_reg; +}; + +struct r600_texture { + struct r600_resource resource; + + uint64_t size; + unsigned num_level0_transfers; + enum pipe_format db_render_format; + bool is_depth; + bool db_compatible; + bool can_sample_z; + bool can_sample_s; + unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ + unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ + struct r600_texture *flushed_depth_texture; + struct radeon_surf surface; + + /* Colorbuffer compression and fast clear. */ + struct r600_fmask_info fmask; + struct r600_cmask_info cmask; + struct r600_resource *cmask_buffer; + unsigned cb_color_info; /* fast clear enable bit */ + unsigned color_clear_value[2]; + unsigned last_msaa_resolve_target_micro_mode; + + /* Depth buffer compression and fast clear. */ + uint64_t htile_offset; + bool depth_cleared; /* if it was cleared at least once */ + float depth_clear_value; + bool stencil_cleared; /* if it was cleared at least once */ + uint8_t stencil_clear_value; + + bool non_disp_tiling; /* R600-Cayman only */ + + /* Counter that should be non-zero if the texture is bound to a + * framebuffer. Implemented in radeonsi only. + */ + uint32_t framebuffers_bound; +}; + +struct r600_surface { + struct pipe_surface base; + + /* These can vary with block-compressed textures. */ + unsigned width0; + unsigned height0; + + bool color_initialized; + bool depth_initialized; + + /* Misc. color flags. */ + bool alphatest_bypass; + bool export_16bpc; + bool color_is_int8; + bool color_is_int10; + + /* Color registers. */ + unsigned cb_color_info; + unsigned cb_color_base; + unsigned cb_color_view; + unsigned cb_color_size; /* R600 only */ + unsigned cb_color_dim; /* EG only */ + unsigned cb_color_pitch; /* EG and later */ + unsigned cb_color_slice; /* EG and later */ + unsigned cb_color_attrib; /* EG and later */ + unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ + unsigned cb_color_fmask_slice; /* EG and later */ + unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ + unsigned cb_color_mask; /* R600 only */ + struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ + struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ + + /* DB registers. */ + uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ + uint64_t db_stencil_base; /* EG and later */ + uint64_t db_htile_data_base; + unsigned db_depth_info; /* R600 only, then SI and later */ + unsigned db_z_info; /* EG and later */ + unsigned db_depth_view; + unsigned db_depth_size; + unsigned db_depth_slice; /* EG and later */ + unsigned db_stencil_info; /* EG and later */ + unsigned db_prefetch_limit; /* R600 only */ + unsigned db_htile_surface; + unsigned db_preload_control; /* EG and later */ +}; + +struct r600_mmio_counter { + unsigned busy; + unsigned idle; +}; + +union r600_mmio_counters { + struct { + /* For global GPU load including SDMA. */ + struct r600_mmio_counter gpu; + + /* GRBM_STATUS */ + struct r600_mmio_counter spi; + struct r600_mmio_counter gui; + struct r600_mmio_counter ta; + struct r600_mmio_counter gds; + struct r600_mmio_counter vgt; + struct r600_mmio_counter ia; + struct r600_mmio_counter sx; + struct r600_mmio_counter wd; + struct r600_mmio_counter bci; + struct r600_mmio_counter sc; + struct r600_mmio_counter pa; + struct r600_mmio_counter db; + struct r600_mmio_counter cp; + struct r600_mmio_counter cb; + + /* SRBM_STATUS2 */ + struct r600_mmio_counter sdma; + + /* CP_STAT */ + struct r600_mmio_counter pfp; + struct r600_mmio_counter meq; + struct r600_mmio_counter me; + struct r600_mmio_counter surf_sync; + struct r600_mmio_counter cp_dma; + struct r600_mmio_counter scratch_ram; + } named; + unsigned array[0]; +}; + +struct r600_memory_object { + struct pipe_memory_object b; + struct pb_buffer *buf; + uint32_t stride; + uint32_t offset; +}; + +struct r600_common_screen { + struct pipe_screen b; + struct radeon_winsys *ws; + enum radeon_family family; + enum chip_class chip_class; + struct radeon_info info; + uint64_t debug_flags; + bool has_cp_dma; + bool has_streamout; + + struct disk_cache *disk_shader_cache; + + struct slab_parent_pool pool_transfers; + + /* Texture filter settings. */ + int force_aniso; /* -1 = disabled */ + + /* Auxiliary context. Mainly used to initialize resources. + * It must be locked prior to using and flushed before unlocking. */ + struct pipe_context *aux_context; + mtx_t aux_context_lock; + + /* This must be in the screen, because UE4 uses one context for + * compilation and another one for rendering. + */ + unsigned num_compilations; + /* Along with ST_DEBUG=precompile, this should show if applications + * are loading shaders on demand. This is a monotonic counter. + */ + unsigned num_shaders_created; + unsigned num_shader_cache_hits; + + /* GPU load thread. */ + mtx_t gpu_load_mutex; + thrd_t gpu_load_thread; + union r600_mmio_counters mmio_counters; + volatile unsigned gpu_load_stop_thread; /* bool */ + + char renderer_string[100]; + + /* Performance counters. */ + struct r600_perfcounters *perfcounters; + + /* If pipe_screen wants to recompute and re-emit the framebuffer, + * sampler, and image states of all contexts, it should atomically + * increment this. + * + * Each context will compare this with its own last known value of + * the counter before drawing and re-emit the states accordingly. + */ + unsigned dirty_tex_counter; + + /* Atomically increment this counter when an existing texture's + * metadata is enabled or disabled in a way that requires changing + * contexts' compressed texture binding masks. + */ + unsigned compressed_colortex_counter; + + struct { + /* Context flags to set so that all writes from earlier jobs + * in the CP are seen by L2 clients. + */ + unsigned cp_to_L2; + + /* Context flags to set so that all writes from earlier jobs + * that end in L2 are seen by CP. + */ + unsigned L2_to_cp; + + /* Context flags to set so that all writes from earlier + * compute jobs are seen by L2 clients. + */ + unsigned compute_to_L2; + } barrier_flags; + + void (*query_opaque_metadata)(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct radeon_bo_metadata *md); + + void (*apply_opaque_metadata)(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct radeon_bo_metadata *md); +}; + +/* This encapsulates a state or an operation which can emitted into the GPU + * command stream. */ +struct r600_atom { + void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); + unsigned num_dw; + unsigned short id; +}; + +struct r600_so_target { + struct pipe_stream_output_target b; + + /* The buffer where BUFFER_FILLED_SIZE is stored. */ + struct r600_resource *buf_filled_size; + unsigned buf_filled_size_offset; + bool buf_filled_size_valid; + + unsigned stride_in_dw; +}; + +struct r600_streamout { + struct r600_atom begin_atom; + bool begin_emitted; + unsigned num_dw_for_end; + + unsigned enabled_mask; + unsigned num_targets; + struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS]; + + unsigned append_bitmask; + bool suspended; + + /* External state which comes from the vertex shader, + * it must be set explicitly when binding a shader. */ + uint16_t *stride_in_dw; + unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */ + + /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */ + unsigned hw_enabled_mask; + + /* The state of VGT_STRMOUT_(CONFIG|EN). */ + struct r600_atom enable_atom; + bool streamout_enabled; + bool prims_gen_query_enabled; + int num_prims_gen_queries; +}; + +struct r600_signed_scissor { + int minx; + int miny; + int maxx; + int maxy; +}; + +struct r600_scissors { + struct r600_atom atom; + unsigned dirty_mask; + struct pipe_scissor_state states[R600_MAX_VIEWPORTS]; +}; + +struct r600_viewports { + struct r600_atom atom; + unsigned dirty_mask; + unsigned depth_range_dirty_mask; + struct pipe_viewport_state states[R600_MAX_VIEWPORTS]; + struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS]; +}; + +struct r600_ring { + struct radeon_winsys_cs *cs; + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence); +}; + +/* Saved CS data for debugging features. */ +struct radeon_saved_cs { + uint32_t *ib; + unsigned num_dw; + + struct radeon_bo_list_item *bo_list; + unsigned bo_count; +}; + +struct r600_common_context { + struct pipe_context b; /* base class */ + + struct r600_common_screen *screen; + struct radeon_winsys *ws; + struct radeon_winsys_ctx *ctx; + enum radeon_family family; + enum chip_class chip_class; + struct r600_ring gfx; + struct r600_ring dma; + struct pipe_fence_handle *last_gfx_fence; + struct pipe_fence_handle *last_sdma_fence; + struct r600_resource *eop_bug_scratch; + unsigned num_gfx_cs_flushes; + unsigned initial_gfx_cs_size; + unsigned gpu_reset_counter; + unsigned last_dirty_tex_counter; + unsigned last_compressed_colortex_counter; + unsigned last_num_draw_calls; + + struct threaded_context *tc; + struct u_suballocator *allocator_zeroed_memory; + struct slab_child_pool pool_transfers; + struct slab_child_pool pool_transfers_unsync; /* for threaded_context */ + + /* Current unaccounted memory usage. */ + uint64_t vram; + uint64_t gtt; + + /* States. */ + struct r600_streamout streamout; + struct r600_scissors scissors; + struct r600_viewports viewports; + bool scissor_enabled; + bool clip_halfz; + bool vs_writes_viewport_index; + bool vs_disables_clipping_viewport; + + /* Additional context states. */ + unsigned flags; /* flush flags */ + + /* Queries. */ + /* Maintain the list of active queries for pausing between IBs. */ + int num_occlusion_queries; + int num_perfect_occlusion_queries; + struct list_head active_queries; + unsigned num_cs_dw_queries_suspend; + /* Misc stats. */ + unsigned num_draw_calls; + unsigned num_decompress_calls; + unsigned num_mrt_draw_calls; + unsigned num_prim_restart_calls; + unsigned num_spill_draw_calls; + unsigned num_compute_calls; + unsigned num_spill_compute_calls; + unsigned num_dma_calls; + unsigned num_cp_dma_calls; + unsigned num_vs_flushes; + unsigned num_ps_flushes; + unsigned num_cs_flushes; + unsigned num_cb_cache_flushes; + unsigned num_db_cache_flushes; + unsigned num_L2_invalidates; + unsigned num_L2_writebacks; + unsigned num_resident_handles; + uint64_t num_alloc_tex_transfer_bytes; + + /* Render condition. */ + struct r600_atom render_cond_atom; + struct pipe_query *render_cond; + unsigned render_cond_mode; + bool render_cond_invert; + bool render_cond_force_off; /* for u_blitter */ + + /* MSAA sample locations. + * The first index is the sample index. + * The second index is the coordinate: X, Y. */ + float sample_locations_1x[1][2]; + float sample_locations_2x[2][2]; + float sample_locations_4x[4][2]; + float sample_locations_8x[8][2]; + float sample_locations_16x[16][2]; + + struct pipe_debug_callback debug; + struct pipe_device_reset_callback device_reset_callback; + struct u_log_context *log; + + void *query_result_shader; + + /* Copy one resource to another using async DMA. */ + void (*dma_copy)(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box); + + void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value); + + void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value, + enum r600_coherency coher); + + void (*blit_decompress_depth)(struct pipe_context *ctx, + struct r600_texture *texture, + struct r600_texture *staging, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned first_sample, unsigned last_sample); + + /* Reallocate the buffer and update all resource bindings where + * the buffer is bound, including all resource descriptors. */ + void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf); + + /* Update all resource bindings where the buffer is bound, including + * all resource descriptors. This is invalidate_buffer without + * the invalidation. */ + void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf, + uint64_t old_gpu_address); + + void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st); + + /* This ensures there is enough space in the command stream. */ + void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw, + bool include_draw_vbo); + + void (*set_atom_dirty)(struct r600_common_context *ctx, + struct r600_atom *atom, bool dirty); + + void (*check_vm_faults)(struct r600_common_context *ctx, + struct radeon_saved_cs *saved, + enum ring_type ring); +}; + +/* r600_buffer_common.c */ +bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, + struct pb_buffer *buf, + enum radeon_bo_usage usage); +void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, + struct r600_resource *resource, + unsigned usage); +void r600_buffer_subdata(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned usage, unsigned offset, + unsigned size, const void *data); +void r600_init_resource_fields(struct r600_common_screen *rscreen, + struct r600_resource *res, + uint64_t size, unsigned alignment); +bool r600_alloc_resource(struct r600_common_screen *rscreen, + struct r600_resource *res); +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ, + unsigned alignment); +struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, + unsigned flags, + unsigned usage, + unsigned size, + unsigned alignment); +struct pipe_resource * +r600_buffer_from_user_memory(struct pipe_screen *screen, + const struct pipe_resource *templ, + void *user_memory); +void +r600_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource); +void r600_replace_buffer_storage(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src); + +/* r600_common_pipe.c */ +void r600_gfx_write_event_eop(struct r600_common_context *ctx, + unsigned event, unsigned event_flags, + unsigned data_sel, + struct r600_resource *buf, uint64_t va, + uint32_t new_fence, unsigned query_type); +unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); +void r600_gfx_wait_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask); +void r600_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, + int x1, int y1, int x2, int y2, + float depth, unsigned num_instances, + enum blitter_attrib_type type, + const union blitter_attrib *attrib); +bool r600_common_screen_init(struct r600_common_screen *rscreen, + struct radeon_winsys *ws); +void r600_destroy_common_screen(struct r600_common_screen *rscreen); +void r600_preflush_suspend_features(struct r600_common_context *ctx); +void r600_postflush_resume_features(struct r600_common_context *ctx); +bool r600_common_context_init(struct r600_common_context *rctx, + struct r600_common_screen *rscreen, + unsigned context_flags); +void r600_common_context_cleanup(struct r600_common_context *rctx); +bool r600_can_dump_shader(struct r600_common_screen *rscreen, + unsigned processor); +bool r600_extra_shader_checks(struct r600_common_screen *rscreen, + unsigned processor); +void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value); +struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, + const struct pipe_resource *templ); +const char *r600_get_llvm_processor_name(enum radeon_family family); +void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, + struct r600_resource *dst, struct r600_resource *src); +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, + struct radeon_saved_cs *saved, bool get_buffer_list); +void radeon_clear_saved_cs(struct radeon_saved_cs *saved); +bool r600_check_device_reset(struct r600_common_context *rctx); + +/* r600_gpu_load.c */ +void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); +uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type); +unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type, + uint64_t begin); + +/* r600_perfcounters.c */ +void r600_perfcounters_destroy(struct r600_common_screen *rscreen); + +/* r600_query.c */ +void r600_init_screen_query_functions(struct r600_common_screen *rscreen); +void r600_query_init(struct r600_common_context *rctx); +void r600_suspend_queries(struct r600_common_context *ctx); +void r600_resume_queries(struct r600_common_context *ctx); +void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen); + +/* r600_streamout.c */ +void r600_streamout_buffers_dirty(struct r600_common_context *rctx); +void r600_set_streamout_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offset); +void r600_emit_streamout_end(struct r600_common_context *rctx); +void r600_update_prims_generated_query_state(struct r600_common_context *rctx, + unsigned type, int diff); +void r600_streamout_init(struct r600_common_context *rctx); + +/* r600_test_dma.c */ +void r600_test_dma(struct r600_common_screen *rscreen); + +/* r600_texture.c */ +bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, + struct r600_texture *rdst, + unsigned dst_level, unsigned dstx, + unsigned dsty, unsigned dstz, + struct r600_texture *rsrc, + unsigned src_level, + const struct pipe_box *src_box); +void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + unsigned nr_samples, + struct r600_fmask_info *out); +void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct r600_cmask_info *out); +bool r600_init_flushed_depth_texture(struct pipe_context *ctx, + struct pipe_resource *texture, + struct r600_texture **staging); +void r600_print_texture_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, struct u_log_context *log); +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ); +struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_surface *templ, + unsigned width0, unsigned height0, + unsigned width, unsigned height); +unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap); +void evergreen_do_fast_color_clear(struct r600_common_context *rctx, + struct pipe_framebuffer_state *fb, + struct r600_atom *fb_state, + unsigned *buffers, ubyte *dirty_cbufs, + const union pipe_color_union *color); +void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); +void r600_init_context_texture_functions(struct r600_common_context *rctx); + +/* r600_viewport.c */ +void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx, + struct pipe_scissor_state *scissor); +void r600_viewport_set_rast_deps(struct r600_common_context *rctx, + bool scissor_enable, bool clip_halfz); +void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx, + struct tgsi_shader_info *info); +void r600_init_viewport_functions(struct r600_common_context *rctx); + +/* cayman_msaa.c */ +extern const uint32_t eg_sample_locs_2x[4]; +extern const unsigned eg_max_dist_2x; +extern const uint32_t eg_sample_locs_4x[4]; +extern const unsigned eg_max_dist_4x; +void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, + unsigned sample_index, float *out_value); +void cayman_init_msaa(struct pipe_context *ctx); +void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); +void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, + int ps_iter_samples, int overrast_samples, + unsigned sc_mode_cntl_1); + + +/* Inline helpers. */ + +static inline struct r600_resource *r600_resource(struct pipe_resource *r) +{ + return (struct r600_resource*)r; +} + +static inline void +r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) +{ + pipe_resource_reference((struct pipe_resource **)ptr, + (struct pipe_resource *)res); +} + +static inline void +r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res) +{ + pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b); +} + +static inline void +r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_resource *res = (struct r600_resource *)r; + + if (res) { + /* Add memory usage for need_gfx_cs_space */ + rctx->vram += res->vram_usage; + rctx->gtt += res->gart_usage; + } +} + +static inline bool r600_get_strmout_en(struct r600_common_context *rctx) +{ + return rctx->streamout.streamout_enabled || + rctx->streamout.prims_gen_query_enabled; +} + +#define SQ_TEX_XY_FILTER_POINT 0x00 +#define SQ_TEX_XY_FILTER_BILINEAR 0x01 +#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02 +#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03 + +static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso) +{ + if (filter == PIPE_TEX_FILTER_LINEAR) + return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR + : SQ_TEX_XY_FILTER_BILINEAR; + else + return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT + : SQ_TEX_XY_FILTER_POINT; +} + +static inline unsigned r600_tex_aniso_filter(unsigned filter) +{ + if (filter < 2) + return 0; + if (filter < 4) + return 1; + if (filter < 8) + return 2; + if (filter < 16) + return 3; + return 4; +} + +static inline unsigned r600_wavefront_size(enum radeon_family family) +{ + switch (family) { + case CHIP_RV610: + case CHIP_RS780: + case CHIP_RV620: + case CHIP_RS880: + return 16; + case CHIP_RV630: + case CHIP_RV635: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_PALM: + case CHIP_CEDAR: + return 32; + default: + return 64; + } +} + +static inline enum radeon_bo_priority +r600_get_sampler_view_priority(struct r600_resource *res) +{ + if (res->b.b.target == PIPE_BUFFER) + return RADEON_PRIO_SAMPLER_BUFFER; + + if (res->b.b.nr_samples > 1) + return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; + + return RADEON_PRIO_SAMPLER_TEXTURE; +} + +static inline bool +r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) +{ + return (stencil_sampler && tex->can_sample_s) || + (!stencil_sampler && tex->can_sample_z); +} + +static inline bool +r600_htile_enabled(struct r600_texture *tex, unsigned level) +{ + return tex->htile_offset && level == 0; +} + +#define COMPUTE_DBG(rscreen, fmt, args...) \ + do { \ + if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ + } while (0); + +#define R600_ERR(fmt, args...) \ + fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args) + +/* For MSAA sample positions. */ +#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ + (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ + (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \ + (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ + (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) + +static inline int S_FIXED(float value, unsigned frac_bits) +{ + return value * (1 << frac_bits); +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/r600_query.c b/lib/mesa/src/gallium/drivers/r600/r600_query.c new file mode 100644 index 000000000..aa3e36f56 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_query.c @@ -0,0 +1,2126 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * Copyright 2014 Marek Olšák <marek.olsak@amd.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "r600_query.h" +#include "r600_pipe.h" +#include "r600_cs.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "os/os_time.h" +#include "tgsi/tgsi_text.h" + +#define R600_MAX_STREAMS 4 + +struct r600_hw_query_params { + unsigned start_offset; + unsigned end_offset; + unsigned fence_offset; + unsigned pair_stride; + unsigned pair_count; +}; + +/* Queries without buffer handling or suspend/resume. */ +struct r600_query_sw { + struct r600_query b; + + uint64_t begin_result; + uint64_t end_result; + + uint64_t begin_time; + uint64_t end_time; + + /* Fence for GPU_FINISHED. */ + struct pipe_fence_handle *fence; +}; + +static void r600_query_sw_destroy(struct r600_common_screen *rscreen, + struct r600_query *rquery) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL); + FREE(query); +} + +static enum radeon_value_id winsys_id_from_type(unsigned type) +{ + switch (type) { + case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY; + case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY; + case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM; + case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT; + case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; + case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS; + case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS; + case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS; + case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER; + case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; + case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS; + case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS; + case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; + case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE; + case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; + case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; + case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; + case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; + case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME; + default: unreachable("query type does not correspond to winsys id"); + } +} + +static bool r600_query_sw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + enum radeon_value_id ws_id; + + switch(query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + break; + case R600_QUERY_DRAW_CALLS: + query->begin_result = rctx->num_draw_calls; + break; + case R600_QUERY_DECOMPRESS_CALLS: + query->begin_result = rctx->num_decompress_calls; + break; + case R600_QUERY_MRT_DRAW_CALLS: + query->begin_result = rctx->num_mrt_draw_calls; + break; + case R600_QUERY_PRIM_RESTART_CALLS: + query->begin_result = rctx->num_prim_restart_calls; + break; + case R600_QUERY_SPILL_DRAW_CALLS: + query->begin_result = rctx->num_spill_draw_calls; + break; + case R600_QUERY_COMPUTE_CALLS: + query->begin_result = rctx->num_compute_calls; + break; + case R600_QUERY_SPILL_COMPUTE_CALLS: + query->begin_result = rctx->num_spill_compute_calls; + break; + case R600_QUERY_DMA_CALLS: + query->begin_result = rctx->num_dma_calls; + break; + case R600_QUERY_CP_DMA_CALLS: + query->begin_result = rctx->num_cp_dma_calls; + break; + case R600_QUERY_NUM_VS_FLUSHES: + query->begin_result = rctx->num_vs_flushes; + break; + case R600_QUERY_NUM_PS_FLUSHES: + query->begin_result = rctx->num_ps_flushes; + break; + case R600_QUERY_NUM_CS_FLUSHES: + query->begin_result = rctx->num_cs_flushes; + break; + case R600_QUERY_NUM_CB_CACHE_FLUSHES: + query->begin_result = rctx->num_cb_cache_flushes; + break; + case R600_QUERY_NUM_DB_CACHE_FLUSHES: + query->begin_result = rctx->num_db_cache_flushes; + break; + case R600_QUERY_NUM_L2_INVALIDATES: + query->begin_result = rctx->num_L2_invalidates; + break; + case R600_QUERY_NUM_L2_WRITEBACKS: + query->begin_result = rctx->num_L2_writebacks; + break; + case R600_QUERY_NUM_RESIDENT_HANDLES: + query->begin_result = rctx->num_resident_handles; + break; + case R600_QUERY_TC_OFFLOADED_SLOTS: + query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; + break; + case R600_QUERY_TC_DIRECT_SLOTS: + query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0; + break; + case R600_QUERY_TC_NUM_SYNCS: + query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0; + break; + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_MAPPED_VRAM: + case R600_QUERY_MAPPED_GTT: + case R600_QUERY_VRAM_USAGE: + case R600_QUERY_VRAM_VIS_USAGE: + case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_NUM_MAPPED_BUFFERS: + query->begin_result = 0; + break; + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_NUM_GFX_IBS: + case R600_QUERY_NUM_SDMA_IBS: + case R600_QUERY_NUM_BYTES_MOVED: + case R600_QUERY_NUM_EVICTIONS: + case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } + case R600_QUERY_GFX_BO_LIST_SIZE: + ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + query->begin_time = rctx->ws->query_value(rctx->ws, + RADEON_NUM_GFX_IBS); + break; + case R600_QUERY_CS_THREAD_BUSY: + ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + query->begin_time = os_time_get_nano(); + break; + case R600_QUERY_GALLIUM_THREAD_BUSY: + query->begin_result = + rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; + query->begin_time = os_time_get_nano(); + break; + case R600_QUERY_GPU_LOAD: + case R600_QUERY_GPU_SHADERS_BUSY: + case R600_QUERY_GPU_TA_BUSY: + case R600_QUERY_GPU_GDS_BUSY: + case R600_QUERY_GPU_VGT_BUSY: + case R600_QUERY_GPU_IA_BUSY: + case R600_QUERY_GPU_SX_BUSY: + case R600_QUERY_GPU_WD_BUSY: + case R600_QUERY_GPU_BCI_BUSY: + case R600_QUERY_GPU_SC_BUSY: + case R600_QUERY_GPU_PA_BUSY: + case R600_QUERY_GPU_DB_BUSY: + case R600_QUERY_GPU_CP_BUSY: + case R600_QUERY_GPU_CB_BUSY: + case R600_QUERY_GPU_SDMA_BUSY: + case R600_QUERY_GPU_PFP_BUSY: + case R600_QUERY_GPU_MEQ_BUSY: + case R600_QUERY_GPU_ME_BUSY: + case R600_QUERY_GPU_SURF_SYNC_BUSY: + case R600_QUERY_GPU_CP_DMA_BUSY: + case R600_QUERY_GPU_SCRATCH_RAM_BUSY: + query->begin_result = r600_begin_counter(rctx->screen, + query->b.type); + break; + case R600_QUERY_NUM_COMPILATIONS: + query->begin_result = p_atomic_read(&rctx->screen->num_compilations); + break; + case R600_QUERY_NUM_SHADERS_CREATED: + query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + case R600_QUERY_NUM_SHADER_CACHE_HITS: + query->begin_result = + p_atomic_read(&rctx->screen->num_shader_cache_hits); + break; + case R600_QUERY_GPIN_ASIC_ID: + case R600_QUERY_GPIN_NUM_SIMD: + case R600_QUERY_GPIN_NUM_RB: + case R600_QUERY_GPIN_NUM_SPI: + case R600_QUERY_GPIN_NUM_SE: + break; + default: + unreachable("r600_query_sw_begin: bad query type"); + } + + return true; +} + +static bool r600_query_sw_end(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + enum radeon_value_id ws_id; + + switch(query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; + case PIPE_QUERY_GPU_FINISHED: + rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED); + break; + case R600_QUERY_DRAW_CALLS: + query->end_result = rctx->num_draw_calls; + break; + case R600_QUERY_DECOMPRESS_CALLS: + query->end_result = rctx->num_decompress_calls; + break; + case R600_QUERY_MRT_DRAW_CALLS: + query->end_result = rctx->num_mrt_draw_calls; + break; + case R600_QUERY_PRIM_RESTART_CALLS: + query->end_result = rctx->num_prim_restart_calls; + break; + case R600_QUERY_SPILL_DRAW_CALLS: + query->end_result = rctx->num_spill_draw_calls; + break; + case R600_QUERY_COMPUTE_CALLS: + query->end_result = rctx->num_compute_calls; + break; + case R600_QUERY_SPILL_COMPUTE_CALLS: + query->end_result = rctx->num_spill_compute_calls; + break; + case R600_QUERY_DMA_CALLS: + query->end_result = rctx->num_dma_calls; + break; + case R600_QUERY_CP_DMA_CALLS: + query->end_result = rctx->num_cp_dma_calls; + break; + case R600_QUERY_NUM_VS_FLUSHES: + query->end_result = rctx->num_vs_flushes; + break; + case R600_QUERY_NUM_PS_FLUSHES: + query->end_result = rctx->num_ps_flushes; + break; + case R600_QUERY_NUM_CS_FLUSHES: + query->end_result = rctx->num_cs_flushes; + break; + case R600_QUERY_NUM_CB_CACHE_FLUSHES: + query->end_result = rctx->num_cb_cache_flushes; + break; + case R600_QUERY_NUM_DB_CACHE_FLUSHES: + query->end_result = rctx->num_db_cache_flushes; + break; + case R600_QUERY_NUM_L2_INVALIDATES: + query->end_result = rctx->num_L2_invalidates; + break; + case R600_QUERY_NUM_L2_WRITEBACKS: + query->end_result = rctx->num_L2_writebacks; + break; + case R600_QUERY_NUM_RESIDENT_HANDLES: + query->end_result = rctx->num_resident_handles; + break; + case R600_QUERY_TC_OFFLOADED_SLOTS: + query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; + break; + case R600_QUERY_TC_DIRECT_SLOTS: + query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0; + break; + case R600_QUERY_TC_NUM_SYNCS: + query->end_result = rctx->tc ? rctx->tc->num_syncs : 0; + break; + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_MAPPED_VRAM: + case R600_QUERY_MAPPED_GTT: + case R600_QUERY_VRAM_USAGE: + case R600_QUERY_VRAM_VIS_USAGE: + case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_NUM_MAPPED_BUFFERS: + case R600_QUERY_NUM_GFX_IBS: + case R600_QUERY_NUM_SDMA_IBS: + case R600_QUERY_NUM_BYTES_MOVED: + case R600_QUERY_NUM_EVICTIONS: + case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { + enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + break; + } + case R600_QUERY_GFX_BO_LIST_SIZE: + ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + query->end_time = rctx->ws->query_value(rctx->ws, + RADEON_NUM_GFX_IBS); + break; + case R600_QUERY_CS_THREAD_BUSY: + ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + query->end_time = os_time_get_nano(); + break; + case R600_QUERY_GALLIUM_THREAD_BUSY: + query->end_result = + rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; + query->end_time = os_time_get_nano(); + break; + case R600_QUERY_GPU_LOAD: + case R600_QUERY_GPU_SHADERS_BUSY: + case R600_QUERY_GPU_TA_BUSY: + case R600_QUERY_GPU_GDS_BUSY: + case R600_QUERY_GPU_VGT_BUSY: + case R600_QUERY_GPU_IA_BUSY: + case R600_QUERY_GPU_SX_BUSY: + case R600_QUERY_GPU_WD_BUSY: + case R600_QUERY_GPU_BCI_BUSY: + case R600_QUERY_GPU_SC_BUSY: + case R600_QUERY_GPU_PA_BUSY: + case R600_QUERY_GPU_DB_BUSY: + case R600_QUERY_GPU_CP_BUSY: + case R600_QUERY_GPU_CB_BUSY: + case R600_QUERY_GPU_SDMA_BUSY: + case R600_QUERY_GPU_PFP_BUSY: + case R600_QUERY_GPU_MEQ_BUSY: + case R600_QUERY_GPU_ME_BUSY: + case R600_QUERY_GPU_SURF_SYNC_BUSY: + case R600_QUERY_GPU_CP_DMA_BUSY: + case R600_QUERY_GPU_SCRATCH_RAM_BUSY: + query->end_result = r600_end_counter(rctx->screen, + query->b.type, + query->begin_result); + query->begin_result = 0; + break; + case R600_QUERY_NUM_COMPILATIONS: + query->end_result = p_atomic_read(&rctx->screen->num_compilations); + break; + case R600_QUERY_NUM_SHADERS_CREATED: + query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); + break; + case R600_QUERY_NUM_SHADER_CACHE_HITS: + query->end_result = + p_atomic_read(&rctx->screen->num_shader_cache_hits); + break; + case R600_QUERY_GPIN_ASIC_ID: + case R600_QUERY_GPIN_NUM_SIMD: + case R600_QUERY_GPIN_NUM_RB: + case R600_QUERY_GPIN_NUM_SPI: + case R600_QUERY_GPIN_NUM_SE: + break; + default: + unreachable("r600_query_sw_end: bad query type"); + } + + return true; +} + +static bool r600_query_sw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + bool wait, + union pipe_query_result *result) +{ + struct r600_query_sw *query = (struct r600_query_sw *)rquery; + + switch (query->b.type) { + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* Convert from cycles per millisecond to cycles per second (Hz). */ + result->timestamp_disjoint.frequency = + (uint64_t)rctx->screen->info.clock_crystal_freq * 1000; + result->timestamp_disjoint.disjoint = false; + return true; + case PIPE_QUERY_GPU_FINISHED: { + struct pipe_screen *screen = rctx->b.screen; + struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b; + + result->b = screen->fence_finish(screen, ctx, query->fence, + wait ? PIPE_TIMEOUT_INFINITE : 0); + return result->b; + } + + case R600_QUERY_GFX_BO_LIST_SIZE: + result->u64 = (query->end_result - query->begin_result) / + (query->end_time - query->begin_time); + return true; + case R600_QUERY_CS_THREAD_BUSY: + case R600_QUERY_GALLIUM_THREAD_BUSY: + result->u64 = (query->end_result - query->begin_result) * 100 / + (query->end_time - query->begin_time); + return true; + case R600_QUERY_GPIN_ASIC_ID: + result->u32 = 0; + return true; + case R600_QUERY_GPIN_NUM_SIMD: + result->u32 = rctx->screen->info.num_good_compute_units; + return true; + case R600_QUERY_GPIN_NUM_RB: + result->u32 = rctx->screen->info.num_render_backends; + return true; + case R600_QUERY_GPIN_NUM_SPI: + result->u32 = 1; /* all supported chips have one SPI per SE */ + return true; + case R600_QUERY_GPIN_NUM_SE: + result->u32 = rctx->screen->info.max_se; + return true; + } + + result->u64 = query->end_result - query->begin_result; + + switch (query->b.type) { + case R600_QUERY_BUFFER_WAIT_TIME: + case R600_QUERY_GPU_TEMPERATURE: + result->u64 /= 1000; + break; + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: + result->u64 *= 1000000; + break; + } + + return true; +} + + +static struct r600_query_ops sw_query_ops = { + .destroy = r600_query_sw_destroy, + .begin = r600_query_sw_begin, + .end = r600_query_sw_end, + .get_result = r600_query_sw_get_result, + .get_result_resource = NULL +}; + +static struct pipe_query *r600_query_sw_create(unsigned query_type) +{ + struct r600_query_sw *query; + + query = CALLOC_STRUCT(r600_query_sw); + if (!query) + return NULL; + + query->b.type = query_type; + query->b.ops = &sw_query_ops; + + return (struct pipe_query *)query; +} + +void r600_query_hw_destroy(struct r600_common_screen *rscreen, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + struct r600_query_buffer *prev = query->buffer.previous; + + /* Release all query buffers. */ + while (prev) { + struct r600_query_buffer *qbuf = prev; + prev = prev->previous; + r600_resource_reference(&qbuf->buf, NULL); + FREE(qbuf); + } + + r600_resource_reference(&query->buffer.buf, NULL); + r600_resource_reference(&query->workaround_buf, NULL); + FREE(rquery); +} + +static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen, + struct r600_query_hw *query) +{ + unsigned buf_size = MAX2(query->result_size, + rscreen->info.min_alloc_size); + + /* Queries are normally read by the CPU after + * being written by the gpu, hence staging is probably a good + * usage pattern. + */ + struct r600_resource *buf = (struct r600_resource*) + pipe_buffer_create(&rscreen->b, 0, + PIPE_USAGE_STAGING, buf_size); + if (!buf) + return NULL; + + if (!query->ops->prepare_buffer(rscreen, query, buf)) { + r600_resource_reference(&buf, NULL); + return NULL; + } + + return buf; +} + +static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen, + struct r600_query_hw *query, + struct r600_resource *buffer) +{ + /* Callers ensure that the buffer is currently unused by the GPU. */ + uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_UNSYNCHRONIZED); + if (!results) + return false; + + memset(results, 0, buffer->b.b.width0); + + if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || + query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) { + unsigned max_rbs = rscreen->info.num_render_backends; + unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask; + unsigned num_results; + unsigned i, j; + + /* Set top bits for unused backends. */ + num_results = buffer->b.b.width0 / query->result_size; + for (j = 0; j < num_results; j++) { + for (i = 0; i < max_rbs; i++) { + if (!(enabled_rb_mask & (1<<i))) { + results[(i * 4)+1] = 0x80000000; + results[(i * 4)+3] = 0x80000000; + } + } + results += 4 * max_rbs; + } + } + + return true; +} + +static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, + struct r600_query *rquery, + bool wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset); + +static struct r600_query_ops query_hw_ops = { + .destroy = r600_query_hw_destroy, + .begin = r600_query_hw_begin, + .end = r600_query_hw_end, + .get_result = r600_query_hw_get_result, + .get_result_resource = r600_query_hw_get_result_resource, +}; + +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va); +static void r600_query_hw_add_result(struct r600_common_screen *rscreen, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +static void r600_query_hw_clear_result(struct r600_query_hw *, + union pipe_query_result *); + +static struct r600_query_hw_ops query_hw_default_hw_ops = { + .prepare_buffer = r600_query_hw_prepare_buffer, + .emit_start = r600_query_hw_do_emit_start, + .emit_stop = r600_query_hw_do_emit_stop, + .clear_result = r600_query_hw_clear_result, + .add_result = r600_query_hw_add_result, +}; + +bool r600_query_hw_init(struct r600_common_screen *rscreen, + struct r600_query_hw *query) +{ + query->buffer.buf = r600_new_query_buffer(rscreen, query); + if (!query->buffer.buf) + return false; + + return true; +} + +static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen, + unsigned query_type, + unsigned index) +{ + struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw); + if (!query) + return NULL; + + query->b.type = query_type; + query->b.ops = &query_hw_ops; + query->ops = &query_hw_default_hw_ops; + + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + query->result_size = 16 * rscreen->info.num_render_backends; + query->result_size += 16; /* for the fence + alignment */ + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen); + break; + case PIPE_QUERY_TIME_ELAPSED: + query->result_size = 24; + query->num_cs_dw_begin = 8; + query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen); + break; + case PIPE_QUERY_TIMESTAMP: + query->result_size = 16; + query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen); + query->flags = R600_QUERY_HW_FLAG_NO_START; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ + query->result_size = 32; + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6; + query->stream = index; + break; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ + query->result_size = 32 * R600_MAX_STREAMS; + query->num_cs_dw_begin = 6 * R600_MAX_STREAMS; + query->num_cs_dw_end = 6 * R600_MAX_STREAMS; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + /* 11 values on EG, 8 on R600. */ + query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16; + query->result_size += 8; /* for the fence + alignment */ + query->num_cs_dw_begin = 6; + query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen); + break; + default: + assert(0); + FREE(query); + return NULL; + } + + if (!r600_query_hw_init(rscreen, query)) { + FREE(query); + return NULL; + } + + return (struct pipe_query *)query; +} + +static void r600_update_occlusion_query_state(struct r600_common_context *rctx, + unsigned type, int diff) +{ + if (type == PIPE_QUERY_OCCLUSION_COUNTER || + type == PIPE_QUERY_OCCLUSION_PREDICATE) { + bool old_enable = rctx->num_occlusion_queries != 0; + bool old_perfect_enable = + rctx->num_perfect_occlusion_queries != 0; + bool enable, perfect_enable; + + rctx->num_occlusion_queries += diff; + assert(rctx->num_occlusion_queries >= 0); + + if (type == PIPE_QUERY_OCCLUSION_COUNTER) { + rctx->num_perfect_occlusion_queries += diff; + assert(rctx->num_perfect_occlusion_queries >= 0); + } + + enable = rctx->num_occlusion_queries != 0; + perfect_enable = rctx->num_perfect_occlusion_queries != 0; + + if (enable != old_enable || perfect_enable != old_perfect_enable) { + struct r600_context *ctx = (struct r600_context*)rctx; + r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom); + } + } +} + +static unsigned event_type_for_stream(unsigned stream) +{ + switch (stream) { + default: + case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS; + case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1; + case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2; + case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3; + } +} + +static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va, + unsigned stream) +{ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); +} + +static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + emit_sample_streamout(cs, va, query->stream); + break; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) + emit_sample_streamout(cs, va + 32 * stream, stream); + break; + case PIPE_QUERY_TIME_ELAPSED: + /* Write the timestamp after the last draw is done. + * (bottom-of-pipe) + */ + r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, + 0, EOP_DATA_SEL_TIMESTAMP, + NULL, va, 0, query->b.type); + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + break; + default: + assert(0); + } + r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, + RADEON_PRIO_QUERY); +} + +static void r600_query_hw_emit_start(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + uint64_t va; + + if (!query->buffer.buf) + return; // previous buffer allocation failure + + r600_update_occlusion_query_state(ctx, query->b.type, 1); + r600_update_prims_generated_query_state(ctx, query->b.type, 1); + + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, + true); + + /* Get a new query buffer if needed. */ + if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { + struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); + *qbuf = query->buffer; + query->buffer.results_end = 0; + query->buffer.previous = qbuf; + query->buffer.buf = r600_new_query_buffer(ctx->screen, query); + if (!query->buffer.buf) + return; + } + + /* emit begin query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_start(ctx, query, query->buffer.buf, va); + + ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; +} + +static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query, + struct r600_resource *buffer, + uint64_t va) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + uint64_t fence_va = 0; + + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + va += 8; + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + + fence_va = va + ctx->screen->info.num_render_backends * 16 - 8; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + va += 16; + emit_sample_streamout(cs, va, query->stream); + break; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + va += 16; + for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) + emit_sample_streamout(cs, va + 32 * stream, stream); + break; + case PIPE_QUERY_TIME_ELAPSED: + va += 8; + /* fall through */ + case PIPE_QUERY_TIMESTAMP: + r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, + 0, EOP_DATA_SEL_TIMESTAMP, NULL, va, + 0, query->b.type); + fence_va = va + 8; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: { + unsigned sample_size = (query->result_size - 8) / 2; + + va += sample_size; + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + + fence_va = va + sample_size; + break; + } + default: + assert(0); + } + r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, + RADEON_PRIO_QUERY); + + if (fence_va) + r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, + EOP_DATA_SEL_VALUE_32BIT, + query->buffer.buf, fence_va, 0x80000000, + query->b.type); +} + +static void r600_query_hw_emit_stop(struct r600_common_context *ctx, + struct r600_query_hw *query) +{ + uint64_t va; + + if (!query->buffer.buf) + return; // previous buffer allocation failure + + /* The queries which need begin already called this in begin_query. */ + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false); + } + + /* emit end query */ + va = query->buffer.buf->gpu_address + query->buffer.results_end; + + query->ops->emit_stop(ctx, query, query->buffer.buf, va); + + query->buffer.results_end += query->result_size; + + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) + ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; + + r600_update_occlusion_query_state(ctx, query->b.type, -1); + r600_update_prims_generated_query_state(ctx, query->b.type, -1); +} + +static void emit_set_predicate(struct r600_common_context *ctx, + struct r600_resource *buf, uint64_t va, + uint32_t op) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); + radeon_emit(cs, va); + radeon_emit(cs, op | ((va >> 32) & 0xFF)); + r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, + RADEON_PRIO_QUERY); +} + +static void r600_emit_query_predication(struct r600_common_context *ctx, + struct r600_atom *atom) +{ + struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond; + struct r600_query_buffer *qbuf; + uint32_t op; + bool flag_wait, invert; + + if (!query) + return; + + invert = ctx->render_cond_invert; + flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || + ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; + + if (query->workaround_buf) { + op = PRED_OP(PREDICATION_OP_BOOL64); + } else { + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + op = PRED_OP(PREDICATION_OP_ZPASS); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + op = PRED_OP(PREDICATION_OP_PRIMCOUNT); + invert = !invert; + break; + default: + assert(0); + return; + } + } + + /* if true then invert, see GL_ARB_conditional_render_inverted */ + if (invert) + op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */ + else + op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */ + + /* Use the value written by compute shader as a workaround. Note that + * the wait flag does not apply in this predication mode. + * + * The shader outputs the result value to L2. Workarounds only affect VI + * and later, where the CP reads data from L2, so we don't need an + * additional flush. + */ + if (query->workaround_buf) { + uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset; + emit_set_predicate(ctx, query->workaround_buf, va, op); + return; + } + + op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW; + + /* emit predicate packets for all data blocks */ + for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned results_base = 0; + uint64_t va_base = qbuf->buf->gpu_address; + + while (results_base < qbuf->results_end) { + uint64_t va = va_base + results_base; + + if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { + for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { + emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op); + + /* set CONTINUE bit for all packets except the first */ + op |= PREDICATION_CONTINUE; + } + } else { + emit_set_predicate(ctx, qbuf->buf, va, op); + op |= PREDICATION_CONTINUE; + } + + results_base += query->result_size; + } + } +} + +static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct r600_common_screen *rscreen = + (struct r600_common_screen *)ctx->screen; + + if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || + query_type == PIPE_QUERY_GPU_FINISHED || + query_type >= PIPE_QUERY_DRIVER_SPECIFIC) + return r600_query_sw_create(query_type); + + return r600_query_hw_create(rscreen, query_type, index); +} + +static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + rquery->ops->destroy(rctx->screen, rquery); +} + +static boolean r600_begin_query(struct pipe_context *ctx, + struct pipe_query *query) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + return rquery->ops->begin(rctx, rquery); +} + +void r600_query_hw_reset_buffers(struct r600_common_context *rctx, + struct r600_query_hw *query) +{ + struct r600_query_buffer *prev = query->buffer.previous; + + /* Discard the old query buffers. */ + while (prev) { + struct r600_query_buffer *qbuf = prev; + prev = prev->previous; + r600_resource_reference(&qbuf->buf, NULL); + FREE(qbuf); + } + + query->buffer.results_end = 0; + query->buffer.previous = NULL; + + /* Obtain a new buffer if the current one can't be mapped without a stall. */ + if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { + r600_resource_reference(&query->buffer.buf, NULL); + query->buffer.buf = r600_new_query_buffer(rctx->screen, query); + } else { + if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf)) + r600_resource_reference(&query->buffer.buf, NULL); + } +} + +bool r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + + if (query->flags & R600_QUERY_HW_FLAG_NO_START) { + assert(0); + return false; + } + + if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES)) + r600_query_hw_reset_buffers(rctx, query); + + r600_resource_reference(&query->workaround_buf, NULL); + + r600_query_hw_emit_start(rctx, query); + if (!query->buffer.buf) + return false; + + LIST_ADDTAIL(&query->list, &rctx->active_queries); + return true; +} + +static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + return rquery->ops->end(rctx, rquery); +} + +bool r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + + if (query->flags & R600_QUERY_HW_FLAG_NO_START) + r600_query_hw_reset_buffers(rctx, query); + + r600_query_hw_emit_stop(rctx, query); + + if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) + LIST_DELINIT(&query->list); + + if (!query->buffer.buf) + return false; + + return true; +} + +static void r600_get_hw_query_params(struct r600_common_context *rctx, + struct r600_query_hw *rquery, int index, + struct r600_hw_query_params *params) +{ + unsigned max_rbs = rctx->screen->info.num_render_backends; + + params->pair_stride = 0; + params->pair_count = 1; + + switch (rquery->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + params->start_offset = 0; + params->end_offset = 8; + params->fence_offset = max_rbs * 16; + params->pair_stride = 16; + params->pair_count = max_rbs; + break; + case PIPE_QUERY_TIME_ELAPSED: + params->start_offset = 0; + params->end_offset = 8; + params->fence_offset = 16; + break; + case PIPE_QUERY_TIMESTAMP: + params->start_offset = 0; + params->end_offset = 0; + params->fence_offset = 8; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + params->start_offset = 8; + params->end_offset = 24; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + params->start_offset = 0; + params->end_offset = 16; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_SO_STATISTICS: + params->start_offset = 8 - index * 8; + params->end_offset = 24 - index * 8; + params->fence_offset = params->end_offset + 4; + break; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + params->pair_count = R600_MAX_STREAMS; + params->pair_stride = 32; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + params->start_offset = 0; + params->end_offset = 16; + + /* We can re-use the high dword of the last 64-bit value as a + * fence: it is initialized as 0, and the high bit is set by + * the write of the streamout stats event. + */ + params->fence_offset = rquery->result_size - 4; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + { + /* Offsets apply to EG+ */ + static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80}; + params->start_offset = offsets[index]; + params->end_offset = 88 + offsets[index]; + params->fence_offset = 2 * 88; + break; + } + default: + unreachable("r600_get_hw_query_params unsupported"); + } +} + +static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, + bool test_status_bit) +{ + uint32_t *current_result = (uint32_t*)map; + uint64_t start, end; + + start = (uint64_t)current_result[start_index] | + (uint64_t)current_result[start_index+1] << 32; + end = (uint64_t)current_result[end_index] | + (uint64_t)current_result[end_index+1] << 32; + + if (!test_status_bit || + ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { + return end - start; + } + return 0; +} + +static void r600_query_hw_add_result(struct r600_common_screen *rscreen, + struct r600_query_hw *query, + void *buffer, + union pipe_query_result *result) +{ + unsigned max_rbs = rscreen->info.num_render_backends; + + switch (query->b.type) { + case PIPE_QUERY_OCCLUSION_COUNTER: { + for (unsigned i = 0; i < max_rbs; ++i) { + unsigned results_base = i * 16; + result->u64 += + r600_query_read_result(buffer + results_base, 0, 2, true); + } + break; + } + case PIPE_QUERY_OCCLUSION_PREDICATE: { + for (unsigned i = 0; i < max_rbs; ++i) { + unsigned results_base = i * 16; + result->b = result->b || + r600_query_read_result(buffer + results_base, 0, 2, true) != 0; + } + break; + } + case PIPE_QUERY_TIME_ELAPSED: + result->u64 += r600_query_read_result(buffer, 0, 2, false); + break; + case PIPE_QUERY_TIMESTAMP: + result->u64 = *(uint64_t*)buffer; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + /* SAMPLE_STREAMOUTSTATS stores this structure: + * { + * u64 NumPrimitivesWritten; + * u64 PrimitiveStorageNeeded; + * } + * We only need NumPrimitivesWritten here. */ + result->u64 += r600_query_read_result(buffer, 2, 6, true); + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + /* Here we read PrimitiveStorageNeeded. */ + result->u64 += r600_query_read_result(buffer, 0, 4, true); + break; + case PIPE_QUERY_SO_STATISTICS: + result->so_statistics.num_primitives_written += + r600_query_read_result(buffer, 2, 6, true); + result->so_statistics.primitives_storage_needed += + r600_query_read_result(buffer, 0, 4, true); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + result->b = result->b || + r600_query_read_result(buffer, 2, 6, true) != + r600_query_read_result(buffer, 0, 4, true); + break; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { + result->b = result->b || + r600_query_read_result(buffer, 2, 6, true) != + r600_query_read_result(buffer, 0, 4, true); + buffer = (char *)buffer + 32; + } + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + if (rscreen->chip_class >= EVERGREEN) { + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 22, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 24, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 26, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 28, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 30, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 32, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 34, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 36, false); + result->pipeline_statistics.hs_invocations += + r600_query_read_result(buffer, 16, 38, false); + result->pipeline_statistics.ds_invocations += + r600_query_read_result(buffer, 18, 40, false); + result->pipeline_statistics.cs_invocations += + r600_query_read_result(buffer, 20, 42, false); + } else { + result->pipeline_statistics.ps_invocations += + r600_query_read_result(buffer, 0, 16, false); + result->pipeline_statistics.c_primitives += + r600_query_read_result(buffer, 2, 18, false); + result->pipeline_statistics.c_invocations += + r600_query_read_result(buffer, 4, 20, false); + result->pipeline_statistics.vs_invocations += + r600_query_read_result(buffer, 6, 22, false); + result->pipeline_statistics.gs_invocations += + r600_query_read_result(buffer, 8, 24, false); + result->pipeline_statistics.gs_primitives += + r600_query_read_result(buffer, 10, 26, false); + result->pipeline_statistics.ia_primitives += + r600_query_read_result(buffer, 12, 28, false); + result->pipeline_statistics.ia_vertices += + r600_query_read_result(buffer, 14, 30, false); + } +#if 0 /* for testing */ + printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, " + "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, " + "Clipper prims=%llu, PS=%llu, CS=%llu\n", + result->pipeline_statistics.ia_vertices, + result->pipeline_statistics.ia_primitives, + result->pipeline_statistics.vs_invocations, + result->pipeline_statistics.hs_invocations, + result->pipeline_statistics.ds_invocations, + result->pipeline_statistics.gs_invocations, + result->pipeline_statistics.gs_primitives, + result->pipeline_statistics.c_invocations, + result->pipeline_statistics.c_primitives, + result->pipeline_statistics.ps_invocations, + result->pipeline_statistics.cs_invocations); +#endif + break; + default: + assert(0); + } +} + +static boolean r600_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, boolean wait, + union pipe_query_result *result) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + return rquery->ops->get_result(rctx, rquery, wait, result); +} + +static void r600_get_query_result_resource(struct pipe_context *ctx, + struct pipe_query *query, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index, + resource, offset); +} + +static void r600_query_hw_clear_result(struct r600_query_hw *query, + union pipe_query_result *result) +{ + util_query_clear_result(result, query->b.type); +} + +bool r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + bool wait, union pipe_query_result *result) +{ + struct r600_common_screen *rscreen = rctx->screen; + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + struct r600_query_buffer *qbuf; + + query->ops->clear_result(query, result); + + for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned usage = PIPE_TRANSFER_READ | + (wait ? 0 : PIPE_TRANSFER_DONTBLOCK); + unsigned results_base = 0; + void *map; + + if (rquery->b.flushed) + map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); + else + map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage); + + if (!map) + return false; + + while (results_base != qbuf->results_end) { + query->ops->add_result(rscreen, query, map + results_base, + result); + results_base += query->result_size; + } + } + + /* Convert the time to expected units. */ + if (rquery->type == PIPE_QUERY_TIME_ELAPSED || + rquery->type == PIPE_QUERY_TIMESTAMP) { + result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq; + } + return true; +} + +/* Create the compute shader that is used to collect the results. + * + * One compute grid with a single thread is launched for every query result + * buffer. The thread (optionally) reads a previous summary buffer, then + * accumulates data from the query result buffer, and writes the result either + * to a summary buffer to be consumed by the next grid invocation or to the + * user-supplied buffer. + * + * Data layout: + * + * CONST + * 0.x = end_offset + * 0.y = result_stride + * 0.z = result_count + * 0.w = bit field: + * 1: read previously accumulated values + * 2: write accumulated values for chaining + * 4: write result available + * 8: convert result to boolean (0/1) + * 16: only read one dword and use that as result + * 32: apply timestamp conversion + * 64: store full 64 bits result + * 128: store signed 32 bits result + * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs + * 1.x = fence_offset + * 1.y = pair_stride + * 1.z = pair_count + * + * BUFFER[0] = query result buffer + * BUFFER[1] = previous summary buffer + * BUFFER[2] = next summary buffer or user-supplied buffer + */ +static void r600_create_query_result_shader(struct r600_common_context *rctx) +{ + /* TEMP[0].xy = accumulated result so far + * TEMP[0].z = result not available + * + * TEMP[1].x = current result index + * TEMP[1].y = current pair index + */ + static const char text_tmpl[] = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL BUFFER[0]\n" + "DCL BUFFER[1]\n" + "DCL BUFFER[2]\n" + "DCL CONST[0][0..1]\n" + "DCL TEMP[0..5]\n" + "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n" + "IMM[1] UINT32 {1, 2, 4, 8}\n" + "IMM[2] UINT32 {16, 32, 64, 128}\n" + "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */ + "IMM[4] UINT32 {256, 0, 0, 0}\n" + + "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n" + "UIF TEMP[5]\n" + /* Check result availability. */ + "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n" + "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n" + "MOV TEMP[1], TEMP[0].zzzz\n" + "NOT TEMP[0].z, TEMP[0].zzzz\n" + + /* Load result if available. */ + "UIF TEMP[1]\n" + "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n" + "ENDIF\n" + "ELSE\n" + /* Load previously accumulated result if requested. */ + "MOV TEMP[0], IMM[0].xxxx\n" + "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n" + "UIF TEMP[4]\n" + "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n" + "ENDIF\n" + + "MOV TEMP[1].x, IMM[0].xxxx\n" + "BGNLOOP\n" + /* Break if accumulated result so far is not available. */ + "UIF TEMP[0].zzzz\n" + "BRK\n" + "ENDIF\n" + + /* Break if result_index >= result_count. */ + "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n" + "UIF TEMP[5]\n" + "BRK\n" + "ENDIF\n" + + /* Load fence and check result availability */ + "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n" + "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n" + "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n" + "NOT TEMP[0].z, TEMP[0].zzzz\n" + "UIF TEMP[0].zzzz\n" + "BRK\n" + "ENDIF\n" + + "MOV TEMP[1].y, IMM[0].xxxx\n" + "BGNLOOP\n" + /* Load start and end. */ + "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n" + "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n" + "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n" + + "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n" + "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n" + + "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n" + + "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n" + "UIF TEMP[5].zzzz\n" + /* Load second start/end half-pair and + * take the difference + */ + "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n" + "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n" + "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n" + + "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n" + "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n" + "ENDIF\n" + + "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n" + + /* Increment pair index */ + "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n" + "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n" + "UIF TEMP[5]\n" + "BRK\n" + "ENDIF\n" + "ENDLOOP\n" + + /* Increment result index */ + "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n" + "ENDLOOP\n" + "ENDIF\n" + + "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n" + "UIF TEMP[4]\n" + /* Store accumulated data for chaining. */ + "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n" + "ELSE\n" + "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n" + "UIF TEMP[4]\n" + /* Store result availability. */ + "NOT TEMP[0].z, TEMP[0]\n" + "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n" + "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n" + + "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n" + "UIF TEMP[4]\n" + "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n" + "ENDIF\n" + "ELSE\n" + /* Store result if it is available. */ + "NOT TEMP[4], TEMP[0].zzzz\n" + "UIF TEMP[4]\n" + /* Apply timestamp conversion */ + "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n" + "UIF TEMP[4]\n" + "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n" + "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n" + "ENDIF\n" + + /* Convert to boolean */ + "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n" + "UIF TEMP[4]\n" + "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n" + "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n" + "MOV TEMP[0].y, IMM[0].xxxx\n" + "ENDIF\n" + + "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n" + "UIF TEMP[4]\n" + "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n" + "ELSE\n" + /* Clamping */ + "UIF TEMP[0].yyyy\n" + "MOV TEMP[0].x, IMM[0].wwww\n" + "ENDIF\n" + + "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n" + "UIF TEMP[4]\n" + "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n" + "ENDIF\n" + + "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n" + "ENDIF\n" + "ENDIF\n" + "ENDIF\n" + "ENDIF\n" + + "END\n"; + + char text[sizeof(text_tmpl) + 32]; + struct tgsi_token tokens[1024]; + struct pipe_compute_state state = {}; + + /* Hard code the frequency into the shader so that the backend can + * use the full range of optimizations for divide-by-constant. + */ + snprintf(text, sizeof(text), text_tmpl, + rctx->screen->info.clock_crystal_freq); + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(false); + return; + } + + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state); +} + +static void r600_restore_qbo_state(struct r600_common_context *rctx, + struct r600_qbo_state *st) +{ + rctx->b.bind_compute_state(&rctx->b, st->saved_compute); + + rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); + pipe_resource_reference(&st->saved_const0.buffer, NULL); + + rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); + for (unsigned i = 0; i < 3; ++i) + pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL); +} + +static void r600_query_hw_get_result_resource(struct r600_common_context *rctx, + struct r600_query *rquery, + bool wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct r600_query_hw *query = (struct r600_query_hw *)rquery; + struct r600_query_buffer *qbuf; + struct r600_query_buffer *qbuf_prev; + struct pipe_resource *tmp_buffer = NULL; + unsigned tmp_buffer_offset = 0; + struct r600_qbo_state saved_state = {}; + struct pipe_grid_info grid = {}; + struct pipe_constant_buffer constant_buffer = {}; + struct pipe_shader_buffer ssbo[3]; + struct r600_hw_query_params params; + struct { + uint32_t end_offset; + uint32_t result_stride; + uint32_t result_count; + uint32_t config; + uint32_t fence_offset; + uint32_t pair_stride; + uint32_t pair_count; + } consts; + + if (!rctx->query_result_shader) { + r600_create_query_result_shader(rctx); + if (!rctx->query_result_shader) + return; + } + + if (query->buffer.previous) { + u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16, + &tmp_buffer_offset, &tmp_buffer); + if (!tmp_buffer) + return; + } + + rctx->save_qbo_state(&rctx->b, &saved_state); + + r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, ¶ms); + consts.end_offset = params.end_offset - params.start_offset; + consts.fence_offset = params.fence_offset - params.start_offset; + consts.result_stride = query->result_size; + consts.pair_stride = params.pair_stride; + consts.pair_count = params.pair_count; + + constant_buffer.buffer_size = sizeof(consts); + constant_buffer.user_buffer = &consts; + + ssbo[1].buffer = tmp_buffer; + ssbo[1].buffer_offset = tmp_buffer_offset; + ssbo[1].buffer_size = 16; + + ssbo[2] = ssbo[1]; + + rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader); + + grid.block[0] = 1; + grid.block[1] = 1; + grid.block[2] = 1; + grid.grid[0] = 1; + grid.grid[1] = 1; + grid.grid[2] = 1; + + consts.config = 0; + if (index < 0) + consts.config |= 4; + if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) + consts.config |= 8; + else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || + query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) + consts.config |= 8 | 256; + else if (query->b.type == PIPE_QUERY_TIMESTAMP || + query->b.type == PIPE_QUERY_TIME_ELAPSED) + consts.config |= 32; + + switch (result_type) { + case PIPE_QUERY_TYPE_U64: + case PIPE_QUERY_TYPE_I64: + consts.config |= 64; + break; + case PIPE_QUERY_TYPE_I32: + consts.config |= 128; + break; + case PIPE_QUERY_TYPE_U32: + break; + } + + rctx->flags |= rctx->screen->barrier_flags.cp_to_L2; + + for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) { + if (query->b.type != PIPE_QUERY_TIMESTAMP) { + qbuf_prev = qbuf->previous; + consts.result_count = qbuf->results_end / query->result_size; + consts.config &= ~3; + if (qbuf != &query->buffer) + consts.config |= 1; + if (qbuf->previous) + consts.config |= 2; + } else { + /* Only read the last timestamp. */ + qbuf_prev = NULL; + consts.result_count = 0; + consts.config |= 16; + params.start_offset += qbuf->results_end - query->result_size; + } + + rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer); + + ssbo[0].buffer = &qbuf->buf->b.b; + ssbo[0].buffer_offset = params.start_offset; + ssbo[0].buffer_size = qbuf->results_end - params.start_offset; + + if (!qbuf->previous) { + ssbo[2].buffer = resource; + ssbo[2].buffer_offset = offset; + ssbo[2].buffer_size = 8; + + } + + rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo); + + if (wait && qbuf == &query->buffer) { + uint64_t va; + + /* Wait for result availability. Wait only for readiness + * of the last entry, since the fence writes should be + * serialized in the CP. + */ + va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; + va += params.fence_offset; + + r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000); + } + + rctx->b.launch_grid(&rctx->b, &grid); + rctx->flags |= rctx->screen->barrier_flags.compute_to_L2; + } + + r600_restore_qbo_state(rctx, &saved_state); + pipe_resource_reference(&tmp_buffer, NULL); +} + +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + boolean condition, + enum pipe_render_cond_flag mode) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_query_hw *rquery = (struct r600_query_hw *)query; + struct r600_query_buffer *qbuf; + struct r600_atom *atom = &rctx->render_cond_atom; + + /* Compute the size of SET_PREDICATION packets. */ + atom->num_dw = 0; + if (query) { + for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) + atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; + + if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) + atom->num_dw *= R600_MAX_STREAMS; + } + + rctx->render_cond = query; + rctx->render_cond_invert = condition; + rctx->render_cond_mode = mode; + + rctx->set_atom_dirty(rctx, atom, query != NULL); +} + +void r600_suspend_queries(struct r600_common_context *ctx) +{ + struct r600_query_hw *query; + + LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) { + r600_query_hw_emit_stop(ctx, query); + } + assert(ctx->num_cs_dw_queries_suspend == 0); +} + +static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx, + struct list_head *query_list) +{ + struct r600_query_hw *query; + unsigned num_dw = 0; + + LIST_FOR_EACH_ENTRY(query, query_list, list) { + /* begin + end */ + num_dw += query->num_cs_dw_begin + query->num_cs_dw_end; + + /* Workaround for the fact that + * num_cs_dw_nontimer_queries_suspend is incremented for every + * resumed query, which raises the bar in need_cs_space for + * queries about to be resumed. + */ + num_dw += query->num_cs_dw_end; + } + /* primitives generated query */ + num_dw += ctx->streamout.enable_atom.num_dw; + /* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */ + num_dw += 13; + + return num_dw; +} + +void r600_resume_queries(struct r600_common_context *ctx) +{ + struct r600_query_hw *query; + unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries); + + assert(ctx->num_cs_dw_queries_suspend == 0); + + /* Check CS space here. Resuming must not be interrupted by flushes. */ + ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true); + + LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) { + r600_query_hw_emit_start(ctx, query); + } +} + +/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */ +void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) +{ + struct r600_common_context *ctx = + (struct r600_common_context*)rscreen->aux_context; + struct radeon_winsys_cs *cs = ctx->gfx.cs; + struct r600_resource *buffer; + uint32_t *results; + unsigned i, mask = 0; + unsigned max_rbs = ctx->screen->info.num_render_backends; + + assert(rscreen->chip_class <= CAYMAN); + + /* if backend_map query is supported by the kernel */ + if (rscreen->info.r600_gb_backend_map_valid) { + unsigned num_tile_pipes = rscreen->info.num_tile_pipes; + unsigned backend_map = rscreen->info.r600_gb_backend_map; + unsigned item_width, item_mask; + + if (ctx->chip_class >= EVERGREEN) { + item_width = 4; + item_mask = 0x7; + } else { + item_width = 2; + item_mask = 0x3; + } + + while (num_tile_pipes--) { + i = backend_map & item_mask; + mask |= (1<<i); + backend_map >>= item_width; + } + if (mask != 0) { + rscreen->info.enabled_rb_mask = mask; + return; + } + } + + /* otherwise backup path for older kernels */ + + /* create buffer for event data */ + buffer = (struct r600_resource*) + pipe_buffer_create(ctx->b.screen, 0, + PIPE_USAGE_STAGING, max_rbs * 16); + if (!buffer) + return; + + /* initialize buffer with zeroes */ + results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE); + if (results) { + memset(results, 0, max_rbs * 4 * 4); + + /* emit EVENT_WRITE for ZPASS_DONE */ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, buffer->gpu_address); + radeon_emit(cs, buffer->gpu_address >> 32); + + r600_emit_reloc(ctx, &ctx->gfx, buffer, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + + /* analyze results */ + results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); + if (results) { + for(i = 0; i < max_rbs; i++) { + /* at least highest bit will be set if backend is used */ + if (results[i*4 + 1]) + mask |= (1<<i); + } + } + } + + r600_resource_reference(&buffer, NULL); + + if (mask) + rscreen->info.enabled_rb_mask = mask; +} + +#define XFULL(name_, query_type_, type_, result_type_, group_id_) \ + { \ + .name = name_, \ + .query_type = R600_QUERY_##query_type_, \ + .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ + .group_id = group_id_ \ + } + +#define X(name_, query_type_, type_, result_type_) \ + XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0) + +#define XG(group_, name_, query_type_, type_, result_type_) \ + XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_) + +static struct pipe_driver_query_info r600_driver_query_list[] = { + X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE), + X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE), + X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE), + X("draw-calls", DRAW_CALLS, UINT64, AVERAGE), + X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE), + X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE), + X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE), + X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE), + X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE), + X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE), + X("dma-calls", DMA_CALLS, UINT64, AVERAGE), + X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE), + X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE), + X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE), + X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE), + X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE), + X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE), + X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE), + X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE), + X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE), + X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE), + X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE), + X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE), + X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE), + X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE), + X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), + X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), + X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE), + X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE), + X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), + X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE), + X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE), + X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE), + X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE), + X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), + X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE), + X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE), + X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), + X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE), + X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), + + /* GPIN queries are for the benefit of old versions of GPUPerfStudio, + * which use it as a fallback path to detect the GPU type. + * + * Note: The names of these queries are significant for GPUPerfStudio + * (and possibly their order as well). */ + XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE), + XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE), + XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE), + XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE), + XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE), + + X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE), + X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE), + X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE), + + /* The following queries must be at the end of the list because their + * availability is adjusted dynamically based on the DRM version. */ + X("GPU-load", GPU_LOAD, UINT64, AVERAGE), + X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE), + X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE), + X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE), + X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE), + X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE), + X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE), + X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE), + X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE), + X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE), + X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE), + X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE), + X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE), + X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE), + X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE), + X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE), + X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE), + X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE), + X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE), + X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE), + X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE), +}; + +#undef X +#undef XG +#undef XFULL + +static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) +{ + if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) + return ARRAY_SIZE(r600_driver_query_list); + else + return ARRAY_SIZE(r600_driver_query_list) - 25; +} + +static int r600_get_driver_query_info(struct pipe_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + unsigned num_queries = r600_get_num_queries(rscreen); + + if (!info) { + unsigned num_perfcounters = + r600_get_perfcounter_info(rscreen, 0, NULL); + + return num_queries + num_perfcounters; + } + + if (index >= num_queries) + return r600_get_perfcounter_info(rscreen, index - num_queries, info); + + *info = r600_driver_query_list[index]; + + switch (info->query_type) { + case R600_QUERY_REQUESTED_VRAM: + case R600_QUERY_VRAM_USAGE: + case R600_QUERY_MAPPED_VRAM: + info->max_value.u64 = rscreen->info.vram_size; + break; + case R600_QUERY_REQUESTED_GTT: + case R600_QUERY_GTT_USAGE: + case R600_QUERY_MAPPED_GTT: + info->max_value.u64 = rscreen->info.gart_size; + break; + case R600_QUERY_GPU_TEMPERATURE: + info->max_value.u64 = 125; + break; + case R600_QUERY_VRAM_VIS_USAGE: + info->max_value.u64 = rscreen->info.vram_vis_size; + break; + } + + if (info->group_id != ~(unsigned)0 && rscreen->perfcounters) + info->group_id += rscreen->perfcounters->num_groups; + + return 1; +} + +/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware + * performance counter groups, so be careful when changing this and related + * functions. + */ +static int r600_get_driver_query_group_info(struct pipe_screen *screen, + unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + unsigned num_pc_groups = 0; + + if (rscreen->perfcounters) + num_pc_groups = rscreen->perfcounters->num_groups; + + if (!info) + return num_pc_groups + R600_NUM_SW_QUERY_GROUPS; + + if (index < num_pc_groups) + return r600_get_perfcounter_group_info(rscreen, index, info); + + index -= num_pc_groups; + if (index >= R600_NUM_SW_QUERY_GROUPS) + return 0; + + info->name = "GPIN"; + info->max_active_queries = 5; + info->num_queries = 5; + return 1; +} + +void r600_query_init(struct r600_common_context *rctx) +{ + rctx->b.create_query = r600_create_query; + rctx->b.create_batch_query = r600_create_batch_query; + rctx->b.destroy_query = r600_destroy_query; + rctx->b.begin_query = r600_begin_query; + rctx->b.end_query = r600_end_query; + rctx->b.get_query_result = r600_get_query_result; + rctx->b.get_query_result_resource = r600_get_query_result_resource; + rctx->render_cond_atom.emit = r600_emit_query_predication; + + if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0) + rctx->b.render_condition = r600_render_condition; + + LIST_INITHEAD(&rctx->active_queries); +} + +void r600_init_screen_query_functions(struct r600_common_screen *rscreen) +{ + rscreen->b.get_driver_query_info = r600_get_driver_query_info; + rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info; +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_query.h b/lib/mesa/src/gallium/drivers/r600/r600_query.h new file mode 100644 index 000000000..1a3c6839e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_query.h @@ -0,0 +1,326 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Nicolai Hähnle <nicolai.haehnle@amd.com> + * + */ + +#ifndef R600_QUERY_H +#define R600_QUERY_H + +#include "util/u_threaded_context.h" + +struct pipe_context; +struct pipe_query; +struct pipe_resource; + +struct r600_common_context; +struct r600_common_screen; +struct r600_query; +struct r600_query_hw; +struct r600_resource; + +enum { + R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, + R600_QUERY_DECOMPRESS_CALLS, + R600_QUERY_MRT_DRAW_CALLS, + R600_QUERY_PRIM_RESTART_CALLS, + R600_QUERY_SPILL_DRAW_CALLS, + R600_QUERY_COMPUTE_CALLS, + R600_QUERY_SPILL_COMPUTE_CALLS, + R600_QUERY_DMA_CALLS, + R600_QUERY_CP_DMA_CALLS, + R600_QUERY_NUM_VS_FLUSHES, + R600_QUERY_NUM_PS_FLUSHES, + R600_QUERY_NUM_CS_FLUSHES, + R600_QUERY_NUM_CB_CACHE_FLUSHES, + R600_QUERY_NUM_DB_CACHE_FLUSHES, + R600_QUERY_NUM_L2_INVALIDATES, + R600_QUERY_NUM_L2_WRITEBACKS, + R600_QUERY_NUM_RESIDENT_HANDLES, + R600_QUERY_TC_OFFLOADED_SLOTS, + R600_QUERY_TC_DIRECT_SLOTS, + R600_QUERY_TC_NUM_SYNCS, + R600_QUERY_CS_THREAD_BUSY, + R600_QUERY_GALLIUM_THREAD_BUSY, + R600_QUERY_REQUESTED_VRAM, + R600_QUERY_REQUESTED_GTT, + R600_QUERY_MAPPED_VRAM, + R600_QUERY_MAPPED_GTT, + R600_QUERY_BUFFER_WAIT_TIME, + R600_QUERY_NUM_MAPPED_BUFFERS, + R600_QUERY_NUM_GFX_IBS, + R600_QUERY_NUM_SDMA_IBS, + R600_QUERY_GFX_BO_LIST_SIZE, + R600_QUERY_NUM_BYTES_MOVED, + R600_QUERY_NUM_EVICTIONS, + R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, + R600_QUERY_VRAM_USAGE, + R600_QUERY_VRAM_VIS_USAGE, + R600_QUERY_GTT_USAGE, + R600_QUERY_GPU_TEMPERATURE, + R600_QUERY_CURRENT_GPU_SCLK, + R600_QUERY_CURRENT_GPU_MCLK, + R600_QUERY_GPU_LOAD, + R600_QUERY_GPU_SHADERS_BUSY, + R600_QUERY_GPU_TA_BUSY, + R600_QUERY_GPU_GDS_BUSY, + R600_QUERY_GPU_VGT_BUSY, + R600_QUERY_GPU_IA_BUSY, + R600_QUERY_GPU_SX_BUSY, + R600_QUERY_GPU_WD_BUSY, + R600_QUERY_GPU_BCI_BUSY, + R600_QUERY_GPU_SC_BUSY, + R600_QUERY_GPU_PA_BUSY, + R600_QUERY_GPU_DB_BUSY, + R600_QUERY_GPU_CP_BUSY, + R600_QUERY_GPU_CB_BUSY, + R600_QUERY_GPU_SDMA_BUSY, + R600_QUERY_GPU_PFP_BUSY, + R600_QUERY_GPU_MEQ_BUSY, + R600_QUERY_GPU_ME_BUSY, + R600_QUERY_GPU_SURF_SYNC_BUSY, + R600_QUERY_GPU_CP_DMA_BUSY, + R600_QUERY_GPU_SCRATCH_RAM_BUSY, + R600_QUERY_NUM_COMPILATIONS, + R600_QUERY_NUM_SHADERS_CREATED, + R600_QUERY_NUM_SHADER_CACHE_HITS, + R600_QUERY_GPIN_ASIC_ID, + R600_QUERY_GPIN_NUM_SIMD, + R600_QUERY_GPIN_NUM_RB, + R600_QUERY_GPIN_NUM_SPI, + R600_QUERY_GPIN_NUM_SE, + + R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, +}; + +enum { + R600_QUERY_GROUP_GPIN = 0, + R600_NUM_SW_QUERY_GROUPS +}; + +struct r600_query_ops { + void (*destroy)(struct r600_common_screen *, struct r600_query *); + bool (*begin)(struct r600_common_context *, struct r600_query *); + bool (*end)(struct r600_common_context *, struct r600_query *); + bool (*get_result)(struct r600_common_context *, + struct r600_query *, bool wait, + union pipe_query_result *result); + void (*get_result_resource)(struct r600_common_context *, + struct r600_query *, bool wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset); +}; + +struct r600_query { + struct threaded_query b; + struct r600_query_ops *ops; + + /* The type of query */ + unsigned type; +}; + +enum { + R600_QUERY_HW_FLAG_NO_START = (1 << 0), + /* gap */ + /* whether begin_query doesn't clear the result */ + R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), +}; + +struct r600_query_hw_ops { + bool (*prepare_buffer)(struct r600_common_screen *, + struct r600_query_hw *, + struct r600_resource *); + void (*emit_start)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*emit_stop)(struct r600_common_context *, + struct r600_query_hw *, + struct r600_resource *buffer, uint64_t va); + void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); + void (*add_result)(struct r600_common_screen *screen, + struct r600_query_hw *, void *buffer, + union pipe_query_result *result); +}; + +struct r600_query_buffer { + /* The buffer where query results are stored. */ + struct r600_resource *buf; + /* Offset of the next free result after current query data */ + unsigned results_end; + /* If a query buffer is full, a new buffer is created and the old one + * is put in here. When we calculate the result, we sum up the samples + * from all buffers. */ + struct r600_query_buffer *previous; +}; + +struct r600_query_hw { + struct r600_query b; + struct r600_query_hw_ops *ops; + unsigned flags; + + /* The query buffer and how many results are in it. */ + struct r600_query_buffer buffer; + /* Size of the result in memory for both begin_query and end_query, + * this can be one or two numbers, or it could even be a size of a structure. */ + unsigned result_size; + /* The number of dwords for begin_query or end_query. */ + unsigned num_cs_dw_begin; + unsigned num_cs_dw_end; + /* Linked list of queries */ + struct list_head list; + /* For transform feedback: which stream the query is for */ + unsigned stream; + + /* Workaround via compute shader */ + struct r600_resource *workaround_buf; + unsigned workaround_offset; +}; + +bool r600_query_hw_init(struct r600_common_screen *rscreen, + struct r600_query_hw *query); +void r600_query_hw_destroy(struct r600_common_screen *rscreen, + struct r600_query *rquery); +bool r600_query_hw_begin(struct r600_common_context *rctx, + struct r600_query *rquery); +bool r600_query_hw_end(struct r600_common_context *rctx, + struct r600_query *rquery); +bool r600_query_hw_get_result(struct r600_common_context *rctx, + struct r600_query *rquery, + bool wait, + union pipe_query_result *result); + +/* Performance counters */ +enum { + /* This block is part of the shader engine */ + R600_PC_BLOCK_SE = (1 << 0), + + /* Expose per-instance groups instead of summing all instances (within + * an SE). */ + R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), + + /* Expose per-SE groups instead of summing instances across SEs. */ + R600_PC_BLOCK_SE_GROUPS = (1 << 2), + + /* Shader block */ + R600_PC_BLOCK_SHADER = (1 << 3), + + /* Non-shader block with perfcounters windowed by shaders. */ + R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), +}; + +/* Describes a hardware block with performance counters. Multiple instances of + * each block, possibly per-SE, may exist on the chip. Depending on the block + * and on the user's configuration, we either + * (a) expose every instance as a performance counter group, + * (b) expose a single performance counter group that reports the sum over all + * instances, or + * (c) expose one performance counter group per instance, but summed over all + * shader engines. + */ +struct r600_perfcounter_block { + const char *basename; + unsigned flags; + unsigned num_counters; + unsigned num_selectors; + unsigned num_instances; + + unsigned num_groups; + char *group_names; + unsigned group_name_stride; + + char *selector_names; + unsigned selector_name_stride; + + void *data; +}; + +struct r600_perfcounters { + unsigned num_groups; + unsigned num_blocks; + struct r600_perfcounter_block *blocks; + + unsigned num_start_cs_dwords; + unsigned num_stop_cs_dwords; + unsigned num_instance_cs_dwords; + unsigned num_shaders_cs_dwords; + + unsigned num_shader_types; + const char * const *shader_type_suffixes; + const unsigned *shader_type_bits; + + void (*get_size)(struct r600_perfcounter_block *, + unsigned count, unsigned *selectors, + unsigned *num_select_dw, unsigned *num_read_dw); + + void (*emit_instance)(struct r600_common_context *, + int se, int instance); + void (*emit_shaders)(struct r600_common_context *, unsigned shaders); + void (*emit_select)(struct r600_common_context *, + struct r600_perfcounter_block *, + unsigned count, unsigned *selectors); + void (*emit_start)(struct r600_common_context *, + struct r600_resource *buffer, uint64_t va); + void (*emit_stop)(struct r600_common_context *, + struct r600_resource *buffer, uint64_t va); + void (*emit_read)(struct r600_common_context *, + struct r600_perfcounter_block *, + unsigned count, unsigned *selectors, + struct r600_resource *buffer, uint64_t va); + + void (*cleanup)(struct r600_common_screen *); + + bool separate_se; + bool separate_instance; +}; + +struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, + unsigned num_queries, + unsigned *query_types); + +int r600_get_perfcounter_info(struct r600_common_screen *, + unsigned index, + struct pipe_driver_query_info *info); +int r600_get_perfcounter_group_info(struct r600_common_screen *, + unsigned index, + struct pipe_driver_query_group_info *info); + +bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); +void r600_perfcounters_add_block(struct r600_common_screen *, + struct r600_perfcounters *, + const char *name, unsigned flags, + unsigned counters, unsigned selectors, + unsigned instances, void *data); +void r600_perfcounters_do_destroy(struct r600_perfcounters *); +void r600_query_hw_reset_buffers(struct r600_common_context *rctx, + struct r600_query_hw *query); + +struct r600_qbo_state { + void *saved_compute; + struct pipe_constant_buffer saved_const0; + struct pipe_shader_buffer saved_ssbo[3]; +}; + +#endif /* R600_QUERY_H */ diff --git a/lib/mesa/src/gallium/drivers/r600/r600_streamout.c b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c new file mode 100644 index 000000000..78334066c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c @@ -0,0 +1,365 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +#include "r600_pipe_common.h" +#include "r600_cs.h" + +#include "util/u_memory.h" +#include "evergreend.h" + +#define R_008490_CP_STRMOUT_CNTL 0x008490 +#define R_028AB0_VGT_STRMOUT_EN 0x028AB0 +#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20 + +static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable); + +static struct pipe_stream_output_target * +r600_create_so_target(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_so_target *t; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + t = CALLOC_STRUCT(r600_so_target); + if (!t) { + return NULL; + } + + u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4, + &t->buf_filled_size_offset, + (struct pipe_resource**)&t->buf_filled_size); + if (!t->buf_filled_size) { + FREE(t); + return NULL; + } + + t->b.reference.count = 1; + t->b.context = ctx; + pipe_resource_reference(&t->b.buffer, buffer); + t->b.buffer_offset = buffer_offset; + t->b.buffer_size = buffer_size; + + util_range_add(&rbuffer->valid_buffer_range, buffer_offset, + buffer_offset + buffer_size); + return &t->b; +} + +static void r600_so_target_destroy(struct pipe_context *ctx, + struct pipe_stream_output_target *target) +{ + struct r600_so_target *t = (struct r600_so_target*)target; + pipe_resource_reference(&t->b.buffer, NULL); + r600_resource_reference(&t->buf_filled_size, NULL); + FREE(t); +} + +void r600_streamout_buffers_dirty(struct r600_common_context *rctx) +{ + struct r600_atom *begin = &rctx->streamout.begin_atom; + unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask); + unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask & + rctx->streamout.append_bitmask); + + if (!num_bufs) + return; + + rctx->streamout.num_dw_for_end = + 12 + /* flush_vgt_streamout */ + num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */ + + begin->num_dw = 12; /* flush_vgt_streamout */ + + begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */ + + if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) + begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */ + + begin->num_dw += + num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */ + (num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */ + (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */ + + rctx->set_atom_dirty(rctx, begin, true); + + r600_set_streamout_enable(rctx, true); +} + +void r600_set_streamout_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + unsigned i; + unsigned enabled_mask = 0, append_bitmask = 0; + + /* Stop streamout. */ + if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) { + r600_emit_streamout_end(rctx); + } + + /* Set the new targets. */ + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]); + if (!targets[i]) + continue; + + r600_context_add_resource_size(ctx, targets[i]->buffer); + enabled_mask |= 1 << i; + if (offsets[i] == ((unsigned)-1)) + append_bitmask |= 1 << i; + } + for (; i < rctx->streamout.num_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL); + } + + rctx->streamout.enabled_mask = enabled_mask; + + rctx->streamout.num_targets = num_targets; + rctx->streamout.append_bitmask = append_bitmask; + + if (num_targets) { + r600_streamout_buffers_dirty(rctx); + } else { + rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false); + r600_set_streamout_enable(rctx, false); + } +} + +static void r600_flush_vgt_streamout(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + unsigned reg_strmout_cntl; + + /* The register is at different places on different ASICs. */ + if (rctx->chip_class >= EVERGREEN) { + reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; + } else { + reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL; + } + + radeon_set_config_reg(cs, reg_strmout_cntl, 0); + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); + + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ + radeon_emit(cs, 0); + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ + radeon_emit(cs, 4); /* poll interval */ +} + +static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct r600_so_target **t = rctx->streamout.targets; + uint16_t *stride_in_dw = rctx->streamout.stride_in_dw; + unsigned i, update_flags = 0; + + r600_flush_vgt_streamout(rctx); + + for (i = 0; i < rctx->streamout.num_targets; i++) { + if (!t[i]) + continue; + + t[i]->stride_in_dw = stride_in_dw[i]; + + uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address; + + update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); + + radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3); + radeon_emit(cs, (t[i]->b.buffer_offset + + t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */ + radeon_emit(cs, va >> 8); /* BUFFER_BASE */ + + r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); + + /* R7xx requires this packet after updating BUFFER_BASE. + * Without this, R7xx locks up. */ + if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) { + radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0)); + radeon_emit(cs, i); + radeon_emit(cs, va >> 8); + + r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); + } + + if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) { + uint64_t va = t[i]->buf_filled_size->gpu_address + + t[i]->buf_filled_size_offset; + + /* Append. */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); /* src address lo */ + radeon_emit(cs, va >> 32); /* src address hi */ + + r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size, + RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); + } else { + /* Start from the beginning. */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */ + radeon_emit(cs, 0); /* unused */ + } + } + + if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) { + radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0)); + radeon_emit(cs, update_flags); + } + rctx->streamout.begin_emitted = true; +} + +void r600_emit_streamout_end(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct r600_so_target **t = rctx->streamout.targets; + unsigned i; + uint64_t va; + + r600_flush_vgt_streamout(rctx); + + for (i = 0; i < rctx->streamout.num_targets; i++) { + if (!t[i]) + continue; + + va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | + STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ + radeon_emit(cs, va); /* dst address lo */ + radeon_emit(cs, va >> 32); /* dst address hi */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + + r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size, + RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE); + + /* Zero the buffer size. The counters (primitives generated, + * primitives emitted) may be enabled even if there is not + * buffer bound. This ensures that the primitives-emitted query + * won't increment. */ + radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + + t[i]->buf_filled_size_valid = true; + } + + rctx->streamout.begin_emitted = false; + rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; +} + +/* STREAMOUT CONFIG DERIVED STATE + * + * Streamout must be enabled for the PRIMITIVES_GENERATED query to work. + * The buffer mask is an independent state, so no writes occur if there + * are no buffers bound. + */ + +static void r600_emit_streamout_enable(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN; + unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)); + unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN; + unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask & + rctx->streamout.enabled_stream_buffers_mask; + + if (rctx->chip_class >= EVERGREEN) { + strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG; + + strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG; + strmout_config_val |= + S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) | + S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) | + S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx)); + } + radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val); + radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val); +} + +static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable) +{ + bool old_strmout_en = r600_get_strmout_en(rctx); + unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask; + + rctx->streamout.streamout_enabled = enable; + + rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask | + (rctx->streamout.enabled_mask << 4) | + (rctx->streamout.enabled_mask << 8) | + (rctx->streamout.enabled_mask << 12); + + if ((old_strmout_en != r600_get_strmout_en(rctx)) || + (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) { + rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true); + } +} + +void r600_update_prims_generated_query_state(struct r600_common_context *rctx, + unsigned type, int diff) +{ + if (type == PIPE_QUERY_PRIMITIVES_GENERATED) { + bool old_strmout_en = r600_get_strmout_en(rctx); + + rctx->streamout.num_prims_gen_queries += diff; + assert(rctx->streamout.num_prims_gen_queries >= 0); + + rctx->streamout.prims_gen_query_enabled = + rctx->streamout.num_prims_gen_queries != 0; + + if (old_strmout_en != r600_get_strmout_en(rctx)) { + rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true); + } + } +} + +void r600_streamout_init(struct r600_common_context *rctx) +{ + rctx->b.create_stream_output_target = r600_create_so_target; + rctx->b.stream_output_target_destroy = r600_so_target_destroy; + rctx->streamout.begin_atom.emit = r600_emit_streamout_begin; + rctx->streamout.enable_atom.emit = r600_emit_streamout_enable; + rctx->streamout.enable_atom.num_dw = 6; +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c new file mode 100644 index 000000000..9e1ff9e5f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c @@ -0,0 +1,398 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* This file implements randomized SDMA texture blit tests. */ + +#include "r600_pipe_common.h" +#include "util/u_surface.h" +#include "util/rand_xor.h" + +static uint64_t seed_xorshift128plus[2]; + +#define RAND_NUM_SIZE 8 + +/* The GPU blits are emulated on the CPU using these CPU textures. */ + +struct cpu_texture { + uint8_t *ptr; + uint64_t size; + uint64_t layer_stride; + unsigned stride; +}; + +static void alloc_cpu_texture(struct cpu_texture *tex, + struct pipe_resource *templ, int bpp) +{ + tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE); + tex->layer_stride = (uint64_t)tex->stride * templ->height0; + tex->size = tex->layer_stride * templ->array_size; + tex->ptr = malloc(tex->size); + assert(tex->ptr); +} + +static void set_random_pixels(struct pipe_context *ctx, + struct pipe_resource *tex, + struct cpu_texture *cpu) +{ + struct pipe_transfer *t; + uint8_t *map; + int x,y,z; + + map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE, + 0, 0, 0, tex->width0, tex->height0, + tex->array_size, &t); + assert(map); + + for (z = 0; z < tex->array_size; z++) { + for (y = 0; y < tex->height0; y++) { + uint64_t *ptr = (uint64_t*) + (map + t->layer_stride*z + t->stride*y); + uint64_t *ptr_cpu = (uint64_t*) + (cpu->ptr + cpu->layer_stride*z + cpu->stride*y); + unsigned size = cpu->stride / RAND_NUM_SIZE; + + assert(t->stride % RAND_NUM_SIZE == 0); + assert(cpu->stride % RAND_NUM_SIZE == 0); + + for (x = 0; x < size; x++) { + *ptr++ = *ptr_cpu++ = + rand_xorshift128plus(seed_xorshift128plus); + } + } + } + + pipe_transfer_unmap(ctx, t); +} + +static bool compare_textures(struct pipe_context *ctx, + struct pipe_resource *tex, + struct cpu_texture *cpu, int bpp) +{ + struct pipe_transfer *t; + uint8_t *map; + int y,z; + bool pass = true; + + map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ, + 0, 0, 0, tex->width0, tex->height0, + tex->array_size, &t); + assert(map); + + for (z = 0; z < tex->array_size; z++) { + for (y = 0; y < tex->height0; y++) { + uint8_t *ptr = map + t->layer_stride*z + t->stride*y; + uint8_t *cpu_ptr = cpu->ptr + + cpu->layer_stride*z + cpu->stride*y; + + if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) { + pass = false; + goto done; + } + } + } +done: + pipe_transfer_unmap(ctx, t); + return pass; +} + +static enum pipe_format get_format_from_bpp(int bpp) +{ + switch (bpp) { + case 1: + return PIPE_FORMAT_R8_UINT; + case 2: + return PIPE_FORMAT_R16_UINT; + case 4: + return PIPE_FORMAT_R32_UINT; + case 8: + return PIPE_FORMAT_R32G32_UINT; + case 16: + return PIPE_FORMAT_R32G32B32A32_UINT; + default: + assert(0); + return PIPE_FORMAT_NONE; + } +} + +static const char *array_mode_to_string(struct r600_common_screen *rscreen, + struct radeon_surf *surf) +{ + if (rscreen->chip_class >= GFX9) { + /* TODO */ + return " UNKNOWN"; + } else { + switch (surf->u.legacy.level[0].mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + return "LINEAR_ALIGNED"; + case RADEON_SURF_MODE_1D: + return "1D_TILED_THIN1"; + case RADEON_SURF_MODE_2D: + return "2D_TILED_THIN1"; + default: + assert(0); + return " UNKNOWN"; + } + } +} + +static unsigned generate_max_tex_side(unsigned max_tex_side) +{ + switch (rand() % 4) { + case 0: + /* Try to hit large sizes in 1/4 of the cases. */ + return max_tex_side; + case 1: + /* Try to hit 1D tiling in 1/4 of the cases. */ + return 128; + default: + /* Try to hit common sizes in 2/4 of the cases. */ + return 2048; + } +} + +void r600_test_dma(struct r600_common_screen *rscreen) +{ + struct pipe_screen *screen = &rscreen->b; + struct pipe_context *ctx = screen->context_create(screen, NULL, 0); + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + uint64_t max_alloc_size; + unsigned i, iterations, num_partial_copies, max_levels, max_tex_side; + unsigned num_pass = 0, num_fail = 0; + + max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + max_tex_side = 1 << (max_levels - 1); + + /* Max 128 MB allowed for both textures. */ + max_alloc_size = 128 * 1024 * 1024; + + /* the seed for random test parameters */ + srand(0x9b47d95b); + /* the seed for random pixel data */ + s_rand_xorshift128plus(seed_xorshift128plus, false); + + iterations = 1000000000; /* just kill it when you are bored */ + num_partial_copies = 30; + + /* These parameters are randomly generated per test: + * - whether to do one whole-surface copy or N partial copies per test + * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D) + * - which texture dimensions to use + * - whether to use VRAM (all tiling modes) and GTT (staging, linear + * only) allocations + * - random initial pixels in src + * - generate random subrectangle copies for partial blits + */ + for (i = 0; i < iterations; i++) { + struct pipe_resource tsrc = {}, tdst = {}, *src, *dst; + struct r600_texture *rdst; + struct r600_texture *rsrc; + struct cpu_texture src_cpu, dst_cpu; + unsigned bpp, max_width, max_height, max_depth, j, num; + unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen; + unsigned max_tex_layers; + bool pass; + bool do_partial_copies = rand() & 1; + + /* generate a random test case */ + tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY; + tsrc.depth0 = tdst.depth0 = 1; + + bpp = 1 << (rand() % 5); + tsrc.format = tdst.format = get_format_from_bpp(bpp); + + max_tex_side_gen = generate_max_tex_side(max_tex_side); + max_tex_layers = rand() % 4 ? 1 : 5; + + tsrc.width0 = (rand() % max_tex_side_gen) + 1; + tsrc.height0 = (rand() % max_tex_side_gen) + 1; + tsrc.array_size = (rand() % max_tex_layers) + 1; + + /* Have a 1/4 chance of getting power-of-two dimensions. */ + if (rand() % 4 == 0) { + tsrc.width0 = util_next_power_of_two(tsrc.width0); + tsrc.height0 = util_next_power_of_two(tsrc.height0); + } + + if (!do_partial_copies) { + /* whole-surface copies only, same dimensions */ + tdst = tsrc; + } else { + max_tex_side_gen = generate_max_tex_side(max_tex_side); + max_tex_layers = rand() % 4 ? 1 : 5; + + /* many partial copies, dimensions can be different */ + tdst.width0 = (rand() % max_tex_side_gen) + 1; + tdst.height0 = (rand() % max_tex_side_gen) + 1; + tdst.array_size = (rand() % max_tex_layers) + 1; + + /* Have a 1/4 chance of getting power-of-two dimensions. */ + if (rand() % 4 == 0) { + tdst.width0 = util_next_power_of_two(tdst.width0); + tdst.height0 = util_next_power_of_two(tdst.height0); + } + } + + /* check texture sizes */ + if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp + + (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp > + max_alloc_size) { + /* too large, try again */ + i--; + continue; + } + + /* VRAM + the tiling mode depends on dimensions (3/4 of cases), + * or GTT + linear only (1/4 of cases) + */ + tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING; + tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING; + + /* Allocate textures (both the GPU and CPU copies). + * The CPU will emulate what the GPU should be doing. + */ + src = screen->resource_create(screen, &tsrc); + dst = screen->resource_create(screen, &tdst); + assert(src); + assert(dst); + rdst = (struct r600_texture*)dst; + rsrc = (struct r600_texture*)src; + alloc_cpu_texture(&src_cpu, &tsrc, bpp); + alloc_cpu_texture(&dst_cpu, &tdst, bpp); + + printf("%4u: dst = (%5u x %5u x %u, %s), " + " src = (%5u x %5u x %u, %s), bpp = %2u, ", + i, tdst.width0, tdst.height0, tdst.array_size, + array_mode_to_string(rscreen, &rdst->surface), + tsrc.width0, tsrc.height0, tsrc.array_size, + array_mode_to_string(rscreen, &rsrc->surface), bpp); + fflush(stdout); + + /* set src pixels */ + set_random_pixels(ctx, src, &src_cpu); + + /* clear dst pixels */ + rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true); + memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size); + + /* preparation */ + max_width = MIN2(tsrc.width0, tdst.width0); + max_height = MIN2(tsrc.height0, tdst.height0); + max_depth = MIN2(tsrc.array_size, tdst.array_size); + + num = do_partial_copies ? num_partial_copies : 1; + for (j = 0; j < num; j++) { + int width, height, depth; + int srcx, srcy, srcz, dstx, dsty, dstz; + struct pipe_box box; + unsigned old_num_draw_calls = rctx->num_draw_calls; + unsigned old_num_dma_calls = rctx->num_dma_calls; + + if (!do_partial_copies) { + /* copy whole src to dst */ + width = max_width; + height = max_height; + depth = max_depth; + + srcx = srcy = srcz = dstx = dsty = dstz = 0; + } else { + /* random sub-rectangle copies from src to dst */ + depth = (rand() % max_depth) + 1; + srcz = rand() % (tsrc.array_size - depth + 1); + dstz = rand() % (tdst.array_size - depth + 1); + + /* special code path to hit the tiled partial copies */ + if (!rsrc->surface.is_linear && + !rdst->surface.is_linear && + rand() & 1) { + if (max_width < 8 || max_height < 8) + continue; + width = ((rand() % (max_width / 8)) + 1) * 8; + height = ((rand() % (max_height / 8)) + 1) * 8; + + srcx = rand() % (tsrc.width0 - width + 1) & ~0x7; + srcy = rand() % (tsrc.height0 - height + 1) & ~0x7; + + dstx = rand() % (tdst.width0 - width + 1) & ~0x7; + dsty = rand() % (tdst.height0 - height + 1) & ~0x7; + } else { + /* just make sure that it doesn't divide by zero */ + assert(max_width > 0 && max_height > 0); + + width = (rand() % max_width) + 1; + height = (rand() % max_height) + 1; + + srcx = rand() % (tsrc.width0 - width + 1); + srcy = rand() % (tsrc.height0 - height + 1); + + dstx = rand() % (tdst.width0 - width + 1); + dsty = rand() % (tdst.height0 - height + 1); + } + + /* special code path to hit out-of-bounds reads in L2T */ + if (rsrc->surface.is_linear && + !rdst->surface.is_linear && + rand() % 4 == 0) { + srcx = 0; + srcy = 0; + srcz = 0; + } + } + + /* GPU copy */ + u_box_3d(srcx, srcy, srcz, width, height, depth, &box); + rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box); + + /* See which engine was used. */ + gfx_blits += rctx->num_draw_calls > old_num_draw_calls; + dma_blits += rctx->num_dma_calls > old_num_dma_calls; + + /* CPU copy */ + util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride, + dst_cpu.layer_stride, + dstx, dsty, dstz, width, height, depth, + src_cpu.ptr, src_cpu.stride, + src_cpu.layer_stride, + srcx, srcy, srcz); + } + + pass = compare_textures(ctx, dst, &dst_cpu, bpp); + if (pass) + num_pass++; + else + num_fail++; + + printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n", + gfx_blits, dma_blits, pass ? "pass" : "fail", + num_pass, num_pass+num_fail); + + /* cleanup */ + pipe_resource_reference(&src, NULL); + pipe_resource_reference(&dst, NULL); + free(src_cpu.ptr); + free(dst_cpu.ptr); + } + + ctx->destroy(ctx); + exit(0); +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_texture.c b/lib/mesa/src/gallium/drivers/r600/r600_texture.c new file mode 100644 index 000000000..b84111449 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_texture.c @@ -0,0 +1,1953 @@ +/* + * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ +#include "r600_pipe_common.h" +#include "r600_cs.h" +#include "r600_query.h" +#include "util/u_format.h" +#include "util/u_log.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "util/u_surface.h" +#include "os/os_time.h" +#include <errno.h> +#include <inttypes.h> + +static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, + struct r600_texture *rtex); +static enum radeon_surf_mode +r600_choose_tiling(struct r600_common_screen *rscreen, + const struct pipe_resource *templ); + + +bool r600_prepare_for_dma_blit(struct r600_common_context *rctx, + struct r600_texture *rdst, + unsigned dst_level, unsigned dstx, + unsigned dsty, unsigned dstz, + struct r600_texture *rsrc, + unsigned src_level, + const struct pipe_box *src_box) +{ + if (!rctx->dma.cs) + return false; + + if (rdst->surface.bpe != rsrc->surface.bpe) + return false; + + /* MSAA: Blits don't exist in the real world. */ + if (rsrc->resource.b.b.nr_samples > 1 || + rdst->resource.b.b.nr_samples > 1) + return false; + + /* Depth-stencil surfaces: + * When dst is linear, the DB->CB copy preserves HTILE. + * When dst is tiled, the 3D path must be used to update HTILE. + */ + if (rsrc->is_depth || rdst->is_depth) + return false; + + /* CMASK as: + * src: Both texture and SDMA paths need decompression. Use SDMA. + * dst: If overwriting the whole texture, discard CMASK and use + * SDMA. Otherwise, use the 3D path. + */ + if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) { + /* The CMASK clear is only enabled for the first level. */ + assert(dst_level == 0); + if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level, + dstx, dsty, dstz, src_box->width, + src_box->height, src_box->depth)) + return false; + + r600_texture_discard_cmask(rctx->screen, rdst); + } + + /* All requirements are met. Prepare textures for SDMA. */ + if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level)) + rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b); + + assert(!(rsrc->dirty_level_mask & (1 << src_level))); + assert(!(rdst->dirty_level_mask & (1 << dst_level))); + + return true; +} + +/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */ +static void r600_copy_region_with_blit(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_blit_info blit; + + memset(&blit, 0, sizeof(blit)); + blit.src.resource = src; + blit.src.format = src->format; + blit.src.level = src_level; + blit.src.box = *src_box; + blit.dst.resource = dst; + blit.dst.format = dst->format; + blit.dst.level = dst_level; + blit.dst.box.x = dstx; + blit.dst.box.y = dsty; + blit.dst.box.z = dstz; + blit.dst.box.width = src_box->width; + blit.dst.box.height = src_box->height; + blit.dst.box.depth = src_box->depth; + blit.mask = util_format_get_mask(src->format) & + util_format_get_mask(dst->format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + if (blit.mask) { + pipe->blit(pipe, &blit); + } +} + +/* Copy from a full GPU texture to a transfer's staging one. */ +static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *dst = &rtransfer->staging->b.b; + struct pipe_resource *src = transfer->resource; + + if (src->nr_samples > 1) { + r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, + src, transfer->level, &transfer->box); + return; + } + + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level, + &transfer->box); +} + +/* Copy from a transfer's staging texture to a full GPU one. */ +static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *dst = transfer->resource; + struct pipe_resource *src = &rtransfer->staging->b.b; + struct pipe_box sbox; + + u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox); + + if (dst->nr_samples > 1) { + r600_copy_region_with_blit(ctx, dst, transfer->level, + transfer->box.x, transfer->box.y, transfer->box.z, + src, 0, &sbox); + return; + } + + rctx->dma_copy(ctx, dst, transfer->level, + transfer->box.x, transfer->box.y, transfer->box.z, + src, 0, &sbox); +} + +static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, + struct r600_texture *rtex, unsigned level, + const struct pipe_box *box, + unsigned *stride, + unsigned *layer_stride) +{ + *stride = rtex->surface.u.legacy.level[level].nblk_x * + rtex->surface.bpe; + *layer_stride = rtex->surface.u.legacy.level[level].slice_size; + + if (!box) + return rtex->surface.u.legacy.level[level].offset; + + /* Each texture is an array of mipmap levels. Each level is + * an array of slices. */ + return rtex->surface.u.legacy.level[level].offset + + box->z * rtex->surface.u.legacy.level[level].slice_size + + (box->y / rtex->surface.blk_h * + rtex->surface.u.legacy.level[level].nblk_x + + box->x / rtex->surface.blk_w) * rtex->surface.bpe; +} + +static int r600_init_surface(struct r600_common_screen *rscreen, + struct radeon_surf *surface, + const struct pipe_resource *ptex, + enum radeon_surf_mode array_mode, + unsigned pitch_in_bytes_override, + unsigned offset, + bool is_imported, + bool is_scanout, + bool is_flushed_depth) +{ + const struct util_format_description *desc = + util_format_description(ptex->format); + bool is_depth, is_stencil; + int r; + unsigned i, bpe, flags = 0; + + is_depth = util_format_has_depth(desc); + is_stencil = util_format_has_stencil(desc); + + if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth && + ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + bpe = 4; /* stencil is allocated separately on evergreen */ + } else { + bpe = util_format_get_blocksize(ptex->format); + assert(util_is_power_of_two(bpe)); + } + + if (!is_flushed_depth && is_depth) { + flags |= RADEON_SURF_ZBUFFER; + + if (is_stencil) + flags |= RADEON_SURF_SBUFFER; + } + + if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { + /* This should catch bugs in gallium users setting incorrect flags. */ + assert(ptex->nr_samples <= 1 && + ptex->array_size == 1 && + ptex->depth0 == 1 && + ptex->last_level == 0 && + !(flags & RADEON_SURF_Z_OR_SBUFFER)); + + flags |= RADEON_SURF_SCANOUT; + } + + if (ptex->bind & PIPE_BIND_SHARED) + flags |= RADEON_SURF_SHAREABLE; + if (is_imported) + flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; + if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING)) + flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; + + r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe, + array_mode, surface); + if (r) { + return r; + } + + if (pitch_in_bytes_override && + pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) { + /* old ddx on evergreen over estimate alignment for 1d, only 1 level + * for those + */ + surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe; + surface->u.legacy.level[0].slice_size = pitch_in_bytes_override * + surface->u.legacy.level[0].nblk_y; + } + + if (offset) { + for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) + surface->u.legacy.level[i].offset += offset; + } + + return 0; +} + +static void r600_texture_init_metadata(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct radeon_bo_metadata *metadata) +{ + struct radeon_surf *surface = &rtex->surface; + + memset(metadata, 0, sizeof(*metadata)); + + metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; + metadata->u.legacy.bankw = surface->u.legacy.bankw; + metadata->u.legacy.bankh = surface->u.legacy.bankh; + metadata->u.legacy.tile_split = surface->u.legacy.tile_split; + metadata->u.legacy.mtilea = surface->u.legacy.mtilea; + metadata->u.legacy.num_banks = surface->u.legacy.num_banks; + metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; + metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; +} + +static void r600_surface_import_metadata(struct r600_common_screen *rscreen, + struct radeon_surf *surf, + struct radeon_bo_metadata *metadata, + enum radeon_surf_mode *array_mode, + bool *is_scanout) +{ + surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; + surf->u.legacy.bankw = metadata->u.legacy.bankw; + surf->u.legacy.bankh = metadata->u.legacy.bankh; + surf->u.legacy.tile_split = metadata->u.legacy.tile_split; + surf->u.legacy.mtilea = metadata->u.legacy.mtilea; + surf->u.legacy.num_banks = metadata->u.legacy.num_banks; + + if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) + *array_mode = RADEON_SURF_MODE_2D; + else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) + *array_mode = RADEON_SURF_MODE_1D; + else + *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + + *is_scanout = metadata->u.legacy.scanout; +} + +static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, + struct r600_texture *rtex) +{ + struct r600_common_screen *rscreen = rctx->screen; + struct pipe_context *ctx = &rctx->b; + + if (ctx == rscreen->aux_context) + mtx_lock(&rscreen->aux_context_lock); + + ctx->flush_resource(ctx, &rtex->resource.b.b); + ctx->flush(ctx, NULL, 0); + + if (ctx == rscreen->aux_context) + mtx_unlock(&rscreen->aux_context_lock); +} + +static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + if (!rtex->cmask.size) + return; + + assert(rtex->resource.b.b.nr_samples <= 1); + + /* Disable CMASK. */ + memset(&rtex->cmask, 0, sizeof(rtex->cmask)); + rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; + rtex->dirty_level_mask = 0; + + rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1); + + if (rtex->cmask_buffer != &rtex->resource) + r600_resource_reference(&rtex->cmask_buffer, NULL); + + /* Notify all contexts about the change. */ + p_atomic_inc(&rscreen->dirty_tex_counter); + p_atomic_inc(&rscreen->compressed_colortex_counter); +} + +static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, + struct r600_texture *rtex, + unsigned new_bind_flag, + bool invalidate_storage) +{ + struct pipe_screen *screen = rctx->b.screen; + struct r600_texture *new_tex; + struct pipe_resource templ = rtex->resource.b.b; + unsigned i; + + templ.bind |= new_bind_flag; + + /* r600g doesn't react to dirty_tex_descriptor_counter */ + if (rctx->chip_class < SI) + return; + + if (rtex->resource.b.is_shared) + return; + + if (new_bind_flag == PIPE_BIND_LINEAR) { + if (rtex->surface.is_linear) + return; + + /* This fails with MSAA, depth, and compressed textures. */ + if (r600_choose_tiling(rctx->screen, &templ) != + RADEON_SURF_MODE_LINEAR_ALIGNED) + return; + } + + new_tex = (struct r600_texture*)screen->resource_create(screen, &templ); + if (!new_tex) + return; + + /* Copy the pixels to the new texture. */ + if (!invalidate_storage) { + for (i = 0; i <= templ.last_level; i++) { + struct pipe_box box; + + u_box_3d(0, 0, 0, + u_minify(templ.width0, i), u_minify(templ.height0, i), + util_max_layer(&templ, i) + 1, &box); + + rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0, + &rtex->resource.b.b, i, &box); + } + } + + if (new_bind_flag == PIPE_BIND_LINEAR) { + r600_texture_discard_cmask(rctx->screen, rtex); + } + + /* Replace the structure fields of rtex. */ + rtex->resource.b.b.bind = templ.bind; + pb_reference(&rtex->resource.buf, new_tex->resource.buf); + rtex->resource.gpu_address = new_tex->resource.gpu_address; + rtex->resource.vram_usage = new_tex->resource.vram_usage; + rtex->resource.gart_usage = new_tex->resource.gart_usage; + rtex->resource.bo_size = new_tex->resource.bo_size; + rtex->resource.bo_alignment = new_tex->resource.bo_alignment; + rtex->resource.domains = new_tex->resource.domains; + rtex->resource.flags = new_tex->resource.flags; + rtex->size = new_tex->size; + rtex->db_render_format = new_tex->db_render_format; + rtex->db_compatible = new_tex->db_compatible; + rtex->can_sample_z = new_tex->can_sample_z; + rtex->can_sample_s = new_tex->can_sample_s; + rtex->surface = new_tex->surface; + rtex->fmask = new_tex->fmask; + rtex->cmask = new_tex->cmask; + rtex->cb_color_info = new_tex->cb_color_info; + rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; + rtex->htile_offset = new_tex->htile_offset; + rtex->depth_cleared = new_tex->depth_cleared; + rtex->stencil_cleared = new_tex->stencil_cleared; + rtex->non_disp_tiling = new_tex->non_disp_tiling; + rtex->framebuffers_bound = new_tex->framebuffers_bound; + + if (new_bind_flag == PIPE_BIND_LINEAR) { + assert(!rtex->htile_offset); + assert(!rtex->cmask.size); + assert(!rtex->fmask.size); + assert(!rtex->is_depth); + } + + r600_texture_reference(&new_tex, NULL); + + p_atomic_inc(&rctx->screen->dirty_tex_counter); +} + +static boolean r600_texture_get_handle(struct pipe_screen* screen, + struct pipe_context *ctx, + struct pipe_resource *resource, + struct winsys_handle *whandle, + unsigned usage) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_common_context *rctx; + struct r600_resource *res = (struct r600_resource*)resource; + struct r600_texture *rtex = (struct r600_texture*)resource; + struct radeon_bo_metadata metadata; + bool update_metadata = false; + unsigned stride, offset, slice_size; + + ctx = threaded_context_unwrap_sync(ctx); + rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); + + if (resource->target != PIPE_BUFFER) { + /* This is not supported now, but it might be required for OpenCL + * interop in the future. + */ + if (resource->nr_samples > 1 || rtex->is_depth) + return false; + + /* Move a suballocated texture into a non-suballocated allocation. */ + if (rscreen->ws->buffer_is_suballocated(res->buf) || + rtex->surface.tile_swizzle) { + assert(!res->b.is_shared); + r600_reallocate_texture_inplace(rctx, rtex, + PIPE_BIND_SHARED, false); + rctx->b.flush(&rctx->b, NULL, 0); + assert(res->b.b.bind & PIPE_BIND_SHARED); + assert(res->flags & RADEON_FLAG_NO_SUBALLOC); + assert(rtex->surface.tile_swizzle == 0); + } + + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + rtex->cmask.size) { + /* Eliminate fast clear (CMASK) */ + r600_eliminate_fast_color_clear(rctx, rtex); + + /* Disable CMASK if flush_resource isn't going + * to be called. + */ + if (rtex->cmask.size) + r600_texture_discard_cmask(rscreen, rtex); + } + + /* Set metadata. */ + if (!res->b.is_shared || update_metadata) { + r600_texture_init_metadata(rscreen, rtex, &metadata); + if (rscreen->query_opaque_metadata) + rscreen->query_opaque_metadata(rscreen, rtex, + &metadata); + + rscreen->ws->buffer_set_metadata(res->buf, &metadata); + } + + offset = rtex->surface.u.legacy.level[0].offset; + stride = rtex->surface.u.legacy.level[0].nblk_x * + rtex->surface.bpe; + slice_size = rtex->surface.u.legacy.level[0].slice_size; + } else { + /* Move a suballocated buffer into a non-suballocated allocation. */ + if (rscreen->ws->buffer_is_suballocated(res->buf)) { + assert(!res->b.is_shared); + + /* Allocate a new buffer with PIPE_BIND_SHARED. */ + struct pipe_resource templ = res->b.b; + templ.bind |= PIPE_BIND_SHARED; + + struct pipe_resource *newb = + screen->resource_create(screen, &templ); + if (!newb) + return false; + + /* Copy the old buffer contents to the new one. */ + struct pipe_box box; + u_box_1d(0, newb->width0, &box); + rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0, + &res->b.b, 0, &box); + /* Move the new buffer storage to the old pipe_resource. */ + r600_replace_buffer_storage(&rctx->b, &res->b.b, newb); + pipe_resource_reference(&newb, NULL); + + assert(res->b.b.bind & PIPE_BIND_SHARED); + assert(res->flags & RADEON_FLAG_NO_SUBALLOC); + } + + /* Buffers */ + offset = 0; + stride = 0; + slice_size = 0; + } + + if (res->b.is_shared) { + /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user + * doesn't set it. + */ + res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) + res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; + } else { + res->b.is_shared = true; + res->external_usage = usage; + } + + return rscreen->ws->buffer_get_handle(res->buf, stride, offset, + slice_size, whandle); +} + +static void r600_texture_destroy(struct pipe_screen *screen, + struct pipe_resource *ptex) +{ + struct r600_texture *rtex = (struct r600_texture*)ptex; + struct r600_resource *resource = &rtex->resource; + + r600_texture_reference(&rtex->flushed_depth_texture, NULL); + + if (rtex->cmask_buffer != &rtex->resource) { + r600_resource_reference(&rtex->cmask_buffer, NULL); + } + pb_reference(&resource->buf, NULL); + FREE(rtex); +} + +static const struct u_resource_vtbl r600_texture_vtbl; + +/* The number of samples can be specified independently of the texture. */ +void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + unsigned nr_samples, + struct r600_fmask_info *out) +{ + /* FMASK is allocated like an ordinary texture. */ + struct pipe_resource templ = rtex->resource.b.b; + struct radeon_surf fmask = {}; + unsigned flags, bpe; + + memset(out, 0, sizeof(*out)); + + templ.nr_samples = 1; + flags = rtex->surface.flags | RADEON_SURF_FMASK; + + /* Use the same parameters and tile mode. */ + fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw; + fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh; + fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea; + fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split; + + if (nr_samples <= 4) + fmask.u.legacy.bankh = 4; + + switch (nr_samples) { + case 2: + case 4: + bpe = 1; + break; + case 8: + bpe = 4; + break; + default: + R600_ERR("Invalid sample count for FMASK allocation.\n"); + return; + } + + /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption. + * This can be fixed by writing a separate FMASK allocator specifically + * for R600-R700 asics. */ + if (rscreen->chip_class <= R700) { + bpe *= 2; + } + + if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe, + RADEON_SURF_MODE_2D, &fmask)) { + R600_ERR("Got error in surface_init while allocating FMASK.\n"); + return; + } + + assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); + + out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; + if (out->slice_tile_max) + out->slice_tile_max -= 1; + + out->tile_mode_index = fmask.u.legacy.tiling_index[0]; + out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; + out->bank_height = fmask.u.legacy.bankh; + out->tile_swizzle = fmask.tile_swizzle; + out->alignment = MAX2(256, fmask.surf_alignment); + out->size = fmask.surf_size; +} + +static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + r600_texture_get_fmask_info(rscreen, rtex, + rtex->resource.b.b.nr_samples, &rtex->fmask); + + rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment); + rtex->size = rtex->fmask.offset + rtex->fmask.size; +} + +void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct r600_cmask_info *out) +{ + unsigned cmask_tile_width = 8; + unsigned cmask_tile_height = 8; + unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height; + unsigned element_bits = 4; + unsigned cmask_cache_bits = 1024; + unsigned num_pipes = rscreen->info.num_tile_pipes; + unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; + + unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes; + unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements; + unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile); + unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile); + unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width; + + unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width); + unsigned height = align(rtex->resource.b.b.height0, macro_tile_height); + + unsigned base_align = num_pipes * pipe_interleave_bytes; + unsigned slice_bytes = + ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements; + + assert(macro_tile_width % 128 == 0); + assert(macro_tile_height % 128 == 0); + + out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1; + out->alignment = MAX2(256, base_align); + out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) * + align(slice_bytes, base_align); +} + +static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); + + rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment); + rtex->size = rtex->cmask.offset + rtex->cmask.size; + + rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); +} + +static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + if (rtex->cmask_buffer) + return; + + assert(rtex->cmask.size == 0); + + r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); + + rtex->cmask_buffer = (struct r600_resource *) + r600_aligned_buffer_create(&rscreen->b, + R600_RESOURCE_FLAG_UNMAPPABLE, + PIPE_USAGE_DEFAULT, + rtex->cmask.size, + rtex->cmask.alignment); + if (rtex->cmask_buffer == NULL) { + rtex->cmask.size = 0; + return; + } + + /* update colorbuffer state bits */ + rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8; + + rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); + + p_atomic_inc(&rscreen->compressed_colortex_counter); +} + +static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + unsigned cl_width, cl_height, width, height; + unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; + unsigned num_pipes = rscreen->info.num_tile_pipes; + + rtex->surface.htile_size = 0; + + if (rscreen->chip_class <= EVERGREEN && + rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26) + return; + + /* HW bug on R6xx. */ + if (rscreen->chip_class == R600 && + (rtex->resource.b.b.width0 > 7680 || + rtex->resource.b.b.height0 > 7680)) + return; + + switch (num_pipes) { + case 1: + cl_width = 32; + cl_height = 16; + break; + case 2: + cl_width = 32; + cl_height = 32; + break; + case 4: + cl_width = 64; + cl_height = 32; + break; + case 8: + cl_width = 64; + cl_height = 64; + break; + case 16: + cl_width = 128; + cl_height = 64; + break; + default: + assert(0); + return; + } + + width = align(rtex->resource.b.b.width0, cl_width * 8); + height = align(rtex->resource.b.b.height0, cl_height * 8); + + slice_elements = (width * height) / (8 * 8); + slice_bytes = slice_elements * 4; + + pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; + base_align = num_pipes * pipe_interleave_bytes; + + rtex->surface.htile_alignment = base_align; + rtex->surface.htile_size = + (util_max_layer(&rtex->resource.b.b, 0) + 1) * + align(slice_bytes, base_align); +} + +static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, + struct r600_texture *rtex) +{ + r600_texture_get_htile_size(rscreen, rtex); + + if (!rtex->surface.htile_size) + return; + + rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment); + rtex->size = rtex->htile_offset + rtex->surface.htile_size; +} + +void r600_print_texture_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, struct u_log_context *log) +{ + int i; + + /* Common parameters. */ + u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, " + "blk_h=%u, array_size=%u, last_level=%u, " + "bpe=%u, nsamples=%u, flags=0x%x, %s\n", + rtex->resource.b.b.width0, rtex->resource.b.b.height0, + rtex->resource.b.b.depth0, rtex->surface.blk_w, + rtex->surface.blk_h, + rtex->resource.b.b.array_size, rtex->resource.b.b.last_level, + rtex->surface.bpe, rtex->resource.b.b.nr_samples, + rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); + + u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, " + "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", + rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw, + rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea, + rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config, + (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0); + + if (rtex->fmask.size) + u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, " + "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n", + rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment, + rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height, + rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index); + + if (rtex->cmask.size) + u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " + "slice_tile_max=%u\n", + rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment, + rtex->cmask.slice_tile_max); + + if (rtex->htile_offset) + u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", " + "alignment=%u\n", + rtex->htile_offset, rtex->surface.htile_size, + rtex->surface.htile_alignment); + + for (i = 0; i <= rtex->resource.b.b.last_level; i++) + u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " + "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " + "mode=%u, tiling_index = %u\n", + i, rtex->surface.u.legacy.level[i].offset, + rtex->surface.u.legacy.level[i].slice_size, + u_minify(rtex->resource.b.b.width0, i), + u_minify(rtex->resource.b.b.height0, i), + u_minify(rtex->resource.b.b.depth0, i), + rtex->surface.u.legacy.level[i].nblk_x, + rtex->surface.u.legacy.level[i].nblk_y, + rtex->surface.u.legacy.level[i].mode, + rtex->surface.u.legacy.tiling_index[i]); + + if (rtex->surface.has_stencil) { + u_log_printf(log, " StencilLayout: tilesplit=%u\n", + rtex->surface.u.legacy.stencil_tile_split); + for (i = 0; i <= rtex->resource.b.b.last_level; i++) { + u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", " + "slice_size=%"PRIu64", npix_x=%u, " + "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, " + "mode=%u, tiling_index = %u\n", + i, rtex->surface.u.legacy.stencil_level[i].offset, + rtex->surface.u.legacy.stencil_level[i].slice_size, + u_minify(rtex->resource.b.b.width0, i), + u_minify(rtex->resource.b.b.height0, i), + u_minify(rtex->resource.b.b.depth0, i), + rtex->surface.u.legacy.stencil_level[i].nblk_x, + rtex->surface.u.legacy.stencil_level[i].nblk_y, + rtex->surface.u.legacy.stencil_level[i].mode, + rtex->surface.u.legacy.stencil_tiling_index[i]); + } + } +} + +/* Common processing for r600_texture_create and r600_texture_from_handle */ +static struct r600_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + struct pb_buffer *buf, + struct radeon_surf *surface) +{ + struct r600_texture *rtex; + struct r600_resource *resource; + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + + rtex = CALLOC_STRUCT(r600_texture); + if (!rtex) + return NULL; + + resource = &rtex->resource; + resource->b.b = *base; + resource->b.b.next = NULL; + resource->b.vtbl = &r600_texture_vtbl; + pipe_reference_init(&resource->b.b.reference, 1); + resource->b.b.screen = screen; + + /* don't include stencil-only formats which we don't support for rendering */ + rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); + + rtex->surface = *surface; + rtex->size = rtex->surface.surf_size; + rtex->db_render_format = base->format; + + /* Tiled depth textures utilize the non-displayable tile order. + * This must be done after r600_setup_surface. + * Applies to R600-Cayman. */ + rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D; + /* Applies to GCN. */ + rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; + + if (rtex->is_depth) { + if (base->flags & (R600_RESOURCE_FLAG_TRANSFER | + R600_RESOURCE_FLAG_FLUSHED_DEPTH) || + rscreen->chip_class >= EVERGREEN) { + rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; + rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; + } else { + if (rtex->resource.b.b.nr_samples <= 1 && + (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || + rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT)) + rtex->can_sample_z = true; + } + + if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER | + R600_RESOURCE_FLAG_FLUSHED_DEPTH))) { + rtex->db_compatible = true; + + if (!(rscreen->debug_flags & DBG_NO_HYPERZ)) + r600_texture_allocate_htile(rscreen, rtex); + } + } else { + if (base->nr_samples > 1) { + if (!buf) { + r600_texture_allocate_fmask(rscreen, rtex); + r600_texture_allocate_cmask(rscreen, rtex); + rtex->cmask_buffer = &rtex->resource; + } + if (!rtex->fmask.size || !rtex->cmask.size) { + FREE(rtex); + return NULL; + } + } + } + + /* Now create the backing buffer. */ + if (!buf) { + r600_init_resource_fields(rscreen, resource, rtex->size, + rtex->surface.surf_alignment); + + /* Displayable surfaces are not suballocated. */ + if (resource->b.b.bind & PIPE_BIND_SCANOUT) + resource->flags |= RADEON_FLAG_NO_SUBALLOC; + + if (!r600_alloc_resource(rscreen, resource)) { + FREE(rtex); + return NULL; + } + } else { + resource->buf = buf; + resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); + resource->bo_size = buf->size; + resource->bo_alignment = buf->alignment; + resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); + if (resource->domains & RADEON_DOMAIN_VRAM) + resource->vram_usage = buf->size; + else if (resource->domains & RADEON_DOMAIN_GTT) + resource->gart_usage = buf->size; + } + + if (rtex->cmask.size) { + /* Initialize the cmask to 0xCC (= compressed state). */ + r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, + rtex->cmask.offset, rtex->cmask.size, + 0xCCCCCCCC); + } + if (rtex->htile_offset) { + uint32_t clear_value = 0; + + r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, + rtex->htile_offset, + rtex->surface.htile_size, + clear_value); + } + + /* Initialize the CMASK base register value. */ + rtex->cmask.base_address_reg = + (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; + + if (rscreen->debug_flags & DBG_VM) { + fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n", + rtex->resource.gpu_address, + rtex->resource.gpu_address + rtex->resource.buf->size, + base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1, + base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format)); + } + + if (rscreen->debug_flags & DBG_TEX) { + puts("Texture:"); + struct u_log_context log; + u_log_context_init(&log); + r600_print_texture_info(rscreen, rtex, &log); + u_log_new_page_print(&log, stdout); + fflush(stdout); + u_log_context_destroy(&log); + } + + return rtex; +} + +static enum radeon_surf_mode +r600_choose_tiling(struct r600_common_screen *rscreen, + const struct pipe_resource *templ) +{ + const struct util_format_description *desc = util_format_description(templ->format); + bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING; + bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) && + !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); + + /* MSAA resources must be 2D tiled. */ + if (templ->nr_samples > 1) + return RADEON_SURF_MODE_2D; + + /* Transfer resources should be linear. */ + if (templ->flags & R600_RESOURCE_FLAG_TRANSFER) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + + /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */ + if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN && + (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) && + (templ->target == PIPE_TEXTURE_2D || + templ->target == PIPE_TEXTURE_3D)) + force_tiling = true; + + /* Handle common candidates for the linear mode. + * Compressed textures and DB surfaces must always be tiled. + */ + if (!force_tiling && + !is_depth_stencil && + !util_format_is_compressed(templ->format)) { + if (rscreen->debug_flags & DBG_NO_TILING) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + + /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + + if (templ->bind & PIPE_BIND_LINEAR) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + + /* Textures with a very small height are recommended to be linear. */ + if (templ->target == PIPE_TEXTURE_1D || + templ->target == PIPE_TEXTURE_1D_ARRAY || + /* Only very thin and long 2D textures should benefit from + * linear_aligned. */ + (templ->width0 > 8 && templ->height0 <= 2)) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + + /* Textures likely to be mapped often. */ + if (templ->usage == PIPE_USAGE_STAGING || + templ->usage == PIPE_USAGE_STREAM) + return RADEON_SURF_MODE_LINEAR_ALIGNED; + } + + /* Make small textures 1D tiled. */ + if (templ->width0 <= 16 || templ->height0 <= 16 || + (rscreen->debug_flags & DBG_NO_2D_TILING)) + return RADEON_SURF_MODE_1D; + + /* The allocator will switch to 1D if needed. */ + return RADEON_SURF_MODE_2D; +} + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_surf surface = {0}; + bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; + int r; + + r = r600_init_surface(rscreen, &surface, templ, + r600_choose_tiling(rscreen, templ), 0, 0, + false, false, is_flushed_depth); + if (r) { + return NULL; + } + + return (struct pipe_resource *) + r600_texture_create_object(screen, templ, NULL, &surface); +} + +static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle, + unsigned usage) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct pb_buffer *buf = NULL; + unsigned stride = 0, offset = 0; + enum radeon_surf_mode array_mode; + struct radeon_surf surface = {}; + int r; + struct radeon_bo_metadata metadata = {}; + struct r600_texture *rtex; + bool is_scanout; + + /* Support only 2D textures without mipmaps */ + if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || + templ->depth0 != 1 || templ->last_level != 0) + return NULL; + + buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); + if (!buf) + return NULL; + + rscreen->ws->buffer_get_metadata(buf, &metadata); + r600_surface_import_metadata(rscreen, &surface, &metadata, + &array_mode, &is_scanout); + + r = r600_init_surface(rscreen, &surface, templ, array_mode, stride, + offset, true, is_scanout, false); + if (r) { + return NULL; + } + + rtex = r600_texture_create_object(screen, templ, buf, &surface); + if (!rtex) + return NULL; + + rtex->resource.b.is_shared = true; + rtex->resource.external_usage = usage; + + if (rscreen->apply_opaque_metadata) + rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); + + assert(rtex->surface.tile_swizzle == 0); + return &rtex->resource.b.b; +} + +bool r600_init_flushed_depth_texture(struct pipe_context *ctx, + struct pipe_resource *texture, + struct r600_texture **staging) +{ + struct r600_texture *rtex = (struct r600_texture*)texture; + struct pipe_resource resource; + struct r600_texture **flushed_depth_texture = staging ? + staging : &rtex->flushed_depth_texture; + enum pipe_format pipe_format = texture->format; + + if (!staging) { + if (rtex->flushed_depth_texture) + return true; /* it's ready */ + + if (!rtex->can_sample_z && rtex->can_sample_s) { + switch (pipe_format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + /* Save memory by not allocating the S plane. */ + pipe_format = PIPE_FORMAT_Z32_FLOAT; + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + /* Save memory bandwidth by not copying the + * stencil part during flush. + * + * This potentially increases memory bandwidth + * if an application uses both Z and S texturing + * simultaneously (a flushed Z24S8 texture + * would be stored compactly), but how often + * does that really happen? + */ + pipe_format = PIPE_FORMAT_Z24X8_UNORM; + break; + default:; + } + } else if (!rtex->can_sample_s && rtex->can_sample_z) { + assert(util_format_has_stencil(util_format_description(pipe_format))); + + /* DB->CB copies to an 8bpp surface don't work. */ + pipe_format = PIPE_FORMAT_X24S8_UINT; + } + } + + memset(&resource, 0, sizeof(resource)); + resource.target = texture->target; + resource.format = pipe_format; + resource.width0 = texture->width0; + resource.height0 = texture->height0; + resource.depth0 = texture->depth0; + resource.array_size = texture->array_size; + resource.last_level = texture->last_level; + resource.nr_samples = texture->nr_samples; + resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; + resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; + resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH; + + if (staging) + resource.flags |= R600_RESOURCE_FLAG_TRANSFER; + + *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource); + if (*flushed_depth_texture == NULL) { + R600_ERR("failed to create temporary texture to hold flushed depth\n"); + return false; + } + + (*flushed_depth_texture)->non_disp_tiling = false; + return true; +} + +/** + * Initialize the pipe_resource descriptor to be of the same size as the box, + * which is supposed to hold a subregion of the texture "orig" at the given + * mipmap level. + */ +static void r600_init_temp_resource_from_box(struct pipe_resource *res, + struct pipe_resource *orig, + const struct pipe_box *box, + unsigned level, unsigned flags) +{ + memset(res, 0, sizeof(*res)); + res->format = orig->format; + res->width0 = box->width; + res->height0 = box->height; + res->depth0 = 1; + res->array_size = 1; + res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT; + res->flags = flags; + + /* We must set the correct texture target and dimensions for a 3D box. */ + if (box->depth > 1 && util_max_layer(orig, level) > 0) { + res->target = PIPE_TEXTURE_2D_ARRAY; + res->array_size = box->depth; + } else { + res->target = PIPE_TEXTURE_2D; + } +} + +static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + unsigned transfer_usage, + const struct pipe_box *box) +{ + /* r600g doesn't react to dirty_tex_descriptor_counter */ + return rscreen->chip_class >= SI && + !rtex->resource.b.is_shared && + !(transfer_usage & PIPE_TRANSFER_READ) && + rtex->resource.b.b.last_level == 0 && + util_texrange_covers_whole_level(&rtex->resource.b.b, 0, + box->x, box->y, box->z, + box->width, box->height, + box->depth); +} + +static void r600_texture_invalidate_storage(struct r600_common_context *rctx, + struct r600_texture *rtex) +{ + struct r600_common_screen *rscreen = rctx->screen; + + /* There is no point in discarding depth and tiled buffers. */ + assert(!rtex->is_depth); + assert(rtex->surface.is_linear); + + /* Reallocate the buffer in the same pipe_resource. */ + r600_alloc_resource(rscreen, &rtex->resource); + + /* Initialize the CMASK base address (needed even without CMASK). */ + rtex->cmask.base_address_reg = + (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; + + p_atomic_inc(&rscreen->dirty_tex_counter); + + rctx->num_alloc_tex_transfer_bytes += rtex->size; +} + +static void *r600_texture_transfer_map(struct pipe_context *ctx, + struct pipe_resource *texture, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_texture *rtex = (struct r600_texture*)texture; + struct r600_transfer *trans; + struct r600_resource *buf; + unsigned offset = 0; + char *map; + bool use_staging_texture = false; + + assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER)); + assert(box->width && box->height && box->depth); + + /* Depth textures use staging unconditionally. */ + if (!rtex->is_depth) { + /* Degrade the tile mode if we get too many transfers on APUs. + * On dGPUs, the staging texture is always faster. + * Only count uploads that are at least 4x4 pixels large. + */ + if (!rctx->screen->info.has_dedicated_vram && + level == 0 && + box->width >= 4 && box->height >= 4 && + p_atomic_inc_return(&rtex->num_level0_transfers) == 10) { + bool can_invalidate = + r600_can_invalidate_texture(rctx->screen, rtex, + usage, box); + + r600_reallocate_texture_inplace(rctx, rtex, + PIPE_BIND_LINEAR, + can_invalidate); + } + + /* Tiled textures need to be converted into a linear texture for CPU + * access. The staging texture is always linear and is placed in GART. + * + * Reading from VRAM or GTT WC is slow, always use the staging + * texture in this case. + * + * Use the staging texture for uploads if the underlying BO + * is busy. + */ + if (!rtex->surface.is_linear) + use_staging_texture = true; + else if (usage & PIPE_TRANSFER_READ) + use_staging_texture = + rtex->resource.domains & RADEON_DOMAIN_VRAM || + rtex->resource.flags & RADEON_FLAG_GTT_WC; + /* Write & linear only: */ + else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, + RADEON_USAGE_READWRITE) || + !rctx->ws->buffer_wait(rtex->resource.buf, 0, + RADEON_USAGE_READWRITE)) { + /* It's busy. */ + if (r600_can_invalidate_texture(rctx->screen, rtex, + usage, box)) + r600_texture_invalidate_storage(rctx, rtex); + else + use_staging_texture = true; + } + } + + trans = CALLOC_STRUCT(r600_transfer); + if (!trans) + return NULL; + pipe_resource_reference(&trans->b.b.resource, texture); + trans->b.b.level = level; + trans->b.b.usage = usage; + trans->b.b.box = *box; + + if (rtex->is_depth) { + struct r600_texture *staging_depth; + + if (rtex->resource.b.b.nr_samples > 1) { + /* MSAA depth buffers need to be converted to single sample buffers. + * + * Mapping MSAA depth buffers can occur if ReadPixels is called + * with a multisample GLX visual. + * + * First downsample the depth buffer to a temporary texture, + * then decompress the temporary one to staging. + * + * Only the region being mapped is transfered. + */ + struct pipe_resource resource; + + r600_init_temp_resource_from_box(&resource, texture, box, level, 0); + + if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + FREE(trans); + return NULL; + } + + if (usage & PIPE_TRANSFER_READ) { + struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource); + if (!temp) { + R600_ERR("failed to create a temporary depth texture\n"); + FREE(trans); + return NULL; + } + + r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box); + rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth, + 0, 0, 0, box->depth, 0, 0); + pipe_resource_reference(&temp, NULL); + } + + /* Just get the strides. */ + r600_texture_get_offset(rctx->screen, staging_depth, level, NULL, + &trans->b.b.stride, + &trans->b.b.layer_stride); + } else { + /* XXX: only readback the rectangle which is being mapped? */ + /* XXX: when discard is true, no need to read back from depth texture */ + if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + FREE(trans); + return NULL; + } + + rctx->blit_decompress_depth(ctx, rtex, staging_depth, + level, level, + box->z, box->z + box->depth - 1, + 0, 0); + + offset = r600_texture_get_offset(rctx->screen, staging_depth, + level, box, + &trans->b.b.stride, + &trans->b.b.layer_stride); + } + + trans->staging = (struct r600_resource*)staging_depth; + buf = trans->staging; + } else if (use_staging_texture) { + struct pipe_resource resource; + struct r600_texture *staging; + + r600_init_temp_resource_from_box(&resource, texture, box, level, + R600_RESOURCE_FLAG_TRANSFER); + resource.usage = (usage & PIPE_TRANSFER_READ) ? + PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; + + /* Create the temporary texture. */ + staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource); + if (!staging) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + FREE(trans); + return NULL; + } + trans->staging = &staging->resource; + + /* Just get the strides. */ + r600_texture_get_offset(rctx->screen, staging, 0, NULL, + &trans->b.b.stride, + &trans->b.b.layer_stride); + + if (usage & PIPE_TRANSFER_READ) + r600_copy_to_staging_texture(ctx, trans); + else + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + + buf = trans->staging; + } else { + /* the resource is mapped directly */ + offset = r600_texture_get_offset(rctx->screen, rtex, level, box, + &trans->b.b.stride, + &trans->b.b.layer_stride); + buf = &rtex->resource; + } + + if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) { + r600_resource_reference(&trans->staging, NULL); + FREE(trans); + return NULL; + } + + *ptransfer = &trans->b.b; + return map + offset; +} + +static void r600_texture_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer* transfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct pipe_resource *texture = transfer->resource; + struct r600_texture *rtex = (struct r600_texture*)texture; + + if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) { + if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { + ctx->resource_copy_region(ctx, texture, transfer->level, + transfer->box.x, transfer->box.y, transfer->box.z, + &rtransfer->staging->b.b, transfer->level, + &transfer->box); + } else { + r600_copy_from_staging_texture(ctx, rtransfer); + } + } + + if (rtransfer->staging) { + rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size; + r600_resource_reference(&rtransfer->staging, NULL); + } + + /* Heuristic for {upload, draw, upload, draw, ..}: + * + * Flush the gfx IB if we've allocated too much texture storage. + * + * The idea is that we don't want to build IBs that use too much + * memory and put pressure on the kernel memory manager and we also + * want to make temporary and invalidated buffers go idle ASAP to + * decrease the total memory usage or make them reusable. The memory + * usage will be slightly higher than given here because of the buffer + * cache in the winsys. + * + * The result is that the kernel memory manager is never a bottleneck. + */ + if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) { + rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL); + rctx->num_alloc_tex_transfer_bytes = 0; + } + + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + +static const struct u_resource_vtbl r600_texture_vtbl = +{ + NULL, /* get_handle */ + r600_texture_destroy, /* resource_destroy */ + r600_texture_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + r600_texture_transfer_unmap, /* transfer_unmap */ +}; + +struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_surface *templ, + unsigned width0, unsigned height0, + unsigned width, unsigned height) +{ + struct r600_surface *surface = CALLOC_STRUCT(r600_surface); + + if (!surface) + return NULL; + + assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level)); + assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level)); + + pipe_reference_init(&surface->base.reference, 1); + pipe_resource_reference(&surface->base.texture, texture); + surface->base.context = pipe; + surface->base.format = templ->format; + surface->base.width = width; + surface->base.height = height; + surface->base.u = templ->u; + + surface->width0 = width0; + surface->height0 = height0; + + return &surface->base; +} + +static struct pipe_surface *r600_create_surface(struct pipe_context *pipe, + struct pipe_resource *tex, + const struct pipe_surface *templ) +{ + unsigned level = templ->u.tex.level; + unsigned width = u_minify(tex->width0, level); + unsigned height = u_minify(tex->height0, level); + unsigned width0 = tex->width0; + unsigned height0 = tex->height0; + + if (tex->target != PIPE_BUFFER && templ->format != tex->format) { + const struct util_format_description *tex_desc + = util_format_description(tex->format); + const struct util_format_description *templ_desc + = util_format_description(templ->format); + + assert(tex_desc->block.bits == templ_desc->block.bits); + + /* Adjust size of surface if and only if the block width or + * height is changed. */ + if (tex_desc->block.width != templ_desc->block.width || + tex_desc->block.height != templ_desc->block.height) { + unsigned nblks_x = util_format_get_nblocksx(tex->format, width); + unsigned nblks_y = util_format_get_nblocksy(tex->format, height); + + width = nblks_x * templ_desc->block.width; + height = nblks_y * templ_desc->block.height; + + width0 = util_format_get_nblocksx(tex->format, width0); + height0 = util_format_get_nblocksy(tex->format, height0); + } + } + + return r600_create_surface_custom(pipe, tex, templ, + width0, height0, + width, height); +} + +static void r600_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surface) +{ + struct r600_surface *surf = (struct r600_surface*)surface; + r600_resource_reference(&surf->cb_buffer_fmask, NULL); + r600_resource_reference(&surf->cb_buffer_cmask, NULL); + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + +static void r600_clear_texture(struct pipe_context *pipe, + struct pipe_resource *tex, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct pipe_screen *screen = pipe->screen; + struct r600_texture *rtex = (struct r600_texture*)tex; + struct pipe_surface tmpl = {{0}}; + struct pipe_surface *sf; + const struct util_format_description *desc = + util_format_description(tex->format); + + tmpl.format = tex->format; + tmpl.u.tex.first_layer = box->z; + tmpl.u.tex.last_layer = box->z + box->depth - 1; + tmpl.u.tex.level = level; + sf = pipe->create_surface(pipe, tex, &tmpl); + if (!sf) + return; + + if (rtex->is_depth) { + unsigned clear; + float depth; + uint8_t stencil = 0; + + /* Depth is always present. */ + clear = PIPE_CLEAR_DEPTH; + desc->unpack_z_float(&depth, 0, data, 0, 1, 1); + + if (rtex->surface.has_stencil) { + clear |= PIPE_CLEAR_STENCIL; + desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); + } + + pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil, + box->x, box->y, + box->width, box->height, false); + } else { + union pipe_color_union color; + + /* pipe_color_union requires the full vec4 representation. */ + if (util_format_is_pure_uint(tex->format)) + desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); + else if (util_format_is_pure_sint(tex->format)) + desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); + else + desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); + + if (screen->is_format_supported(screen, tex->format, + tex->target, 0, + PIPE_BIND_RENDER_TARGET)) { + pipe->clear_render_target(pipe, sf, &color, + box->x, box->y, + box->width, box->height, false); + } else { + /* Software fallback - just for R9G9B9E5_FLOAT */ + util_clear_render_target(pipe, sf, &color, + box->x, box->y, + box->width, box->height); + } + } + pipe_surface_reference(&sf, NULL); +} + +unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap) +{ + const struct util_format_description *desc = util_format_description(format); + +#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz) + + if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ + return V_0280A0_SWAP_STD; + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return ~0U; + + switch (desc->nr_channels) { + case 1: + if (HAS_SWIZZLE(0,X)) + return V_0280A0_SWAP_STD; /* X___ */ + else if (HAS_SWIZZLE(3,X)) + return V_0280A0_SWAP_ALT_REV; /* ___X */ + break; + case 2: + if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) || + (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) || + (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y))) + return V_0280A0_SWAP_STD; /* XY__ */ + else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) || + (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) || + (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X))) + /* YX__ */ + return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV); + else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y)) + return V_0280A0_SWAP_ALT; /* X__Y */ + else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X)) + return V_0280A0_SWAP_ALT_REV; /* Y__X */ + break; + case 3: + if (HAS_SWIZZLE(0,X)) + return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD); + else if (HAS_SWIZZLE(0,Z)) + return V_0280A0_SWAP_STD_REV; /* ZYX */ + break; + case 4: + /* check the middle channels, the 1st and 4th channel can be NONE */ + if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) { + return V_0280A0_SWAP_STD; /* XYZW */ + } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) { + return V_0280A0_SWAP_STD_REV; /* WZYX */ + } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) { + return V_0280A0_SWAP_ALT; /* ZYXW */ + } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) { + /* YZWX */ + if (desc->is_array) + return V_0280A0_SWAP_ALT_REV; + else + return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV); + } + break; + } + return ~0U; +} + +/* FAST COLOR CLEAR */ + +static void evergreen_set_clear_color(struct r600_texture *rtex, + enum pipe_format surface_format, + const union pipe_color_union *color) +{ + union util_color uc; + + memset(&uc, 0, sizeof(uc)); + + if (rtex->surface.bpe == 16) { + /* DCC fast clear only: + * CLEAR_WORD0 = R = G = B + * CLEAR_WORD1 = A + */ + assert(color->ui[0] == color->ui[1] && + color->ui[0] == color->ui[2]); + uc.ui[0] = color->ui[0]; + uc.ui[1] = color->ui[3]; + } else if (util_format_is_pure_uint(surface_format)) { + util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(surface_format)) { + util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(color->f, surface_format, &uc); + } + + memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); +} + +void evergreen_do_fast_color_clear(struct r600_common_context *rctx, + struct pipe_framebuffer_state *fb, + struct r600_atom *fb_state, + unsigned *buffers, ubyte *dirty_cbufs, + const union pipe_color_union *color) +{ + int i; + + /* This function is broken in BE, so just disable this path for now */ +#ifdef PIPE_ARCH_BIG_ENDIAN + return; +#endif + + if (rctx->render_cond) + return; + + for (i = 0; i < fb->nr_cbufs; i++) { + struct r600_texture *tex; + unsigned clear_bit = PIPE_CLEAR_COLOR0 << i; + + if (!fb->cbufs[i]) + continue; + + /* if this colorbuffer is not being cleared */ + if (!(*buffers & clear_bit)) + continue; + + tex = (struct r600_texture *)fb->cbufs[i]->texture; + + /* the clear is allowed if all layers are bound */ + if (fb->cbufs[i]->u.tex.first_layer != 0 || + fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) { + continue; + } + + /* cannot clear mipmapped textures */ + if (fb->cbufs[i]->texture->last_level != 0) { + continue; + } + + /* only supported on tiled surfaces */ + if (tex->surface.is_linear) { + continue; + } + + /* shared textures can't use fast clear without an explicit flush, + * because there is no way to communicate the clear color among + * all clients + */ + if (tex->resource.b.is_shared && + !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) + continue; + + { + /* 128-bit formats are unusupported */ + if (tex->surface.bpe > 8) { + continue; + } + + /* ensure CMASK is enabled */ + r600_texture_alloc_cmask_separate(rctx->screen, tex); + if (tex->cmask.size == 0) { + continue; + } + + /* Do the fast clear. */ + rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, + tex->cmask.offset, tex->cmask.size, 0, + R600_COHERENCY_CB_META); + + bool need_compressed_update = !tex->dirty_level_mask; + + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + + if (need_compressed_update) + p_atomic_inc(&rctx->screen->compressed_colortex_counter); + } + + evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); + + if (dirty_cbufs) + *dirty_cbufs |= 1 << i; + rctx->set_atom_dirty(rctx, fb_state, true); + *buffers &= ~clear_bit; + } +} + +static struct pipe_memory_object * +r600_memobj_from_handle(struct pipe_screen *screen, + struct winsys_handle *whandle, + bool dedicated) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object); + struct pb_buffer *buf = NULL; + uint32_t stride, offset; + + if (!memobj) + return NULL; + + buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + &stride, &offset); + if (!buf) { + free(memobj); + return NULL; + } + + memobj->b.dedicated = dedicated; + memobj->buf = buf; + memobj->stride = stride; + memobj->offset = offset; + + return (struct pipe_memory_object *)memobj; + +} + +static void +r600_memobj_destroy(struct pipe_screen *screen, + struct pipe_memory_object *_memobj) +{ + struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; + + pb_reference(&memobj->buf, NULL); + free(memobj); +} + +static struct pipe_resource * +r600_texture_from_memobj(struct pipe_screen *screen, + const struct pipe_resource *templ, + struct pipe_memory_object *_memobj, + uint64_t offset) +{ + int r; + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; + struct r600_texture *rtex; + struct radeon_surf surface = {}; + struct radeon_bo_metadata metadata = {}; + enum radeon_surf_mode array_mode; + bool is_scanout; + struct pb_buffer *buf = NULL; + + if (memobj->b.dedicated) { + rscreen->ws->buffer_get_metadata(memobj->buf, &metadata); + r600_surface_import_metadata(rscreen, &surface, &metadata, + &array_mode, &is_scanout); + } else { + /** + * The bo metadata is unset for un-dedicated images. So we fall + * back to linear. See answer to question 5 of the + * VK_KHX_external_memory spec for some details. + * + * It is possible that this case isn't going to work if the + * surface pitch isn't correctly aligned by default. + * + * In order to support it correctly we require multi-image + * metadata to be syncrhonized between radv and radeonsi. The + * semantics of associating multiple image metadata to a memory + * object on the vulkan export side are not concretely defined + * either. + * + * All the use cases we are aware of at the moment for memory + * objects use dedicated allocations. So lets keep the initial + * implementation simple. + * + * A possible alternative is to attempt to reconstruct the + * tiling information when the TexParameter TEXTURE_TILING_EXT + * is set. + */ + array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + is_scanout = false; + + } + + r = r600_init_surface(rscreen, &surface, templ, + array_mode, memobj->stride, + offset, true, is_scanout, + false); + if (r) + return NULL; + + rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface); + if (!rtex) + return NULL; + + /* r600_texture_create_object doesn't increment refcount of + * memobj->buf, so increment it here. + */ + pb_reference(&buf, memobj->buf); + + rtex->resource.b.is_shared = true; + rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE; + + if (rscreen->apply_opaque_metadata) + rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); + + return &rtex->resource.b.b; +} + +void r600_init_screen_texture_functions(struct r600_common_screen *rscreen) +{ + rscreen->b.resource_from_handle = r600_texture_from_handle; + rscreen->b.resource_get_handle = r600_texture_get_handle; + rscreen->b.resource_from_memobj = r600_texture_from_memobj; + rscreen->b.memobj_create_from_handle = r600_memobj_from_handle; + rscreen->b.memobj_destroy = r600_memobj_destroy; +} + +void r600_init_context_texture_functions(struct r600_common_context *rctx) +{ + rctx->b.create_surface = r600_create_surface; + rctx->b.surface_destroy = r600_surface_destroy; + rctx->b.clear_texture = r600_clear_texture; +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600_viewport.c b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c new file mode 100644 index 000000000..0797f932f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c @@ -0,0 +1,456 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "r600_cs.h" +#include "util/u_viewport.h" +#include "tgsi/tgsi_scan.h" + +#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C +#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8 +#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C + +#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250 +#define S_028250_TL_X(x) (((unsigned)(x) & 0x7FFF) << 0) +#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF) +#define C_028250_TL_X 0xFFFF8000 +#define S_028250_TL_Y(x) (((unsigned)(x) & 0x7FFF) << 16) +#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF) +#define C_028250_TL_Y 0x8000FFFF +#define S_028250_WINDOW_OFFSET_DISABLE(x) (((unsigned)(x) & 0x1) << 31) +#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1) +#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF +#define S_028254_BR_X(x) (((unsigned)(x) & 0x7FFF) << 0) +#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF) +#define C_028254_BR_X 0xFFFF8000 +#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16) +#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF) +#define C_028254_BR_Y 0x8000FFFF +#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0 +#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4 + +#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192) + +static void r600_set_scissor_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *state) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + int i; + + for (i = 0; i < num_scissors; i++) + rctx->scissors.states[start_slot + i] = state[i]; + + if (!rctx->scissor_enabled) + return; + + rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; + rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true); +} + +/* Since the guard band disables clipping, we have to clip per-pixel + * using a scissor. + */ +static void r600_get_scissor_from_viewport(struct r600_common_context *rctx, + const struct pipe_viewport_state *vp, + struct r600_signed_scissor *scissor) +{ + float tmp, minx, miny, maxx, maxy; + + /* Convert (-1, -1) and (1, 1) from clip space into window space. */ + minx = -vp->scale[0] + vp->translate[0]; + miny = -vp->scale[1] + vp->translate[1]; + maxx = vp->scale[0] + vp->translate[0]; + maxy = vp->scale[1] + vp->translate[1]; + + /* r600_draw_rectangle sets this. Disable the scissor. */ + if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) { + scissor->minx = scissor->miny = 0; + scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx); + return; + } + + /* Handle inverted viewports. */ + if (minx > maxx) { + tmp = minx; + minx = maxx; + maxx = tmp; + } + if (miny > maxy) { + tmp = miny; + miny = maxy; + maxy = tmp; + } + + /* Convert to integer and round up the max bounds. */ + scissor->minx = minx; + scissor->miny = miny; + scissor->maxx = ceilf(maxx); + scissor->maxy = ceilf(maxy); +} + +static void r600_clamp_scissor(struct r600_common_context *rctx, + struct pipe_scissor_state *out, + struct r600_signed_scissor *scissor) +{ + unsigned max_scissor = GET_MAX_SCISSOR(rctx); + out->minx = CLAMP(scissor->minx, 0, max_scissor); + out->miny = CLAMP(scissor->miny, 0, max_scissor); + out->maxx = CLAMP(scissor->maxx, 0, max_scissor); + out->maxy = CLAMP(scissor->maxy, 0, max_scissor); +} + +static void r600_clip_scissor(struct pipe_scissor_state *out, + struct pipe_scissor_state *clip) +{ + out->minx = MAX2(out->minx, clip->minx); + out->miny = MAX2(out->miny, clip->miny); + out->maxx = MIN2(out->maxx, clip->maxx); + out->maxy = MIN2(out->maxy, clip->maxy); +} + +static void r600_scissor_make_union(struct r600_signed_scissor *out, + struct r600_signed_scissor *in) +{ + out->minx = MIN2(out->minx, in->minx); + out->miny = MIN2(out->miny, in->miny); + out->maxx = MAX2(out->maxx, in->maxx); + out->maxy = MAX2(out->maxy, in->maxy); +} + +void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx, + struct pipe_scissor_state *scissor) +{ + if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) { + if (scissor->maxx == 0) + scissor->minx = 1; + if (scissor->maxy == 0) + scissor->miny = 1; + + if (rctx->chip_class == CAYMAN && + scissor->maxx == 1 && scissor->maxy == 1) + scissor->maxx = 2; + } +} + +static void r600_emit_one_scissor(struct r600_common_context *rctx, + struct radeon_winsys_cs *cs, + struct r600_signed_scissor *vp_scissor, + struct pipe_scissor_state *scissor) +{ + struct pipe_scissor_state final; + + if (rctx->vs_disables_clipping_viewport) { + final.minx = final.miny = 0; + final.maxx = final.maxy = GET_MAX_SCISSOR(rctx); + } else { + r600_clamp_scissor(rctx, &final, vp_scissor); + } + + if (scissor) + r600_clip_scissor(&final, scissor); + + evergreen_apply_scissor_bug_workaround(rctx, &final); + + radeon_emit(cs, S_028250_TL_X(final.minx) | + S_028250_TL_Y(final.miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(final.maxx) | + S_028254_BR_Y(final.maxy)); +} + +/* the range is [-MAX, MAX] */ +#define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384) + +static void r600_emit_guardband(struct r600_common_context *rctx, + struct r600_signed_scissor *vp_as_scissor) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct pipe_viewport_state vp; + float left, top, right, bottom, max_range, guardband_x, guardband_y; + + /* Reconstruct the viewport transformation from the scissor. */ + vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; + vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; + vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; + vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; + + /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */ + if (vp_as_scissor->minx == vp_as_scissor->maxx) + vp.scale[0] = 0.5; + if (vp_as_scissor->miny == vp_as_scissor->maxy) + vp.scale[1] = 0.5; + + /* Find the biggest guard band that is inside the supported viewport + * range. The guard band is specified as a horizontal and vertical + * distance from (0,0) in clip space. + * + * This is done by applying the inverse viewport transformation + * on the viewport limits to get those limits in clip space. + * + * Use a limit one pixel smaller to allow for some precision error. + */ + max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1; + left = (-max_range - vp.translate[0]) / vp.scale[0]; + right = ( max_range - vp.translate[0]) / vp.scale[0]; + top = (-max_range - vp.translate[1]) / vp.scale[1]; + bottom = ( max_range - vp.translate[1]) / vp.scale[1]; + + assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); + + guardband_x = MIN2(-left, right); + guardband_y = MIN2(-top, bottom); + + /* If any of the GB registers is updated, all of them must be updated. */ + if (rctx->chip_class >= CAYMAN) + radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4); + else + radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4); + + radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */ + radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */ + radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */ + radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ +} + +static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct pipe_scissor_state *states = rctx->scissors.states; + unsigned mask = rctx->scissors.dirty_mask; + bool scissor_enabled = rctx->scissor_enabled; + struct r600_signed_scissor max_vp_scissor; + int i; + + /* The simple case: Only 1 viewport is active. */ + if (!rctx->vs_writes_viewport_index) { + struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0]; + + if (!(mask & 1)) + return; + + radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); + r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL); + r600_emit_guardband(rctx, vp); + rctx->scissors.dirty_mask &= ~1; /* clear one bit */ + return; + } + + /* Shaders can draw to any viewport. Make a union of all viewports. */ + max_vp_scissor = rctx->viewports.as_scissor[0]; + for (i = 1; i < R600_MAX_VIEWPORTS; i++) + r600_scissor_make_union(&max_vp_scissor, + &rctx->viewports.as_scissor[i]); + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + + start * 4 * 2, count * 2); + for (i = start; i < start+count; i++) { + r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i], + scissor_enabled ? &states[i] : NULL); + } + } + r600_emit_guardband(rctx, &max_vp_scissor); + rctx->scissors.dirty_mask = 0; +} + +static void r600_set_viewport_states(struct pipe_context *ctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *state) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + unsigned mask; + int i; + + for (i = 0; i < num_viewports; i++) { + unsigned index = start_slot + i; + + rctx->viewports.states[index] = state[i]; + r600_get_scissor_from_viewport(rctx, &state[i], + &rctx->viewports.as_scissor[index]); + } + + mask = ((1 << num_viewports) - 1) << start_slot; + rctx->viewports.dirty_mask |= mask; + rctx->viewports.depth_range_dirty_mask |= mask; + rctx->scissors.dirty_mask |= mask; + rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true); + rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true); +} + +static void r600_emit_one_viewport(struct r600_common_context *rctx, + struct pipe_viewport_state *state) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + + radeon_emit(cs, fui(state->scale[0])); + radeon_emit(cs, fui(state->translate[0])); + radeon_emit(cs, fui(state->scale[1])); + radeon_emit(cs, fui(state->translate[1])); + radeon_emit(cs, fui(state->scale[2])); + radeon_emit(cs, fui(state->translate[2])); +} + +static void r600_emit_viewports(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct pipe_viewport_state *states = rctx->viewports.states; + unsigned mask = rctx->viewports.dirty_mask; + + /* The simple case: Only 1 viewport is active. */ + if (!rctx->vs_writes_viewport_index) { + if (!(mask & 1)) + return; + + radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); + r600_emit_one_viewport(rctx, &states[0]); + rctx->viewports.dirty_mask &= ~1; /* clear one bit */ + return; + } + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + + start * 4 * 6, count * 6); + for (i = start; i < start+count; i++) + r600_emit_one_viewport(rctx, &states[i]); + } + rctx->viewports.dirty_mask = 0; +} + +static void r600_emit_depth_ranges(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->gfx.cs; + struct pipe_viewport_state *states = rctx->viewports.states; + unsigned mask = rctx->viewports.depth_range_dirty_mask; + float zmin, zmax; + + /* The simple case: Only 1 viewport is active. */ + if (!rctx->vs_writes_viewport_index) { + if (!(mask & 1)) + return; + + util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax); + + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); + radeon_emit(cs, fui(zmin)); + radeon_emit(cs, fui(zmax)); + rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */ + return; + } + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + + start * 4 * 2, count * 2); + for (i = start; i < start+count; i++) { + util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax); + radeon_emit(cs, fui(zmin)); + radeon_emit(cs, fui(zmax)); + } + } + rctx->viewports.depth_range_dirty_mask = 0; +} + +static void r600_emit_viewport_states(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + r600_emit_viewports(rctx); + r600_emit_depth_ranges(rctx); +} + +/* Set viewport dependencies on pipe_rasterizer_state. */ +void r600_viewport_set_rast_deps(struct r600_common_context *rctx, + bool scissor_enable, bool clip_halfz) +{ + if (rctx->scissor_enabled != scissor_enable) { + rctx->scissor_enabled = scissor_enable; + rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; + rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true); + } + if (rctx->clip_halfz != clip_halfz) { + rctx->clip_halfz = clip_halfz; + rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; + rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true); + } +} + +/** + * Normally, we only emit 1 viewport and 1 scissor if no shader is using + * the VIEWPORT_INDEX output, and emitting the other viewports and scissors + * is delayed. When a shader with VIEWPORT_INDEX appears, this should be + * called to emit the rest. + */ +void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx, + struct tgsi_shader_info *info) +{ + bool vs_window_space; + + if (!info) + return; + + /* When the VS disables clipping and viewport transformation. */ + vs_window_space = + info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; + + if (rctx->vs_disables_clipping_viewport != vs_window_space) { + rctx->vs_disables_clipping_viewport = vs_window_space; + rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; + rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true); + } + + /* Viewport index handling. */ + rctx->vs_writes_viewport_index = info->writes_viewport_index; + if (!rctx->vs_writes_viewport_index) + return; + + if (rctx->scissors.dirty_mask) + rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true); + + if (rctx->viewports.dirty_mask || + rctx->viewports.depth_range_dirty_mask) + rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true); +} + +void r600_init_viewport_functions(struct r600_common_context *rctx) +{ + rctx->scissors.atom.emit = r600_emit_scissors; + rctx->viewports.atom.emit = r600_emit_viewport_states; + + rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6; + rctx->viewports.atom.num_dw = 2 + 16 * 6; + + rctx->b.set_scissor_states = r600_set_scissor_states; + rctx->b.set_viewport_states = r600_set_viewport_states; +} diff --git a/lib/mesa/src/gallium/drivers/r600/r600d_common.h b/lib/mesa/src/gallium/drivers/r600/r600d_common.h new file mode 100644 index 000000000..ed1d46076 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/r600d_common.h @@ -0,0 +1,135 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + */ + +#ifndef R600D_COMMON_H +#define R600D_COMMON_H + +#define R600_CONFIG_REG_OFFSET 0x08000 +#define R600_CONTEXT_REG_OFFSET 0x28000 +#define SI_SH_REG_OFFSET 0x0000B000 +#define SI_SH_REG_END 0x0000C000 +#define CIK_UCONFIG_REG_OFFSET 0x00030000 +#define CIK_UCONFIG_REG_END 0x00038000 + +#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) +#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) +#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8) +#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) +#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate)) + +#define PKT3_NOP 0x10 +#define PKT3_SET_PREDICATION 0x20 +#define PKT3_STRMOUT_BUFFER_UPDATE 0x34 +#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1 +#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1) +#define STRMOUT_OFFSET_FROM_PACKET 0 +#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1 +#define STRMOUT_OFFSET_FROM_MEM 2 +#define STRMOUT_OFFSET_NONE 3 +#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8) +#define PKT3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_EQUAL 3 +#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4) +#define PKT3_COPY_DATA 0x40 +#define COPY_DATA_SRC_SEL(x) ((x) & 0xf) +#define COPY_DATA_REG 0 +#define COPY_DATA_MEM 1 +#define COPY_DATA_PERF 4 +#define COPY_DATA_IMM 5 +#define COPY_DATA_TIMESTAMP 9 +#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8) +#define COPY_DATA_MEM_ASYNC 5 +#define COPY_DATA_COUNT_SEL (1 << 16) +#define COPY_DATA_WR_CONFIRM (1 << 20) +#define PKT3_EVENT_WRITE 0x46 +#define PKT3_EVENT_WRITE_EOP 0x47 +#define EOP_INT_SEL(x) ((x) << 24) +#define EOP_INT_SEL_NONE 0 +#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3 +#define EOP_DATA_SEL(x) ((x) << 29) +#define EOP_DATA_SEL_DISCARD 0 +#define EOP_DATA_SEL_VALUE_32BIT 1 +#define EOP_DATA_SEL_VALUE_64BIT 2 +#define EOP_DATA_SEL_TIMESTAMP 3 +#define PKT3_RELEASE_MEM 0x49 /* GFX9+ */ +#define PKT3_SET_CONFIG_REG 0x68 +#define PKT3_SET_CONTEXT_REG 0x69 +#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */ +#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */ +#define SURFACE_BASE_UPDATE_DEPTH (1 << 0) +#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x)) +#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1) +#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) +#define PKT3_SET_SH_REG 0x76 /* SI and later */ +#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */ + +#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */ +#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */ +#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x3 /* EG and later */ +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 +#define EVENT_TYPE_ZPASS_DONE 0x15 +#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 +#define EVENT_TYPE_PERFCOUNTER_START 0x17 +#define EVENT_TYPE_PERFCOUNTER_STOP 0x18 +#define EVENT_TYPE_PIPELINESTAT_START 25 +#define EVENT_TYPE_PIPELINESTAT_STOP 26 +#define EVENT_TYPE_PERFCOUNTER_SAMPLE 0x1B +#define EVENT_TYPE_SAMPLE_PIPELINESTAT 30 +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f +#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20 +#define EVENT_TYPE_BOTTOM_OF_PIPE_TS 40 +#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */ +#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */ +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + * 5 - TS events + */ + +#define PREDICATION_OP_CLEAR 0x0 +#define PREDICATION_OP_ZPASS 0x1 +#define PREDICATION_OP_PRIMCOUNT 0x2 +#define PREDICATION_OP_BOOL64 0x3 +#define PRED_OP(x) ((x) << 16) +#define PREDICATION_CONTINUE (1 << 31) +#define PREDICATION_HINT_WAIT (0 << 12) +#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) +#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) +#define PREDICATION_DRAW_VISIBLE (1 << 8) + +#define V_0280A0_SWAP_STD 0x00000000 +#define V_0280A0_SWAP_ALT 0x00000001 +#define V_0280A0_SWAP_STD_REV 0x00000002 +#define V_0280A0_SWAP_ALT_REV 0x00000003 + +#define EG_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 17) +#define SI_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 13) + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c new file mode 100644 index 000000000..b0551d7e1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c @@ -0,0 +1,1492 @@ +/************************************************************************** + * + * Copyright 2011 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include <sys/types.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <stdio.h> + +#include "pipe/p_video_codec.h" + +#include "util/u_memory.h" +#include "util/u_video.h" + +#include "vl/vl_defines.h" +#include "vl/vl_mpeg12_decoder.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_uvd.h" + +#define NUM_BUFFERS 4 + +#define NUM_MPEG2_REFS 6 +#define NUM_H264_REFS 17 +#define NUM_VC1_REFS 5 + +#define FB_BUFFER_OFFSET 0x1000 +#define FB_BUFFER_SIZE 2048 +#define FB_BUFFER_SIZE_TONGA (2048 * 64) +#define IT_SCALING_TABLE_SIZE 992 +#define UVD_SESSION_CONTEXT_SIZE (128 * 1024) + +/* UVD decoder representation */ +struct ruvd_decoder { + struct pipe_video_codec base; + + ruvd_set_dtb set_dtb; + + unsigned stream_handle; + unsigned stream_type; + unsigned frame_number; + + struct pipe_screen *screen; + struct radeon_winsys* ws; + struct radeon_winsys_cs* cs; + + unsigned cur_buffer; + + struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; + struct ruvd_msg *msg; + uint32_t *fb; + unsigned fb_size; + uint8_t *it; + + struct rvid_buffer bs_buffers[NUM_BUFFERS]; + void* bs_ptr; + unsigned bs_size; + + struct rvid_buffer dpb; + bool use_legacy; + struct rvid_buffer ctx; + struct rvid_buffer sessionctx; + struct { + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned cntl; + } reg; +}; + +/* flush IB to the hardware */ +static int flush(struct ruvd_decoder *dec, unsigned flags) +{ + return dec->ws->cs_flush(dec->cs, flags, NULL); +} + +/* add a new set register command to the IB */ +static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val) +{ + radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); + radeon_emit(dec->cs, val); +} + +/* send a command to the VCPU through the GPCOM registers */ +static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, + struct pb_buffer* buf, uint32_t off, + enum radeon_bo_usage usage, enum radeon_bo_domain domain) +{ + int reloc_idx; + + reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, + RADEON_PRIO_UVD); + if (!dec->use_legacy) { + uint64_t addr; + addr = dec->ws->buffer_get_virtual_address(buf); + addr = addr + off; + set_reg(dec, dec->reg.data0, addr); + set_reg(dec, dec->reg.data1, addr >> 32); + } else { + off += dec->ws->buffer_get_reloc_offset(buf); + set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off); + set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4); + } + set_reg(dec, dec->reg.cmd, cmd << 1); +} + +/* do the codec needs an IT buffer ?*/ +static bool have_it(struct ruvd_decoder *dec) +{ + return dec->stream_type == RUVD_CODEC_H264_PERF || + dec->stream_type == RUVD_CODEC_H265; +} + +/* map the next available message/feedback/itscaling buffer */ +static void map_msg_fb_it_buf(struct ruvd_decoder *dec) +{ + struct rvid_buffer* buf; + uint8_t *ptr; + + /* grab the current message/feedback buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + + /* and map it for CPU access */ + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + + /* calc buffer offsets */ + dec->msg = (struct ruvd_msg *)ptr; + memset(dec->msg, 0, sizeof(*dec->msg)); + + dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET); + if (have_it(dec)) + dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size); +} + +/* unmap and send a message command to the VCPU */ +static void send_msg_buf(struct ruvd_decoder *dec) +{ + struct rvid_buffer* buf; + + /* ignore the request if message/feedback buffer isn't mapped */ + if (!dec->msg || !dec->fb) + return; + + /* grab the current message buffer */ + buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + + /* unmap the buffer */ + dec->ws->buffer_unmap(buf->res->buf); + dec->msg = NULL; + dec->fb = NULL; + dec->it = NULL; + + + if (dec->sessionctx.res) + send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER, + dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE, + RADEON_DOMAIN_VRAM); + + /* and send it to the hardware */ + send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0, + RADEON_USAGE_READ, RADEON_DOMAIN_GTT); +} + +/* cycle to the next set of buffers */ +static void next_buffer(struct ruvd_decoder *dec) +{ + ++dec->cur_buffer; + dec->cur_buffer %= NUM_BUFFERS; +} + +/* convert the profile into something UVD understands */ +static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) +{ + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + return RUVD_CODEC_H264; + + case PIPE_VIDEO_FORMAT_VC1: + return RUVD_CODEC_VC1; + + case PIPE_VIDEO_FORMAT_MPEG12: + return RUVD_CODEC_MPEG2; + + case PIPE_VIDEO_FORMAT_MPEG4: + return RUVD_CODEC_MPEG4; + + case PIPE_VIDEO_FORMAT_HEVC: + return RUVD_CODEC_H265; + + case PIPE_VIDEO_FORMAT_JPEG: + return RUVD_CODEC_MJPEG; + + default: + assert(0); + return 0; + } +} + +static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) +{ + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; +} + +static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) +{ + unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); + log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; +} + +static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) +{ + return 16; +} + +/* calculate size of reference picture buffer */ +static unsigned calc_dpb_size(struct ruvd_decoder *dec) +{ + unsigned width_in_mb, height_in_mb, image_size, dpb_size; + + // always align them to MB size for dpb calculation + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + // always one more for currently decoded picture + unsigned max_references = dec->base.max_references + 1; + + // aligned size of a single frame + image_size = align(width, get_db_pitch_alignment(dec)) * height; + image_size += image_size / 2; + image_size = align(image_size, 1024); + + // picture width & height in 16 pixel units + width_in_mb = width / VL_MACROBLOCK_WIDTH; + height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); + + switch (u_reduce_video_profile(dec->base.profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: { + if (!dec->use_legacy) { + unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned alignment = 64, num_dpb_buffer; + + if (dec->stream_type == RUVD_CODEC_H264_PERF) + alignment = 256; + switch(dec->base.level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { + dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); + dpb_size += align(width_in_mb * height_in_mb * 32, alignment); + } + } else { + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_H264_REFS, max_references); + // reference picture buffer + dpb_size = image_size * max_references; + if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { + // macroblock context buffer + dpb_size += width_in_mb * height_in_mb * max_references * 192; + // IT surface buffer + dpb_size += width_in_mb * height_in_mb * 32; + } + } + break; + } + + case PIPE_VIDEO_FORMAT_HEVC: + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references; + else + dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references; + break; + + case PIPE_VIDEO_FORMAT_VC1: + // the firmware seems to allways assume a minimum of ref frames + max_references = MAX2(NUM_VC1_REFS, max_references); + + // reference picture buffer + dpb_size = image_size * max_references; + + // CONTEXT_BUFFER + dpb_size += width_in_mb * height_in_mb * 128; + + // IT surface buffer + dpb_size += width_in_mb * 64; + + // DB surface buffer + dpb_size += width_in_mb * 128; + + // BP + dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + // reference picture buffer, must be big enough for all frames + dpb_size = image_size * NUM_MPEG2_REFS; + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + // reference picture buffer + dpb_size = image_size * max_references; + + // CM + dpb_size += width_in_mb * height_in_mb * 64; + + // IT surface buffer + dpb_size += align(width_in_mb * height_in_mb * 32, 64); + + dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + dpb_size = 0; + break; + + default: + // something is missing here + assert(0); + + // at least use a sane default value + dpb_size = 32 * 1024 * 1024; + break; + } + return dpb_size; +} + +/* free associated data in the video buffer callback */ +static void ruvd_destroy_associated_data(void *data) +{ + /* NOOP, since we only use an intptr */ +} + +/* get h264 specific message bits */ +static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic) +{ + struct ruvd_h264 result; + + memset(&result, 0, sizeof(result)); + switch (pic->base.profile) { + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: + result.profile = RUVD_H264_PROFILE_BASELINE; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + result.profile = RUVD_H264_PROFILE_MAIN; + break; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + result.profile = RUVD_H264_PROFILE_HIGH; + break; + + default: + assert(0); + break; + } + + result.level = dec->base.level; + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0; + result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1; + result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2; + result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3; + + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4; + result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + + switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? */ + break; + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0; + result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2; + result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3; + result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 6; + result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7; + result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8; + + result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1; + result.slice_group_map_type = pic->pps->slice_group_map_type; + result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1; + result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26; + result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset; + result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset; + + memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); + memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); + + if (dec->stream_type == RUVD_CODEC_H264_PERF) { + memcpy(dec->it, result.scaling_list_4x4, 6*16); + memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); + } + + result.num_ref_frames = pic->num_ref_frames; + + result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + result.frame_num = pic->frame_num; + memcpy(result.frame_num_list, pic->frame_num_list, 4*16); + result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0]; + result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1]; + memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2); + + result.decoded_pic_idx = pic->frame_num; + + return result; +} + +/* get h265 specific message bits */ +static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) +{ + struct ruvd_h265 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ??? + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_idx = pic->CurrPicOrderCntVal; + result.curr_poc = pic->CurrPicOrderCntVal; + + vl_video_buffer_set_associated_data(target, &dec->base, + (void *)(uintptr_t)pic->CurrPicOrderCntVal, + &ruvd_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) { + if (target->buffer_format == PIPE_FORMAT_P016) { + result.p010_mode = 1; + result.msb_mode = 1; + } else { + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.sclr_luma10to8 = 4; + result.sclr_chroma10to8 = 4; + } + } + + /* TODO + result.highestTid; + result.isNonRef; + + IDRPicFlag; + RAPPicFlag; + NumPocTotalCurr; + NumShortTermPictureSliceHeaderBits; + NumLongTermPictureSliceHeaderBits; + + IsLongTerm[16]; + */ + + return result; +} + +/* get vc1 specific message bits */ +static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic) +{ + struct ruvd_vc1 result; + + memset(&result, 0, sizeof(result)); + + switch(pic->base.profile) { + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + result.profile = RUVD_VC1_PROFILE_SIMPLE; + result.level = 1; + break; + + case PIPE_VIDEO_PROFILE_VC1_MAIN: + result.profile = RUVD_VC1_PROFILE_MAIN; + result.level = 2; + break; + + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + result.profile = RUVD_VC1_PROFILE_ADVANCED; + result.level = 4; + break; + + default: + assert(0); + } + + /* fields common for all profiles */ + result.sps_info_flags |= pic->postprocflag << 7; + result.sps_info_flags |= pic->pulldown << 6; + result.sps_info_flags |= pic->interlace << 5; + result.sps_info_flags |= pic->tfcntrflag << 4; + result.sps_info_flags |= pic->finterpflag << 3; + result.sps_info_flags |= pic->psf << 1; + + result.pps_info_flags |= pic->range_mapy_flag << 31; + result.pps_info_flags |= pic->range_mapy << 28; + result.pps_info_flags |= pic->range_mapuv_flag << 27; + result.pps_info_flags |= pic->range_mapuv << 24; + result.pps_info_flags |= pic->multires << 21; + result.pps_info_flags |= pic->maxbframes << 16; + result.pps_info_flags |= pic->overlap << 11; + result.pps_info_flags |= pic->quantizer << 9; + result.pps_info_flags |= pic->panscan_flag << 7; + result.pps_info_flags |= pic->refdist_flag << 6; + result.pps_info_flags |= pic->vstransform << 0; + + /* some fields only apply to main/advanced profile */ + if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) { + result.pps_info_flags |= pic->syncmarker << 20; + result.pps_info_flags |= pic->rangered << 19; + result.pps_info_flags |= pic->loopfilter << 5; + result.pps_info_flags |= pic->fastuvmc << 4; + result.pps_info_flags |= pic->extended_mv << 3; + result.pps_info_flags |= pic->extended_dmv << 8; + result.pps_info_flags |= pic->dquant << 1; + } + + result.chroma_format = 1; + +#if 0 +//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT) +uint32_t slice_count +uint8_t picture_type +uint8_t frame_coding_mode +uint8_t deblockEnable +uint8_t pquant +#endif + + return result; +} + +/* extract the frame number from a referenced video buffer */ +static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref) +{ + uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS; + uint32_t max = MAX2(dec->frame_number, 1) - 1; + uintptr_t frame; + + /* seems to be the most sane fallback */ + if (!ref) + return max; + + /* get the frame number from the associated data */ + frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + + /* limit the frame number to a valid range */ + return MAX2(MIN2(frame, max), min); +} + +/* get mpeg2 specific msg bits */ +static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, + struct pipe_mpeg12_picture_desc *pic) +{ + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + struct ruvd_mpeg2 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + result.load_intra_quantiser_matrix = 1; + result.load_nonintra_quantiser_matrix = 1; + + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } + + result.profile_and_level_indication = 0; + result.chroma_format = 0x1; + + result.picture_coding_type = pic->picture_coding_type; + result.f_code[0][0] = pic->f_code[0][0] + 1; + result.f_code[0][1] = pic->f_code[0][1] + 1; + result.f_code[1][0] = pic->f_code[1][0] + 1; + result.f_code[1][1] = pic->f_code[1][1] + 1; + result.intra_dc_precision = pic->intra_dc_precision; + result.pic_structure = pic->picture_structure; + result.top_field_first = pic->top_field_first; + result.frame_pred_frame_dct = pic->frame_pred_frame_dct; + result.concealment_motion_vectors = pic->concealment_motion_vectors; + result.q_scale_type = pic->q_scale_type; + result.intra_vlc_format = pic->intra_vlc_format; + result.alternate_scan = pic->alternate_scan; + + return result; +} + +/* get mpeg4 specific msg bits */ +static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec, + struct pipe_mpeg4_picture_desc *pic) +{ + struct ruvd_mpeg4 result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.decoded_pic_idx = dec->frame_number; + for (i = 0; i < 2; ++i) + result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]); + + result.variant_type = 0; + result.profile_and_level_indication = 0xF0; // ASP Level0 + + result.video_object_layer_verid = 0x5; // advanced simple + result.video_object_layer_shape = 0x0; // rectangular + + result.video_object_layer_width = dec->base.width; + result.video_object_layer_height = dec->base.height; + + result.vop_time_increment_resolution = pic->vop_time_increment_resolution; + + result.flags |= pic->short_video_header << 0; + //result.flags |= obmc_disable << 1; + result.flags |= pic->interlaced << 2; + result.flags |= 1 << 3; // load_intra_quant_mat + result.flags |= 1 << 4; // load_nonintra_quant_mat + result.flags |= pic->quarter_sample << 5; + result.flags |= 1 << 6; // complexity_estimation_disable + result.flags |= pic->resync_marker_disable << 7; + //result.flags |= data_partitioned << 8; + //result.flags |= reversible_vlc << 9; + result.flags |= 0 << 10; // newpred_enable + result.flags |= 0 << 11; // reduced_resolution_vop_enable + //result.flags |= scalability << 12; + //result.flags |= is_object_layer_identifier << 13; + //result.flags |= fixed_vop_rate << 14; + //result.flags |= newpred_segment_type << 15; + + result.quant_type = pic->quant_type; + + for (i = 0; i < 64; ++i) { + result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]]; + result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]]; + } + + /* + int32_t trd [2] + int32_t trb [2] + uint8_t vop_coding_type + uint8_t vop_fcode_forward + uint8_t vop_fcode_backward + uint8_t rounding_control + uint8_t alternate_vertical_scan_flag + uint8_t top_field_first + */ + + return result; +} + +static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic) +{ + int size = 0, saved_size, len_pos, i; + uint16_t *bs; + uint8_t *buf = dec->bs_ptr; + + /* SOI */ + buf[size++] = 0xff; + buf[size++] = 0xd8; + + /* DQT */ + buf[size++] = 0xff; + buf[size++] = 0xdb; + + len_pos = size++; + size++; + + for (i = 0; i < 4; ++i) { + if (pic->quantization_table.load_quantiser_table[i] == 0) + continue; + + buf[size++] = i; + memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64); + size += 64; + } + + bs = (uint16_t*)&buf[len_pos]; + *bs = util_bswap16(size - 4); + + saved_size = size; + + /* DHT */ + buf[size++] = 0xff; + buf[size++] = 0xc4; + + len_pos = size++; + size++; + + for (i = 0; i < 2; ++i) { + if (pic->huffman_table.load_huffman_table[i] == 0) + continue; + + buf[size++] = 0x00 | i; + memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16); + size += 16; + memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12); + size += 12; + } + + for (i = 0; i < 2; ++i) { + if (pic->huffman_table.load_huffman_table[i] == 0) + continue; + + buf[size++] = 0x10 | i; + memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16); + size += 16; + memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162); + size += 162; + } + + bs = (uint16_t*)&buf[len_pos]; + *bs = util_bswap16(size - saved_size - 2); + + saved_size = size; + + /* DRI */ + if (pic->slice_parameter.restart_interval) { + buf[size++] = 0xff; + buf[size++] = 0xdd; + buf[size++] = 0x00; + buf[size++] = 0x04; + bs = (uint16_t*)&buf[size++]; + *bs = util_bswap16(pic->slice_parameter.restart_interval); + saved_size = ++size; + } + + /* SOF */ + buf[size++] = 0xff; + buf[size++] = 0xc0; + + len_pos = size++; + size++; + + buf[size++] = 0x08; + + bs = (uint16_t*)&buf[size++]; + *bs = util_bswap16(pic->picture_parameter.picture_height); + size++; + + bs = (uint16_t*)&buf[size++]; + *bs = util_bswap16(pic->picture_parameter.picture_width); + size++; + + buf[size++] = pic->picture_parameter.num_components; + + for (i = 0; i < pic->picture_parameter.num_components; ++i) { + buf[size++] = pic->picture_parameter.components[i].component_id; + buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 | + pic->picture_parameter.components[i].v_sampling_factor; + buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector; + } + + bs = (uint16_t*)&buf[len_pos]; + *bs = util_bswap16(size - saved_size - 2); + + saved_size = size; + + /* SOS */ + buf[size++] = 0xff; + buf[size++] = 0xda; + + len_pos = size++; + size++; + + buf[size++] = pic->slice_parameter.num_components; + + for (i = 0; i < pic->slice_parameter.num_components; ++i) { + buf[size++] = pic->slice_parameter.components[i].component_selector; + buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 | + pic->slice_parameter.components[i].ac_table_selector; + } + + buf[size++] = 0x00; + buf[size++] = 0x3f; + buf[size++] = 0x00; + + bs = (uint16_t*)&buf[len_pos]; + *bs = util_bswap16(size - saved_size - 2); + + dec->bs_ptr += size; + dec->bs_size += size; +} + +/** + * destroy this video decoder + */ +static void ruvd_destroy(struct pipe_video_codec *decoder) +{ + struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; + unsigned i; + + assert(decoder); + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DESTROY; + dec->msg->stream_handle = dec->stream_handle; + send_msg_buf(dec); + + flush(dec, 0); + + dec->ws->cs_destroy(dec->cs); + + for (i = 0; i < NUM_BUFFERS; ++i) { + rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + rvid_destroy_buffer(&dec->bs_buffers[i]); + } + + rvid_destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->ctx); + rvid_destroy_buffer(&dec->sessionctx); + + FREE(dec); +} + +/** + * start decoding of a new frame + */ +static void ruvd_begin_frame(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; + uintptr_t frame; + + assert(decoder); + + frame = ++dec->frame_number; + vl_video_buffer_set_associated_data(target, decoder, (void *)frame, + &ruvd_destroy_associated_data); + + dec->bs_size = 0; + dec->bs_ptr = dec->ws->buffer_map( + dec->bs_buffers[dec->cur_buffer].res->buf, + dec->cs, PIPE_TRANSFER_WRITE); +} + +/** + * decode a macroblock + */ +static void ruvd_decode_macroblock(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + const struct pipe_macroblock *macroblocks, + unsigned num_macroblocks) +{ + /* not supported (yet) */ + assert(0); +} + +/** + * decode a bitstream + */ +static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void * const *buffers, + const unsigned *sizes) +{ + struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; + enum pipe_video_format format = u_reduce_video_profile(picture->profile); + unsigned i; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + if (format == PIPE_VIDEO_FORMAT_JPEG) + get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture); + + for (i = 0; i < num_buffers; ++i) { + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer]; + unsigned new_size = dec->bs_size + sizes[i]; + + if (format == PIPE_VIDEO_FORMAT_JPEG) + new_size += 2; /* save for EOI */ + + if (new_size > buf->res->buf->size) { + dec->ws->buffer_unmap(buf->res->buf); + if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) { + RVID_ERR("Can't resize bitstream buffer!"); + return; + } + + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE); + if (!dec->bs_ptr) + return; + + dec->bs_ptr += dec->bs_size; + } + + memcpy(dec->bs_ptr, buffers[i], sizes[i]); + dec->bs_size += sizes[i]; + dec->bs_ptr += sizes[i]; + } + + if (format == PIPE_VIDEO_FORMAT_JPEG) { + ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */ + ((uint8_t *)dec->bs_ptr)[1] = 0xd9; + dec->bs_size += 2; + dec->bs_ptr += 2; + } +} + +/** + * end decoding of the current frame + */ +static void ruvd_end_frame(struct pipe_video_codec *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ + struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; + struct pb_buffer *dt; + struct rvid_buffer *msg_fb_it_buf, *bs_buf; + unsigned bs_size; + + assert(decoder); + + if (!dec->bs_ptr) + return; + + msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; + bs_buf = &dec->bs_buffers[dec->cur_buffer]; + + bs_size = align(dec->bs_size, 128); + memset(dec->bs_ptr, 0, bs_size - dec->bs_size); + dec->ws->buffer_unmap(bs_buf->res->buf); + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_DECODE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->status_report_feedback_number = dec->frame_number; + + dec->msg->body.decode.stream_type = dec->stream_type; + dec->msg->body.decode.decode_flags = 0x1; + dec->msg->body.decode.width_in_samples = dec->base.width; + dec->msg->body.decode.height_in_samples = dec->base.height; + + if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) || + (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) { + dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16; + dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16; + } + + if (dec->dpb.res) + dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size; + dec->msg->body.decode.bsd_size = bs_size; + dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); + + dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); + + switch (u_reduce_video_profile(picture->profile)) { + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture); + break; + + case PIPE_VIDEO_FORMAT_HEVC: + dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated context buffer.\n"); + } + rvid_clear_buffer(decoder->context, &dec->ctx); + } + + if (dec->ctx.res) + dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; + break; + + case PIPE_VIDEO_FORMAT_VC1: + dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG12: + dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture); + break; + + case PIPE_VIDEO_FORMAT_MPEG4: + dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture); + break; + + case PIPE_VIDEO_FORMAT_JPEG: + break; + + default: + assert(0); + return; + } + + dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config; + dec->msg->body.decode.extension_support = 0x1; + + /* set at least the feedback buffer size */ + dec->fb[0] = dec->fb_size; + + send_msg_buf(dec); + + if (dec->dpb.res) + send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + + if (dec->ctx.res) + send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, + RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, + 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0, + RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM); + send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT); + if (have_it(dec)) + send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf, + FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT); + set_reg(dec, dec->reg.cntl, 1); + + flush(dec, RADEON_FLUSH_ASYNC); + next_buffer(dec); +} + +/** + * flush any outstanding command buffers to the hardware + */ +static void ruvd_flush(struct pipe_video_codec *decoder) +{ +} + +/** + * create and UVD decoder + */ +struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ, + ruvd_set_dtb set_dtb) +{ + struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws; + struct r600_common_context *rctx = (struct r600_common_context*)context; + unsigned dpb_size; + unsigned width = templ->width, height = templ->height; + unsigned bs_buf_size; + struct radeon_info info; + struct ruvd_decoder *dec; + int r, i; + + ws->query_info(ws, &info); + + switch(u_reduce_video_profile(templ->profile)) { + case PIPE_VIDEO_FORMAT_MPEG12: + if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM) + return vl_create_mpeg12_decoder(context, templ); + + /* fall through */ + case PIPE_VIDEO_FORMAT_MPEG4: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + width = align(width, VL_MACROBLOCK_WIDTH); + height = align(height, VL_MACROBLOCK_HEIGHT); + break; + + default: + break; + } + + + dec = CALLOC_STRUCT(ruvd_decoder); + + if (!dec) + return NULL; + + if (info.drm_major < 3) + dec->use_legacy = true; + + dec->base = *templ; + dec->base.context = context; + dec->base.width = width; + dec->base.height = height; + + dec->base.destroy = ruvd_destroy; + dec->base.begin_frame = ruvd_begin_frame; + dec->base.decode_macroblock = ruvd_decode_macroblock; + dec->base.decode_bitstream = ruvd_decode_bitstream; + dec->base.end_frame = ruvd_end_frame; + dec->base.flush = ruvd_flush; + + dec->stream_type = profile2stream_type(dec, info.family); + dec->set_dtb = set_dtb; + dec->stream_handle = rvid_alloc_stream_handle(); + dec->screen = context->screen; + dec->ws = ws; + dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); + if (!dec->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + dec->fb_size = FB_BUFFER_SIZE; + bs_buf_size = width * height * (512 / (16 * 16)); + for (i = 0; i < NUM_BUFFERS; ++i) { + unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; + STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); + if (have_it(dec)) + msg_fb_it_size += IT_SCALING_TABLE_SIZE; + if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i], + msg_fb_it_size, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated message buffers.\n"); + goto error; + } + + if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i], + bs_buf_size, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't allocated bitstream buffers.\n"); + goto error; + } + + rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]); + rvid_clear_buffer(context, &dec->bs_buffers[i]); + } + + dpb_size = calc_dpb_size(dec); + if (dpb_size) { + if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't allocated dpb.\n"); + goto error; + } + rvid_clear_buffer(context, &dec->dpb); + } + + dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; + dec->reg.cntl = RUVD_ENGINE_CNTL; + + map_msg_fb_it_buf(dec); + dec->msg->size = sizeof(*dec->msg); + dec->msg->msg_type = RUVD_MSG_CREATE; + dec->msg->stream_handle = dec->stream_handle; + dec->msg->body.create.stream_type = dec->stream_type; + dec->msg->body.create.width_in_samples = dec->base.width; + dec->msg->body.create.height_in_samples = dec->base.height; + dec->msg->body.create.dpb_size = dpb_size; + send_msg_buf(dec); + r = flush(dec, 0); + if (r) + goto error; + + next_buffer(dec); + + return &dec->base; + +error: + if (dec->cs) dec->ws->cs_destroy(dec->cs); + + for (i = 0; i < NUM_BUFFERS; ++i) { + rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]); + rvid_destroy_buffer(&dec->bs_buffers[i]); + } + + rvid_destroy_buffer(&dec->dpb); + rvid_destroy_buffer(&dec->ctx); + rvid_destroy_buffer(&dec->sessionctx); + + FREE(dec); + + return NULL; +} + +/* calculate top/bottom offset */ +static unsigned texture_offset(struct radeon_surf *surface, unsigned layer) +{ + return surface->u.legacy.level[0].offset + + layer * surface->u.legacy.level[0].slice_size; +} + +/* hw encode the aspect of macro tiles */ +static unsigned macro_tile_aspect(unsigned macro_tile_aspect) +{ + switch (macro_tile_aspect) { + default: + case 1: macro_tile_aspect = 0; break; + case 2: macro_tile_aspect = 1; break; + case 4: macro_tile_aspect = 2; break; + case 8: macro_tile_aspect = 3; break; + } + return macro_tile_aspect; +} + +/* hw encode the bank width and height */ +static unsigned bank_wh(unsigned bankwh) +{ + switch (bankwh) { + default: + case 1: bankwh = 0; break; + case 2: bankwh = 1; break; + case 4: bankwh = 2; break; + case 8: bankwh = 3; break; + } + return bankwh; +} + +/** + * fill decoding target field from the luma and chroma surfaces + */ +void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, + struct radeon_surf *chroma) +{ + msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; + switch (luma->u.legacy.level[0].mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + break; + case RADEON_SURF_MODE_1D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; + break; + case RADEON_SURF_MODE_2D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; + break; + default: + assert(0); + break; + } + + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0); + if (chroma) + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1); + if (chroma) + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } + + if (chroma) { + assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); + assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); + assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); + } + + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); + msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); +} diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h new file mode 100644 index 000000000..c371b1441 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h @@ -0,0 +1,442 @@ +/************************************************************************** + * + * Copyright 2011 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef RADEON_UVD_H +#define RADEON_UVD_H + +#include "radeon/radeon_winsys.h" +#include "vl/vl_video_buffer.h" + +/* UVD uses PM4 packet type 0 and 2 */ +#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) +#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) +#define RUVD_PKT_TYPE_C 0x3FFFFFFF +#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) +#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) +#define RUVD_PKT_COUNT_C 0xC000FFFF +#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) +#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) +#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000 +#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) +#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) + +/* registers involved with UVD */ +#define RUVD_GPCOM_VCPU_CMD 0xEF0C +#define RUVD_GPCOM_VCPU_DATA0 0xEF10 +#define RUVD_GPCOM_VCPU_DATA1 0xEF14 +#define RUVD_ENGINE_CNTL 0xEF18 + +#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c +#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710 +#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714 +#define RUVD_ENGINE_CNTL_SOC15 0x20718 + +/* UVD commands to VCPU */ +#define RUVD_CMD_MSG_BUFFER 0x00000000 +#define RUVD_CMD_DPB_BUFFER 0x00000001 +#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002 +#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003 +#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005 +#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100 +#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204 +#define RUVD_CMD_CONTEXT_BUFFER 0x00000206 + +/* UVD message types */ +#define RUVD_MSG_CREATE 0 +#define RUVD_MSG_DECODE 1 +#define RUVD_MSG_DESTROY 2 + +/* UVD stream types */ +#define RUVD_CODEC_H264 0x00000000 +#define RUVD_CODEC_VC1 0x00000001 +#define RUVD_CODEC_MPEG2 0x00000003 +#define RUVD_CODEC_MPEG4 0x00000004 +#define RUVD_CODEC_H264_PERF 0x00000007 +#define RUVD_CODEC_MJPEG 0x00000008 +#define RUVD_CODEC_H265 0x00000010 + +/* UVD decode target buffer tiling mode */ +#define RUVD_TILE_LINEAR 0x00000000 +#define RUVD_TILE_8X4 0x00000001 +#define RUVD_TILE_8X8 0x00000002 +#define RUVD_TILE_32AS8 0x00000003 + +/* UVD decode target buffer array mode */ +#define RUVD_ARRAY_MODE_LINEAR 0x00000000 +#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001 +#define RUVD_ARRAY_MODE_1D_THIN 0x00000002 +#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004 +#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005 + +/* UVD tile config */ +#define RUVD_BANK_WIDTH(x) ((x) << 0) +#define RUVD_BANK_HEIGHT(x) ((x) << 3) +#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6) +#define RUVD_NUM_BANKS(x) ((x) << 9) + +/* H.264 profile definitions */ +#define RUVD_H264_PROFILE_BASELINE 0x00000000 +#define RUVD_H264_PROFILE_MAIN 0x00000001 +#define RUVD_H264_PROFILE_HIGH 0x00000002 +#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003 +#define RUVD_H264_PROFILE_MVC 0x00000004 + +/* VC-1 profile definitions */ +#define RUVD_VC1_PROFILE_SIMPLE 0x00000000 +#define RUVD_VC1_PROFILE_MAIN 0x00000001 +#define RUVD_VC1_PROFILE_ADVANCED 0x00000002 + +struct ruvd_mvc_element { + uint16_t viewOrderIndex; + uint16_t viewId; + uint16_t numOfAnchorRefsInL0; + uint16_t viewIdOfAnchorRefsInL0[15]; + uint16_t numOfAnchorRefsInL1; + uint16_t viewIdOfAnchorRefsInL1[15]; + uint16_t numOfNonAnchorRefsInL0; + uint16_t viewIdOfNonAnchorRefsInL0[15]; + uint16_t numOfNonAnchorRefsInL1; + uint16_t viewIdOfNonAnchorRefsInL1[15]; +}; + +struct ruvd_h264 { + uint32_t profile; + uint32_t level; + + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_frame_num_minus4; + + uint8_t pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t num_ref_frames; + uint8_t reserved_8bit; + + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; + + uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + + uint16_t slice_group_change_rate_minus1; + uint16_t reserved_16bit_1; + + uint8_t scaling_list_4x4[6][16]; + uint8_t scaling_list_8x8[2][64]; + + uint32_t frame_num; + uint32_t frame_num_list[16]; + int32_t curr_field_order_cnt_list[2]; + int32_t field_order_cnt_list[16][2]; + + uint32_t decoded_pic_idx; + + uint32_t curr_pic_ref_frame_num; + + uint8_t ref_frame_list[16]; + + uint32_t reserved[122]; + + struct { + uint32_t numViews; + uint32_t viewId0; + struct ruvd_mvc_element mvcElements[1]; + } mvc; +}; + +struct ruvd_h265 { + uint32_t sps_info_flags; + uint32_t pps_info_flags; + + uint8_t chroma_format; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + + uint8_t sps_max_dec_pic_buffering_minus1; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_transform_block_size_minus2; + + uint8_t log2_diff_max_min_transform_block_size; + uint8_t max_transform_hierarchy_depth_inter; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t pcm_sample_bit_depth_luma_minus1; + + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint8_t num_extra_slice_header_bits; + + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pic_sps; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + + uint8_t diff_cu_qp_delta_depth; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + uint8_t log2_parallel_merge_level_minus2; + + uint16_t column_width_minus1[19]; + uint16_t row_height_minus1[21]; + + int8_t init_qp_minus26; + uint8_t num_delta_pocs_ref_rps_idx; + uint8_t curr_idx; + uint8_t reserved1; + int32_t curr_poc; + uint8_t ref_pic_list[16]; + int32_t poc_list[16]; + uint8_t ref_pic_set_st_curr_before[8]; + uint8_t ref_pic_set_st_curr_after[8]; + uint8_t ref_pic_set_lt_curr[8]; + + uint8_t ucScalingListDCCoefSizeID2[6]; + uint8_t ucScalingListDCCoefSizeID3[2]; + + uint8_t highestTid; + uint8_t isNonRef; + + uint8_t p010_mode; + uint8_t msb_mode; + uint8_t luma_10to8; + uint8_t chroma_10to8; + uint8_t sclr_luma10to8; + uint8_t sclr_chroma10to8; + + uint8_t direct_reflist[2][15]; +}; + +struct ruvd_vc1 { + uint32_t profile; + uint32_t level; + uint32_t sps_info_flags; + uint32_t pps_info_flags; + uint32_t pic_structure; + uint32_t chroma_format; +}; + +struct ruvd_mpeg2 { + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; + + uint8_t load_intra_quantiser_matrix; + uint8_t load_nonintra_quantiser_matrix; + uint8_t reserved_quantiser_alignement[2]; + uint8_t intra_quantiser_matrix[64]; + uint8_t nonintra_quantiser_matrix[64]; + + uint8_t profile_and_level_indication; + uint8_t chroma_format; + + uint8_t picture_coding_type; + + uint8_t reserved_1; + + uint8_t f_code[2][2]; + uint8_t intra_dc_precision; + uint8_t pic_structure; + uint8_t top_field_first; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t q_scale_type; + uint8_t intra_vlc_format; + uint8_t alternate_scan; +}; + +struct ruvd_mpeg4 +{ + uint32_t decoded_pic_idx; + uint32_t ref_pic_idx[2]; + + uint32_t variant_type; + uint8_t profile_and_level_indication; + + uint8_t video_object_layer_verid; + uint8_t video_object_layer_shape; + + uint8_t reserved_1; + + uint16_t video_object_layer_width; + uint16_t video_object_layer_height; + + uint16_t vop_time_increment_resolution; + + uint16_t reserved_2; + + uint32_t flags; + + uint8_t quant_type; + + uint8_t reserved_3[3]; + + uint8_t intra_quant_mat[64]; + uint8_t nonintra_quant_mat[64]; + + struct { + uint8_t sprite_enable; + + uint8_t reserved_4[3]; + + uint16_t sprite_width; + uint16_t sprite_height; + int16_t sprite_left_coordinate; + int16_t sprite_top_coordinate; + + uint8_t no_of_sprite_warping_points; + uint8_t sprite_warping_accuracy; + uint8_t sprite_brightness_change; + uint8_t low_latency_sprite_enable; + } sprite_config; + + struct { + uint32_t flags; + uint8_t vol_mode; + uint8_t reserved_5[3]; + } divx_311_config; +}; + +/* message between driver and hardware */ +struct ruvd_msg { + + uint32_t size; + uint32_t msg_type; + uint32_t stream_handle; + uint32_t status_report_feedback_number; + + union { + struct { + uint32_t stream_type; + uint32_t session_flags; + uint32_t asic_id; + uint32_t width_in_samples; + uint32_t height_in_samples; + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t version_info; + } create; + + struct { + uint32_t stream_type; + uint32_t decode_flags; + uint32_t width_in_samples; + uint32_t height_in_samples; + + uint32_t dpb_buffer; + uint32_t dpb_size; + uint32_t dpb_model; + uint32_t dpb_reserved; + + uint32_t db_offset_alignment; + uint32_t db_pitch; + uint32_t db_tiling_mode; + uint32_t db_array_mode; + uint32_t db_field_mode; + uint32_t db_surf_tile_config; + uint32_t db_aligned_height; + uint32_t db_reserved; + + uint32_t use_addr_macro; + + uint32_t bsd_buffer; + uint32_t bsd_size; + + uint32_t pic_param_buffer; + uint32_t pic_param_size; + uint32_t mb_cntl_buffer; + uint32_t mb_cntl_size; + + uint32_t dt_buffer; + uint32_t dt_pitch; + uint32_t dt_tiling_mode; + uint32_t dt_array_mode; + uint32_t dt_field_mode; + uint32_t dt_luma_top_offset; + uint32_t dt_luma_bottom_offset; + uint32_t dt_chroma_top_offset; + uint32_t dt_chroma_bottom_offset; + uint32_t dt_surf_tile_config; + uint32_t dt_uv_surf_tile_config; + // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney + uint32_t dt_wa_chroma_top_offset; + uint32_t dt_wa_chroma_bottom_offset; + + uint32_t reserved[16]; + + union { + struct ruvd_h264 h264; + struct ruvd_h265 h265; + struct ruvd_vc1 vc1; + struct ruvd_mpeg2 mpeg2; + struct ruvd_mpeg4 mpeg4; + + uint32_t info[768]; + } codec; + + uint8_t extension_support; + uint8_t reserved_8bit_1; + uint8_t reserved_8bit_2; + uint8_t reserved_8bit_3; + uint32_t extension_reserved[64]; + } decode; + } body; +}; + +/* driver dependent callback */ +typedef struct pb_buffer* (*ruvd_set_dtb) +(struct ruvd_msg* msg, struct vl_video_buffer *vb); + +/* create an UVD decode */ +struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templat, + ruvd_set_dtb set_dtb); + +/* fill decoding target field from the luma and chroma surfaces */ +void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, + struct radeon_surf *chroma); +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.c b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c new file mode 100644 index 000000000..16a0127f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c @@ -0,0 +1,533 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include <stdio.h> + +#include "pipe/p_video_codec.h" + +#include "util/u_video.h" +#include "util/u_memory.h" + +#include "vl/vl_video_buffer.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_vce.h" + +#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) +#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8)) +#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8)) +#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8)) +#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8)) +#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8)) +#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8)) +#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) +#define FW_53 (53 << 24) + +/** + * flush commands to the hardware + */ +static void flush(struct rvce_encoder *enc) +{ + enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL); + enc->task_info_idx = 0; + enc->bs_idx = 0; +} + +#if 0 +static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb) +{ + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + unsigned i = 0; + fprintf(stderr, "\n"); + fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]); + fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]); + fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]); + fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]); + fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]); + fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]); + fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]); + fprintf(stderr, "\n"); + enc->ws->buffer_unmap(fb->res->buf); +} +#endif + +/** + * reset the CPB handling + */ +static void reset_cpb(struct rvce_encoder *enc) +{ + unsigned i; + + LIST_INITHEAD(&enc->cpb_slots); + for (i = 0; i < enc->cpb_num; ++i) { + struct rvce_cpb_slot *slot = &enc->cpb_array[i]; + slot->index = i; + slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP; + slot->frame_num = 0; + slot->pic_order_cnt = 0; + LIST_ADDTAIL(&slot->list, &enc->cpb_slots); + } +} + +/** + * sort l0 and l1 to the top of the list + */ +static void sort_cpb(struct rvce_encoder *enc) +{ + struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL; + + LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) { + if (i->frame_num == enc->pic.ref_idx_l0) + l0 = i; + + if (i->frame_num == enc->pic.ref_idx_l1) + l1 = i; + + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0) + break; + + if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && + l0 && l1) + break; + } + + if (l1) { + LIST_DEL(&l1->list); + LIST_ADD(&l1->list, &enc->cpb_slots); + } + + if (l0) { + LIST_DEL(&l0->list); + LIST_ADD(&l0->list, &enc->cpb_slots); + } +} + +/** + * get number of cpbs based on dpb + */ +static unsigned get_cpb_num(struct rvce_encoder *enc) +{ + unsigned w = align(enc->base.width, 16) / 16; + unsigned h = align(enc->base.height, 16) / 16; + unsigned dpb; + + switch (enc->base.level) { + case 10: + dpb = 396; + break; + case 11: + dpb = 900; + break; + case 12: + case 13: + case 20: + dpb = 2376; + break; + case 21: + dpb = 4752; + break; + case 22: + case 30: + dpb = 8100; + break; + case 31: + dpb = 18000; + break; + case 32: + dpb = 20480; + break; + case 40: + case 41: + dpb = 32768; + break; + case 42: + dpb = 34816; + break; + case 50: + dpb = 110400; + break; + default: + case 51: + case 52: + dpb = 184320; + break; + } + + return MIN2(dpb / (w * h), 16); +} + +/** + * Get the slot for the currently encoded frame + */ +struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); +} + +/** + * Get the slot for L0 + */ +struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); +} + +/** + * Get the slot for L1 + */ +struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc) +{ + return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); +} + +/** + * Calculate the offsets into the CPB + */ +void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, + signed *luma_offset, signed *chroma_offset) +{ + unsigned pitch, vpitch, fsize; + + pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); + vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); + fsize = pitch * (vpitch + vpitch / 2); + + *luma_offset = slot->index * fsize; + *chroma_offset = *luma_offset + pitch * vpitch; +} + +/** + * destroy this video encoder + */ +static void rvce_destroy(struct pipe_video_codec *encoder) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + if (enc->stream_handle) { + struct rvid_buffer fb; + rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->feedback(enc); + enc->destroy(enc); + flush(enc); + rvid_destroy_buffer(&fb); + } + rvid_destroy_buffer(&enc->cpb); + enc->ws->cs_destroy(enc->cs); + FREE(enc->cpb_array); + FREE(enc); +} + +static void rvce_begin_frame(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; + struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; + + bool need_rate_control = + enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method || + enc->pic.quant_i_frames != pic->quant_i_frames || + enc->pic.quant_p_frames != pic->quant_p_frames || + enc->pic.quant_b_frames != pic->quant_b_frames; + + enc->pic = *pic; + get_pic_param(enc, pic); + + enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma); + enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma); + + if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) + reset_cpb(enc); + else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P || + pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) + sort_cpb(enc); + + if (!enc->stream_handle) { + struct rvid_buffer fb; + enc->stream_handle = rvid_alloc_stream_handle(); + rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); + enc->fb = &fb; + enc->session(enc); + enc->create(enc); + enc->config(enc); + enc->feedback(enc); + flush(enc); + //dump_feedback(enc, &fb); + rvid_destroy_buffer(&fb); + need_rate_control = false; + } + + if (need_rate_control) { + enc->session(enc); + enc->config(enc); + flush(enc); + } +} + +static void rvce_encode_bitstream(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_resource *destination, + void **fb) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + enc->get_buffer(destination, &enc->bs_handle, NULL); + enc->bs_size = destination->width0; + + *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); + if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) { + RVID_ERR("Can't create feedback buffer.\n"); + return; + } + if (!radeon_emitted(enc->cs, 0)) + enc->session(enc); + enc->encode(enc); + enc->feedback(enc); +} + +static void rvce_end_frame(struct pipe_video_codec *encoder, + struct pipe_video_buffer *source, + struct pipe_picture_desc *picture) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + struct rvce_cpb_slot *slot = LIST_ENTRY( + struct rvce_cpb_slot, enc->cpb_slots.prev, list); + + if (!enc->dual_inst || enc->bs_idx > 1) + flush(enc); + + /* update the CPB backtrack with the just encoded frame */ + slot->picture_type = enc->pic.picture_type; + slot->frame_num = enc->pic.frame_num; + slot->pic_order_cnt = enc->pic.pic_order_cnt; + if (!enc->pic.not_referenced) { + LIST_DEL(&slot->list); + LIST_ADD(&slot->list, &enc->cpb_slots); + } +} + +static void rvce_get_feedback(struct pipe_video_codec *encoder, + void *feedback, unsigned *size) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + struct rvid_buffer *fb = feedback; + + if (size) { + uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + + if (ptr[1]) { + *size = ptr[4] - ptr[9]; + } else { + *size = 0; + } + + enc->ws->buffer_unmap(fb->res->buf); + } + //dump_feedback(enc, fb); + rvid_destroy_buffer(fb); + FREE(fb); +} + +/** + * flush any outstanding command buffers to the hardware + */ +static void rvce_flush(struct pipe_video_codec *encoder) +{ + struct rvce_encoder *enc = (struct rvce_encoder*)encoder; + + flush(enc); +} + +static void rvce_cs_flush(void *ctx, unsigned flags, + struct pipe_fence_handle **fence) +{ + // just ignored +} + +struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, + const struct pipe_video_codec *templ, + struct radeon_winsys* ws, + rvce_get_buffer get_buffer) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; + struct r600_common_context *rctx = (struct r600_common_context*)context; + struct rvce_encoder *enc; + struct pipe_video_buffer *tmp_buf, templat = {}; + struct radeon_surf *tmp_surf; + unsigned cpb_size; + + if (!rscreen->info.vce_fw_version) { + RVID_ERR("Kernel doesn't supports VCE!\n"); + return NULL; + + } else if (!rvce_is_fw_version_supported(rscreen)) { + RVID_ERR("Unsupported VCE fw version loaded!\n"); + return NULL; + } + + enc = CALLOC_STRUCT(rvce_encoder); + if (!enc) + return NULL; + + if (rscreen->info.drm_major == 3) + enc->use_vm = true; + if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) || + rscreen->info.drm_major == 3) + enc->use_vui = true; + + enc->base = *templ; + enc->base.context = context; + + enc->base.destroy = rvce_destroy; + enc->base.begin_frame = rvce_begin_frame; + enc->base.encode_bitstream = rvce_encode_bitstream; + enc->base.end_frame = rvce_end_frame; + enc->base.flush = rvce_flush; + enc->base.get_feedback = rvce_get_feedback; + enc->get_buffer = get_buffer; + + enc->screen = context->screen; + enc->ws = ws; + enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc); + if (!enc->cs) { + RVID_ERR("Can't get command submission context.\n"); + goto error; + } + + templat.buffer_format = PIPE_FORMAT_NV12; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = enc->base.width; + templat.height = enc->base.height; + templat.interlaced = false; + if (!(tmp_buf = context->create_video_buffer(context, &templat))) { + RVID_ERR("Can't create video buffer.\n"); + goto error; + } + + enc->cpb_num = get_cpb_num(enc); + if (!enc->cpb_num) + goto error; + + get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); + + cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32); + + cpb_size = cpb_size * 3 / 2; + cpb_size = cpb_size * enc->cpb_num; + if (enc->dual_pipe) + cpb_size += RVCE_MAX_AUX_BUFFER_NUM * + RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; + tmp_buf->destroy(tmp_buf); + if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) { + RVID_ERR("Can't create CPB buffer.\n"); + goto error; + } + + enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot)); + if (!enc->cpb_array) + goto error; + + reset_cpb(enc); + + goto error; + + return &enc->base; + +error: + if (enc->cs) + enc->ws->cs_destroy(enc->cs); + + rvid_destroy_buffer(&enc->cpb); + + FREE(enc->cpb_array); + FREE(enc); + return NULL; +} + +/** + * check if kernel has the right fw version loaded + */ +bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen) +{ + switch (rscreen->info.vce_fw_version) { + case FW_40_2_2: + case FW_50_0_1: + case FW_50_1_2: + case FW_50_10_2: + case FW_50_17_3: + case FW_52_0_3: + case FW_52_4_3: + case FW_52_8_3: + return true; + default: + if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) + return true; + else + return false; + } +} + +/** + * Add the buffer as relocation to the current command submission + */ +void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, + signed offset) +{ + int reloc_idx; + + reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, + domain, RADEON_PRIO_VCE); + if (enc->use_vm) { + uint64_t addr; + addr = enc->ws->buffer_get_virtual_address(buf); + addr = addr + offset; + RVCE_CS(addr >> 32); + RVCE_CS(addr); + } else { + offset += enc->ws->buffer_get_reloc_offset(buf); + RVCE_CS(reloc_idx * 4); + RVCE_CS(offset); + } +} diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.h b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h new file mode 100644 index 000000000..f79e65c9a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h @@ -0,0 +1,462 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef RADEON_VCE_H +#define RADEON_VCE_H + +#include "util/list.h" + +#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) +#define RVCE_BEGIN(cmd) { \ + uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ + RVCE_CS(cmd) +#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) +#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) +#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) +#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } + +#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) +#define RVCE_MAX_AUX_BUFFER_NUM 4 + +struct r600_common_screen; + +/* driver dependent callback */ +typedef void (*rvce_get_buffer)(struct pipe_resource *resource, + struct pb_buffer **handle, + struct radeon_surf **surface); + +/* Coded picture buffer slot */ +struct rvce_cpb_slot { + struct list_head list; + + unsigned index; + enum pipe_h264_enc_picture_type picture_type; + unsigned frame_num; + unsigned pic_order_cnt; +}; + +struct rvce_rate_control { + uint32_t rc_method; + uint32_t target_bitrate; + uint32_t peak_bitrate; + uint32_t frame_rate_num; + uint32_t gop_size; + uint32_t quant_i_frames; + uint32_t quant_p_frames; + uint32_t quant_b_frames; + uint32_t vbv_buffer_size; + uint32_t frame_rate_den; + uint32_t vbv_buf_lv; + uint32_t max_au_size; + uint32_t qp_initial_mode; + uint32_t target_bits_picture; + uint32_t peak_bits_picture_integer; + uint32_t peak_bits_picture_fraction; + uint32_t min_qp; + uint32_t max_qp; + uint32_t skip_frame_enable; + uint32_t fill_data_enable; + uint32_t enforce_hrd; + uint32_t b_pics_delta_qp; + uint32_t ref_b_pics_delta_qp; + uint32_t rc_reinit_disable; + uint32_t enc_lcvbr_init_qp_flag; + uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag; +}; + +struct rvce_motion_estimation { + uint32_t enc_ime_decimation_search; + uint32_t motion_est_half_pixel; + uint32_t motion_est_quarter_pixel; + uint32_t disable_favor_pmv_point; + uint32_t force_zero_point_center; + uint32_t lsmvert; + uint32_t enc_search_range_x; + uint32_t enc_search_range_y; + uint32_t enc_search1_range_x; + uint32_t enc_search1_range_y; + uint32_t disable_16x16_frame1; + uint32_t disable_satd; + uint32_t enable_amd; + uint32_t enc_disable_sub_mode; + uint32_t enc_ime_skip_x; + uint32_t enc_ime_skip_y; + uint32_t enc_en_ime_overw_dis_subm; + uint32_t enc_ime_overw_dis_subm_no; + uint32_t enc_ime2_search_range_x; + uint32_t enc_ime2_search_range_y; + uint32_t parallel_mode_speedup_enable; + uint32_t fme0_enc_disable_sub_mode; + uint32_t fme1_enc_disable_sub_mode; + uint32_t ime_sw_speedup_enable; +}; + +struct rvce_pic_control { + uint32_t enc_use_constrained_intra_pred; + uint32_t enc_cabac_enable; + uint32_t enc_cabac_idc; + uint32_t enc_loop_filter_disable; + int32_t enc_lf_beta_offset; + int32_t enc_lf_alpha_c0_offset; + uint32_t enc_crop_left_offset; + uint32_t enc_crop_right_offset; + uint32_t enc_crop_top_offset; + uint32_t enc_crop_bottom_offset; + uint32_t enc_num_mbs_per_slice; + uint32_t enc_intra_refresh_num_mbs_per_slot; + uint32_t enc_force_intra_refresh; + uint32_t enc_force_imb_period; + uint32_t enc_pic_order_cnt_type; + uint32_t log2_max_pic_order_cnt_lsb_minus4; + uint32_t enc_sps_id; + uint32_t enc_pps_id; + uint32_t enc_constraint_set_flags; + uint32_t enc_b_pic_pattern; + uint32_t weight_pred_mode_b_picture; + uint32_t enc_number_of_reference_frames; + uint32_t enc_max_num_ref_frames; + uint32_t enc_num_default_active_ref_l0; + uint32_t enc_num_default_active_ref_l1; + uint32_t enc_slice_mode; + uint32_t enc_max_slice_size; +}; + +struct rvce_task_info { + uint32_t offset_of_next_task_info; + uint32_t task_operation; + uint32_t reference_picture_dependency; + uint32_t collocate_flag_dependency; + uint32_t feedback_index; + uint32_t video_bitstream_ring_index; +}; + +struct rvce_feedback_buf_pkg { + uint32_t feedback_ring_address_hi; + uint32_t feedback_ring_address_lo; + uint32_t feedback_ring_size; +}; + +struct rvce_rdo { + uint32_t enc_disable_tbe_pred_i_frame; + uint32_t enc_disable_tbe_pred_p_frame; + uint32_t use_fme_interpol_y; + uint32_t use_fme_interpol_uv; + uint32_t use_fme_intrapol_y; + uint32_t use_fme_intrapol_uv; + uint32_t use_fme_interpol_y_1; + uint32_t use_fme_interpol_uv_1; + uint32_t use_fme_intrapol_y_1; + uint32_t use_fme_intrapol_uv_1; + uint32_t enc_16x16_cost_adj; + uint32_t enc_skip_cost_adj; + uint32_t enc_force_16x16_skip; + uint32_t enc_disable_threshold_calc_a; + uint32_t enc_luma_coeff_cost; + uint32_t enc_luma_mb_coeff_cost; + uint32_t enc_chroma_coeff_cost; +}; + +struct rvce_vui { + uint32_t aspect_ratio_info_present_flag; + uint32_t aspect_ratio_idc; + uint32_t sar_width; + uint32_t sar_height; + uint32_t overscan_info_present_flag; + uint32_t overscan_Approp_flag; + uint32_t video_signal_type_present_flag; + uint32_t video_format; + uint32_t video_full_range_flag; + uint32_t color_description_present_flag; + uint32_t color_prim; + uint32_t transfer_char; + uint32_t matrix_coef; + uint32_t chroma_loc_info_present_flag; + uint32_t chroma_loc_top; + uint32_t chroma_loc_bottom; + uint32_t timing_info_present_flag; + uint32_t num_units_in_tick; + uint32_t time_scale; + uint32_t fixed_frame_rate_flag; + uint32_t nal_hrd_parameters_present_flag; + uint32_t cpb_cnt_minus1; + uint32_t bit_rate_scale; + uint32_t cpb_size_scale; + uint32_t bit_rate_value_minus; + uint32_t cpb_size_value_minus; + uint32_t cbr_flag; + uint32_t initial_cpb_removal_delay_length_minus1; + uint32_t cpb_removal_delay_length_minus1; + uint32_t dpb_output_delay_length_minus1; + uint32_t time_offset_length; + uint32_t low_delay_hrd_flag; + uint32_t pic_struct_present_flag; + uint32_t bitstream_restriction_present_flag; + uint32_t motion_vectors_over_pic_boundaries_flag; + uint32_t max_bytes_per_pic_denom; + uint32_t max_bits_per_mb_denom; + uint32_t log2_max_mv_length_hori; + uint32_t log2_max_mv_length_vert; + uint32_t num_reorder_frames; + uint32_t max_dec_frame_buffering; +}; + +struct rvce_enc_operation { + uint32_t insert_headers; + uint32_t picture_structure; + uint32_t allowed_max_bitstream_size; + uint32_t force_refresh_map; + uint32_t insert_aud; + uint32_t end_of_sequence; + uint32_t end_of_stream; + uint32_t input_picture_luma_address_hi; + uint32_t input_picture_luma_address_lo; + uint32_t input_picture_chroma_address_hi; + uint32_t input_picture_chroma_address_lo; + uint32_t enc_input_frame_y_pitch; + uint32_t enc_input_pic_luma_pitch; + uint32_t enc_input_pic_chroma_pitch;; + uint32_t enc_input_pic_addr_array; + uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload; + uint32_t enc_input_pic_tile_config; + uint32_t enc_pic_type; + uint32_t enc_idr_flag; + uint32_t enc_idr_pic_id; + uint32_t enc_mgs_key_pic; + uint32_t enc_reference_flag; + uint32_t enc_temporal_layer_index; + uint32_t num_ref_idx_active_override_flag; + uint32_t num_ref_idx_l0_active_minus1; + uint32_t num_ref_idx_l1_active_minus1; + uint32_t enc_ref_list_modification_op; + uint32_t enc_ref_list_modification_num; + uint32_t enc_decoded_picture_marking_op; + uint32_t enc_decoded_picture_marking_num; + uint32_t enc_decoded_picture_marking_idx; + uint32_t enc_decoded_ref_base_picture_marking_op; + uint32_t enc_decoded_ref_base_picture_marking_num; + uint32_t l0_picture_structure; + uint32_t l0_enc_pic_type; + uint32_t l0_frame_number; + uint32_t l0_picture_order_count; + uint32_t l0_luma_offset; + uint32_t l0_chroma_offset; + uint32_t l1_picture_structure; + uint32_t l1_enc_pic_type; + uint32_t l1_frame_number; + uint32_t l1_picture_order_count; + uint32_t l1_luma_offset; + uint32_t l1_chroma_offset; + uint32_t enc_reconstructed_luma_offset; + uint32_t enc_reconstructed_chroma_offset;; + uint32_t enc_coloc_buffer_offset; + uint32_t enc_reconstructed_ref_base_picture_luma_offset; + uint32_t enc_reconstructed_ref_base_picture_chroma_offset; + uint32_t enc_reference_ref_base_picture_luma_offset; + uint32_t enc_reference_ref_base_picture_chroma_offset; + uint32_t picture_count; + uint32_t frame_number; + uint32_t picture_order_count; + uint32_t num_i_pic_remain_in_rcgop; + uint32_t num_p_pic_remain_in_rcgop; + uint32_t num_b_pic_remain_in_rcgop; + uint32_t num_ir_pic_remain_in_rcgop; + uint32_t enable_intra_refresh; + uint32_t aq_variance_en; + uint32_t aq_block_size; + uint32_t aq_mb_variance_sel; + uint32_t aq_frame_variance_sel; + uint32_t aq_param_a; + uint32_t aq_param_b; + uint32_t aq_param_c; + uint32_t aq_param_d; + uint32_t aq_param_e; + uint32_t context_in_sfb; +}; + +struct rvce_enc_create { + uint32_t enc_use_circular_buffer; + uint32_t enc_profile; + uint32_t enc_level; + uint32_t enc_pic_struct_restriction; + uint32_t enc_image_width; + uint32_t enc_image_height; + uint32_t enc_ref_pic_luma_pitch; + uint32_t enc_ref_pic_chroma_pitch; + uint32_t enc_ref_y_height_in_qw; + uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo; + uint32_t enc_pre_encode_context_buffer_offset; + uint32_t enc_pre_encode_input_luma_buffer_offset; + uint32_t enc_pre_encode_input_chroma_buffer_offset; + uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity; +}; + +struct rvce_config_ext { + uint32_t enc_enable_perf_logging; +}; + +struct rvce_h264_enc_pic { + struct rvce_rate_control rc; + struct rvce_motion_estimation me; + struct rvce_pic_control pc; + struct rvce_task_info ti; + struct rvce_feedback_buf_pkg fb; + struct rvce_rdo rdo; + struct rvce_vui vui; + struct rvce_enc_operation eo; + struct rvce_enc_create ec; + struct rvce_config_ext ce; + + unsigned quant_i_frames; + unsigned quant_p_frames; + unsigned quant_b_frames; + + enum pipe_h264_enc_picture_type picture_type; + unsigned frame_num; + unsigned frame_num_cnt; + unsigned p_remain; + unsigned i_remain; + unsigned idr_pic_id; + unsigned gop_cnt; + unsigned gop_size; + unsigned pic_order_cnt; + unsigned ref_idx_l0; + unsigned ref_idx_l1; + unsigned addrmode_arraymode_disrdo_distwoinstants; + + bool not_referenced; + bool is_idr; + bool has_ref_pic_list; + bool enable_vui; + unsigned int ref_pic_list_0[32]; + unsigned int ref_pic_list_1[32]; + unsigned int frame_idx[32]; +}; + +/* VCE encoder representation */ +struct rvce_encoder { + struct pipe_video_codec base; + + /* version specific packets */ + void (*session)(struct rvce_encoder *enc); + void (*create)(struct rvce_encoder *enc); + void (*feedback)(struct rvce_encoder *enc); + void (*rate_control)(struct rvce_encoder *enc); + void (*config_extension)(struct rvce_encoder *enc); + void (*pic_control)(struct rvce_encoder *enc); + void (*motion_estimation)(struct rvce_encoder *enc); + void (*rdo)(struct rvce_encoder *enc); + void (*vui)(struct rvce_encoder *enc); + void (*config)(struct rvce_encoder *enc); + void (*encode)(struct rvce_encoder *enc); + void (*destroy)(struct rvce_encoder *enc); + void (*task_info)(struct rvce_encoder *enc, uint32_t op, + uint32_t dep, uint32_t fb_idx, + uint32_t ring_idx); + + unsigned stream_handle; + + struct pipe_screen *screen; + struct radeon_winsys* ws; + struct radeon_winsys_cs* cs; + + rvce_get_buffer get_buffer; + + struct pb_buffer* handle; + struct radeon_surf* luma; + struct radeon_surf* chroma; + + struct pb_buffer* bs_handle; + unsigned bs_size; + + struct rvce_cpb_slot *cpb_array; + struct list_head cpb_slots; + unsigned cpb_num; + + struct rvid_buffer *fb; + struct rvid_buffer cpb; + struct pipe_h264_enc_picture_desc pic; + struct rvce_h264_enc_pic enc_pic; + + unsigned task_info_idx; + unsigned bs_idx; + + bool use_vm; + bool use_vui; + bool dual_pipe; + bool dual_inst; +}; + +/* CPB handling functions */ +struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc); +void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, + signed *luma_offset, signed *chroma_offset); + +struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, + const struct pipe_video_codec *templat, + struct radeon_winsys* ws, + rvce_get_buffer get_buffer); + +bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen); + +void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, + signed offset); + +/* init vce fw 40.2.2 specific callbacks */ +void radeon_vce_40_2_2_init(struct rvce_encoder *enc); + +/* init vce fw 50 specific callbacks */ +void radeon_vce_50_init(struct rvce_encoder *enc); + +/* init vce fw 52 specific callbacks */ +void radeon_vce_52_init(struct rvce_encoder *enc); + +/* version specific function for getting parameters */ +void (*get_pic_param)(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); + +/* get parameters for vce 40.2.2 */ +void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); + +/* get parameters for vce 50 */ +void radeon_vce_50_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); + +/* get parameters for vce 52 */ +void radeon_vce_52_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.c b/lib/mesa/src/gallium/drivers/r600/radeon_video.c new file mode 100644 index 000000000..c7acc3d6e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.c @@ -0,0 +1,349 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include <unistd.h> + +#include "util/u_memory.h" +#include "util/u_video.h" + +#include "vl/vl_defines.h" +#include "vl/vl_video_buffer.h" + +#include "r600_pipe_common.h" +#include "radeon_video.h" +#include "radeon_vce.h" + +#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) + +/* generate an stream handle */ +unsigned rvid_alloc_stream_handle() +{ + static unsigned counter = 0; + unsigned stream_handle = 0; + unsigned pid = getpid(); + int i; + + for (i = 0; i < 32; ++i) + stream_handle |= ((pid >> i) & 1) << (31 - i); + + stream_handle ^= ++counter; + return stream_handle; +} + +/* create a buffer in the winsys */ +bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, + unsigned size, unsigned usage) +{ + memset(buffer, 0, sizeof(*buffer)); + buffer->usage = usage; + + /* Hardware buffer placement restrictions require the kernel to be + * able to move buffers around individually, so request a + * non-sub-allocated buffer. + */ + buffer->res = (struct r600_resource *) + pipe_buffer_create(screen, PIPE_BIND_SHARED, + usage, size); + + return buffer->res != NULL; +} + +/* destroy a buffer */ +void rvid_destroy_buffer(struct rvid_buffer *buffer) +{ + r600_resource_reference(&buffer->res, NULL); +} + +/* reallocate a buffer, preserving its content */ +bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs, + struct rvid_buffer *new_buf, unsigned new_size) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + struct radeon_winsys* ws = rscreen->ws; + unsigned bytes = MIN2(new_buf->res->buf->size, new_size); + struct rvid_buffer old_buf = *new_buf; + void *src = NULL, *dst = NULL; + + if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage)) + goto error; + + src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + if (!src) + goto error; + + dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + if (!dst) + goto error; + + memcpy(dst, src, bytes); + if (new_size > bytes) { + new_size -= bytes; + dst += bytes; + memset(dst, 0, new_size); + } + ws->buffer_unmap(new_buf->res->buf); + ws->buffer_unmap(old_buf.res->buf); + rvid_destroy_buffer(&old_buf); + return true; + +error: + if (src) + ws->buffer_unmap(old_buf.res->buf); + rvid_destroy_buffer(new_buf); + *new_buf = old_buf; + return false; +} + +/* clear the buffer with zeros */ +void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)context; + + rctx->dma_clear_buffer(context, &buffer->res->b.b, 0, + buffer->res->buf->size, 0); + context->flush(context, NULL, 0); +} + +/** + * join surfaces into the same buffer with identical tiling params + * sumup their sizes and replace the backend buffers with a single bo + */ +void rvid_join_surfaces(struct r600_common_context *rctx, + struct pb_buffer** buffers[VL_NUM_COMPONENTS], + struct radeon_surf *surfaces[VL_NUM_COMPONENTS]) +{ + struct radeon_winsys* ws; + unsigned best_tiling, best_wh, off; + unsigned size, alignment; + struct pb_buffer *pb; + unsigned i, j; + + ws = rctx->ws; + + for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) { + unsigned wh; + + if (!surfaces[i]) + continue; + + /* choose the smallest bank w/h for now */ + wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh; + if (wh < best_wh) { + best_wh = wh; + best_tiling = i; + } + } + + for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) { + if (!surfaces[i]) + continue; + + /* adjust the texture layer offsets */ + off = align(off, surfaces[i]->surf_alignment); + + /* copy the tiling parameters */ + surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw; + surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh; + surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea; + surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split; + + for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j) + surfaces[i]->u.legacy.level[j].offset += off; + + off += surfaces[i]->surf_size; + } + + for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) { + if (!buffers[i] || !*buffers[i]) + continue; + + size = align(size, (*buffers[i])->alignment); + size += (*buffers[i])->size; + alignment = MAX2(alignment, (*buffers[i])->alignment * 1); + } + + if (!size) + return; + + /* TODO: 2D tiling workaround */ + alignment *= 2; + + pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM, + RADEON_FLAG_GTT_WC); + if (!pb) + return; + + for (i = 0; i < VL_NUM_COMPONENTS; ++i) { + if (!buffers[i] || !*buffers[i]) + continue; + + pb_reference(buffers[i], pb); + } + + pb_reference(&pb, NULL); +} + +int rvid_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + enum pipe_video_format codec = u_reduce_video_profile(profile); + struct radeon_info info; + + rscreen->ws->query_info(rscreen->ws, &info); + + if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && + rvce_is_fw_version_supported(rscreen); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + return 2048; + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return 1152; + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + return PIPE_FORMAT_NV12; + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: + return false; + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: + return false; + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: + return true; + case PIPE_VIDEO_CAP_STACKED_FRAMES: + return 1; + default: + return 0; + } + } + + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + switch (codec) { + case PIPE_VIDEO_FORMAT_MPEG12: + return profile != PIPE_VIDEO_PROFILE_MPEG1; + case PIPE_VIDEO_FORMAT_MPEG4: + /* no support for MPEG4 on older hw */ + return rscreen->family >= CHIP_PALM; + case PIPE_VIDEO_FORMAT_MPEG4_AVC: + return true; + case PIPE_VIDEO_FORMAT_VC1: + return true; + case PIPE_VIDEO_FORMAT_HEVC: + return false; + case PIPE_VIDEO_FORMAT_JPEG: + return false; + default: + return false; + } + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + return 2048; + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return 1152; + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + return PIPE_FORMAT_P016; + else + return PIPE_FORMAT_NV12; + + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: + if (rscreen->family < CHIP_PALM) { + /* MPEG2 only with shaders and no support for + interlacing on R6xx style UVD */ + return codec != PIPE_VIDEO_FORMAT_MPEG12 && + rscreen->family > CHIP_RV770; + } else { + enum pipe_video_format format = u_reduce_video_profile(profile); + + if (format == PIPE_VIDEO_FORMAT_HEVC) + return false; //The firmware doesn't support interlaced HEVC. + else if (format == PIPE_VIDEO_FORMAT_JPEG) + return false; + return true; + } + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: + return true; + case PIPE_VIDEO_CAP_MAX_LEVEL: + switch (profile) { + case PIPE_VIDEO_PROFILE_MPEG1: + return 0; + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return 3; + case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE: + return 3; + case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE: + return 5; + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + return 1; + case PIPE_VIDEO_PROFILE_VC1_MAIN: + return 2; + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + return 4; + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return 41; + case PIPE_VIDEO_PROFILE_HEVC_MAIN: + case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: + return 186; + default: + return 0; + } + default: + return 0; + } +} + +boolean rvid_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint) +{ + /* HEVC 10 bit decoding should use P016 instead of NV12 if possible */ + if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + return (format == PIPE_FORMAT_NV12) || + (format == PIPE_FORMAT_P016); + + /* we can only handle this one with UVD */ + if (profile != PIPE_VIDEO_PROFILE_UNKNOWN) + return format == PIPE_FORMAT_NV12; + + return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint); +} diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.h b/lib/mesa/src/gallium/drivers/r600/radeon_video.h new file mode 100644 index 000000000..3347c4ebc --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef RADEON_VIDEO_H +#define RADEON_VIDEO_H + +#include "radeon/radeon_winsys.h" +#include "vl/vl_video_buffer.h" + +#define RVID_ERR(fmt, args...) \ + fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args) + +/* video buffer representation */ +struct rvid_buffer +{ + unsigned usage; + struct r600_resource *res; +}; + +/* generate an stream handle */ +unsigned rvid_alloc_stream_handle(void); + +/* create a buffer in the winsys */ +bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer, + unsigned size, unsigned usage); + +/* destroy a buffer */ +void rvid_destroy_buffer(struct rvid_buffer *buffer); + +/* reallocate a buffer, preserving its content */ +bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs, + struct rvid_buffer *new_buf, unsigned new_size); + +/* clear the buffer with zeros */ +void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer); + +/* join surfaces into the same buffer with identical tiling params + sumup their sizes and replace the backend buffers with a single bo */ +void rvid_join_surfaces(struct r600_common_context *rctx, + struct pb_buffer** buffers[VL_NUM_COMPONENTS], + struct radeon_surf *surfaces[VL_NUM_COMPONENTS]); + +/* returns supported codecs and other parameters */ +int rvid_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param); + +/* the hardware only supports NV12 */ +boolean rvid_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint); + +#endif // RADEON_VIDEO_H diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index ae92a767b..09a326ef6 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -516,7 +516,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { n->src.push_back(get_cf_index_value(1)); } - if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && + if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && ctx.is_cayman()) // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h index 28bdf15b8..5bfce1ca7 100644 --- a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h +++ b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h @@ -31,7 +31,7 @@ #define R600_CS_H #include "r600_pipe_common.h" -#include "amd/common/r600d_common.h" +#include "amd/common/sid.h" /** * Return true if there is enough memory in VRAM and GTT for the buffers @@ -113,27 +113,12 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx, return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); } -static inline void r600_emit_reloc(struct r600_common_context *rctx, - struct r600_ring *ring, struct r600_resource *rbo, - enum radeon_bo_usage usage, - enum radeon_bo_priority priority) -{ - struct radeon_winsys_cs *cs = ring->cs; - bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory; - unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority); - - if (!has_vm) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, reloc); - } -} - static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { - assert(reg < R600_CONTEXT_REG_OFFSET); + assert(reg < SI_CONTEXT_REG_OFFSET); assert(cs->current.cdw + 2 + num <= cs->current.max_dw); radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); - radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2); + radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2); } static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) @@ -144,10 +129,10 @@ static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned r static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { - assert(reg >= R600_CONTEXT_REG_OFFSET); + assert(reg >= SI_CONTEXT_REG_OFFSET); assert(cs->current.cdw + 2 + num <= cs->current.max_dw); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); - radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); } static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) @@ -160,10 +145,10 @@ static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs, unsigned reg, unsigned idx, unsigned value) { - assert(reg >= R600_CONTEXT_REG_OFFSET); + assert(reg >= SI_CONTEXT_REG_OFFSET); assert(cs->current.cdw + 3 <= cs->current.max_dw); radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); - radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); radeon_emit(cs, value); } diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h index f79e65c9a..f34a8eaf8 100644 --- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h +++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h @@ -40,9 +40,9 @@ #define RVCE_BEGIN(cmd) { \ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \ RVCE_CS(cmd) -#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) -#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) -#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) +#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off)) +#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off)) +#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off)) #define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } #define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) @@ -417,46 +417,46 @@ struct rvce_encoder { }; /* CPB handling functions */ -struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc); -struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc); -struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc); -void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, - signed *luma_offset, signed *chroma_offset); +struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc); +struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc); +void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, + signed *luma_offset, signed *chroma_offset); -struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, - const struct pipe_video_codec *templat, - struct radeon_winsys* ws, - rvce_get_buffer get_buffer); +struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, + const struct pipe_video_codec *templat, + struct radeon_winsys* ws, + rvce_get_buffer get_buffer); -bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen); +bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen); -void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, - enum radeon_bo_usage usage, enum radeon_bo_domain domain, - signed offset); +void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, + enum radeon_bo_usage usage, enum radeon_bo_domain domain, + signed offset); /* init vce fw 40.2.2 specific callbacks */ -void radeon_vce_40_2_2_init(struct rvce_encoder *enc); +void si_vce_40_2_2_init(struct rvce_encoder *enc); /* init vce fw 50 specific callbacks */ -void radeon_vce_50_init(struct rvce_encoder *enc); +void si_vce_50_init(struct rvce_encoder *enc); /* init vce fw 52 specific callbacks */ -void radeon_vce_52_init(struct rvce_encoder *enc); +void si_vce_52_init(struct rvce_encoder *enc); /* version specific function for getting parameters */ -void (*get_pic_param)(struct rvce_encoder *enc, +void (*si_get_pic_param)(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic); /* get parameters for vce 40.2.2 */ -void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_40_2_2_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); /* get parameters for vce 50 */ -void radeon_vce_50_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_50_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); /* get parameters for vce 52 */ -void radeon_vce_52_get_param(struct rvce_encoder *enc, - struct pipe_h264_enc_picture_desc *pic); +void si_vce_52_get_param(struct rvce_encoder *enc, + struct pipe_h264_enc_picture_desc *pic); #endif diff --git a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h new file mode 100644 index 000000000..7f57b4ea8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h @@ -0,0 +1,10 @@ +// DriConf options specific to radeonsi +DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_RADEONSI_ENABLE_SISCHED("false") + DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") + DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") +DRI_CONF_SECTION_END + +DRI_CONF_SECTION_DEBUG + DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false") +DRI_CONF_SECTION_END diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h b/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h new file mode 100644 index 000000000..532151125 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h @@ -0,0 +1,49 @@ +// DO NOT EDIT - this file is automatically generated by merge_driinfo.py + +/* +Use as: + +#include "xmlpool.h" + +static const char driinfo_xml[] = +#include "this_file" +; +*/ + +DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_MESA_GLTHREAD("false") + DRI_CONF_MESA_NO_ERROR("false") + DRI_CONF_DISABLE_EXT_BUFFER_AGE("false") + DRI_CONF_DISABLE_OML_SYNC_CONTROL("false") + DRI_CONF_RADEONSI_ENABLE_SISCHED("false") + DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") + DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false") + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_PP_CELSHADE(0) + DRI_CONF_PP_NORED(0) + DRI_CONF_PP_NOGREEN(0) + DRI_CONF_PP_NOBLUE(0) + DRI_CONF_PP_JIMENEZMLAA(0, 0, 32) + DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false") + DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") + DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false") + DRI_CONF_FORCE_GLSL_VERSION(0) + DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false") + DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false") + DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false") + DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false") + DRI_CONF_FORCE_GLSL_ABS_SQRT("false") + DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false") + DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false") + DRI_CONF_SECTION_END + DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false") + DRI_CONF_GLSL_ZERO_INIT("false") + DRI_CONF_SECTION_END +DRI_CONF_END diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c new file mode 100644 index 000000000..7a8822738 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -0,0 +1,508 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "si_shader.h" +#include "si_shader_internal.h" + +#include "ac_nir_to_llvm.h" + +#include "tgsi/tgsi_from_mesa.h" + +#include "compiler/nir/nir.h" +#include "compiler/nir_types.h" + + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void scan_instruction(struct tgsi_shader_info *info, + nir_instr *instr) +{ + if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_fine: + case nir_op_fddy_fine: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + info->uses_derivatives = true; + break; + default: + break; + } + } else if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + info->uses_derivatives = true; + break; + default: + break; + } + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_front_face: + info->uses_frontface = 1; + break; + case nir_intrinsic_load_instance_id: + info->uses_instanceid = 1; + break; + case nir_intrinsic_load_vertex_id: + info->uses_vertexid = 1; + break; + case nir_intrinsic_load_vertex_id_zero_base: + info->uses_vertexid_nobase = 1; + break; + case nir_intrinsic_load_base_vertex: + info->uses_basevertex = 1; + break; + case nir_intrinsic_load_primitive_id: + info->uses_primid = 1; + break; + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_store_ssbo: + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + info->writes_memory = true; + break; + default: + break; + } + } +} + +void si_nir_scan_shader(const struct nir_shader *nir, + struct tgsi_shader_info *info) +{ + nir_function *func; + unsigned i; + + assert(nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_FRAGMENT); + + info->processor = pipe_shader_type_from_mesa(nir->info.stage); + info->num_tokens = 2; /* indicate that the shader is non-empty */ + info->num_instructions = 2; + + info->num_inputs = nir->num_inputs; + info->num_outputs = nir->num_outputs; + + i = 0; + nir_foreach_variable(variable, &nir->inputs) { + unsigned semantic_name, semantic_index; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, + nir->info.stage == MESA_SHADER_VERTEX); + + assert(attrib_count == 1 && "not implemented"); + + /* Vertex shader inputs don't have semantics. The state + * tracker has already mapped them to attributes via + * variable->data.driver_location. + */ + if (nir->info.stage == MESA_SHADER_VERTEX) + continue; + + /* Fragment shader position is a system value. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && + variable->data.location == VARYING_SLOT_POS) { + if (variable->data.pixel_center_integer) + info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] = + TGSI_FS_COORD_PIXEL_CENTER_INTEGER; + continue; + } + + tgsi_get_gl_varying_semantic(variable->data.location, true, + &semantic_name, &semantic_index); + + info->input_semantic_name[i] = semantic_name; + info->input_semantic_index[i] = semantic_index; + + if (variable->data.sample) + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; + else if (variable->data.centroid) + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID; + else + info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER; + + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(variable->type)); + + switch (variable->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (semantic_name == TGSI_SEMANTIC_COLOR) { + info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; + goto persp_locations; + } + /* fall-through */ + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); + + info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; + + persp_locations: + if (variable->data.sample) + info->uses_persp_sample = true; + else if (variable->data.centroid) + info->uses_persp_centroid = true; + else + info->uses_persp_center = true; + break; + + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); + + info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; + + if (variable->data.sample) + info->uses_linear_sample = true; + else if (variable->data.centroid) + info->uses_linear_centroid = true; + else + info->uses_linear_center = true; + break; + + case INTERP_MODE_FLAT: + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } + + /* TODO make this more precise */ + if (variable->data.location == VARYING_SLOT_COL0) + info->colors_read |= 0x0f; + else if (variable->data.location == VARYING_SLOT_COL1) + info->colors_read |= 0xf0; + + i++; + } + + i = 0; + nir_foreach_variable(variable, &nir->outputs) { + unsigned semantic_name, semantic_index; + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + tgsi_get_gl_frag_result_semantic(variable->data.location, + &semantic_name, &semantic_index); + } else { + tgsi_get_gl_varying_semantic(variable->data.location, true, + &semantic_name, &semantic_index); + } + + info->output_semantic_name[i] = semantic_name; + info->output_semantic_index[i] = semantic_index; + info->output_usagemask[i] = TGSI_WRITEMASK_XYZW; + + switch (semantic_name) { + case TGSI_SEMANTIC_PRIMID: + info->writes_primid = true; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + info->writes_viewport_index = true; + break; + case TGSI_SEMANTIC_LAYER: + info->writes_layer = true; + break; + case TGSI_SEMANTIC_PSIZE: + info->writes_psize = true; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + info->writes_clipvertex = true; + break; + case TGSI_SEMANTIC_COLOR: + info->colors_written |= 1 << semantic_index; + break; + case TGSI_SEMANTIC_STENCIL: + info->writes_stencil = true; + break; + case TGSI_SEMANTIC_SAMPLEMASK: + info->writes_samplemask = true; + break; + case TGSI_SEMANTIC_EDGEFLAG: + info->writes_edgeflag = true; + break; + case TGSI_SEMANTIC_POSITION: + if (info->processor == PIPE_SHADER_FRAGMENT) + info->writes_z = true; + else + info->writes_position = true; + break; + } + + i++; + } + + nir_foreach_variable(variable, &nir->uniforms) { + const struct glsl_type *type = variable->type; + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(type)); + unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); + + /* We rely on the fact that nir_lower_samplers_as_deref has + * eliminated struct dereferences. + */ + if (base_type == GLSL_TYPE_SAMPLER) + info->samplers_declared |= + u_bit_consecutive(variable->data.binding, aoa_size); + else if (base_type == GLSL_TYPE_IMAGE) + info->images_declared |= + u_bit_consecutive(variable->data.binding, aoa_size); + } + + info->num_written_clipdistance = nir->info.clip_distance_array_size; + info->num_written_culldistance = nir->info.cull_distance_array_size; + info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance); + info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance); + + if (info->processor == PIPE_SHADER_FRAGMENT) + info->uses_kill = nir->info.fs.uses_discard; + + /* TODO make this more accurate */ + info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS); + info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); + + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) + scan_instruction(info, instr); + } +} + +/** + * Perform "lowering" operations on the NIR that are run once when the shader + * selector is created. + */ +void +si_lower_nir(struct si_shader_selector* sel) +{ + /* Adjust the driver location of inputs and outputs. The state tracker + * interprets them as slots, while the ac/nir backend interprets them + * as individual components. + */ + nir_foreach_variable(variable, &sel->nir->inputs) + variable->data.driver_location *= 4; + + nir_foreach_variable(variable, &sel->nir->outputs) { + variable->data.driver_location *= 4; + + if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { + if (variable->data.location == FRAG_RESULT_DEPTH) + variable->data.driver_location += 2; + else if (variable->data.location == FRAG_RESULT_STENCIL) + variable->data.driver_location += 1; + } + } + + /* Perform lowerings (and optimizations) of code. + * + * Performance considerations aside, we must: + * - lower certain ALU operations + * - ensure constant offsets for texture instructions are folded + * and copy-propagated + */ + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size, + (nir_lower_io_options)0); + NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo); + + NIR_PASS_V(sel->nir, nir_lower_returns); + NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + + static const struct nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + }; + NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); + + bool progress; + do { + progress = false; + + /* (Constant) copy propagation is needed for txf with offsets. */ + NIR_PASS(progress, sel->nir, nir_copy_prop); + NIR_PASS(progress, sel->nir, nir_opt_remove_phis); + NIR_PASS(progress, sel->nir, nir_opt_dce); + if (nir_opt_trivial_continues(sel->nir)) { + progress = true; + NIR_PASS(progress, sel->nir, nir_copy_prop); + NIR_PASS(progress, sel->nir, nir_opt_dce); + } + NIR_PASS(progress, sel->nir, nir_opt_if); + NIR_PASS(progress, sel->nir, nir_opt_dead_cf); + NIR_PASS(progress, sel->nir, nir_opt_cse); + NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8); + + /* Needed for algebraic lowering */ + NIR_PASS(progress, sel->nir, nir_opt_algebraic); + NIR_PASS(progress, sel->nir, nir_opt_constant_folding); + + NIR_PASS(progress, sel->nir, nir_opt_undef); + NIR_PASS(progress, sel->nir, nir_opt_conditional_discard); + if (sel->nir->options->max_unroll_iterations) { + NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0); + } + } while (progress); +} + +static void declare_nir_input_vs(struct si_shader_context *ctx, + struct nir_variable *variable, unsigned rel, + LLVMValueRef out[4]) +{ + si_llvm_load_input_vs(ctx, variable->data.driver_location / 4 + rel, out); +} + +static void declare_nir_input_fs(struct si_shader_context *ctx, + struct nir_variable *variable, unsigned rel, + unsigned *fs_attr_idx, + LLVMValueRef out[4]) +{ + unsigned slot = variable->data.location + rel; + + assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0); + + if (slot == VARYING_SLOT_POS) { + out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT); + out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT); + out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT); + out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, + LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)); + return; + } + + si_llvm_load_input_fs(ctx, *fs_attr_idx, out); + (*fs_attr_idx)++; +} + +static LLVMValueRef +si_nir_load_sampler_desc(struct ac_shader_abi *abi, + unsigned descriptor_set, unsigned base_index, + unsigned constant_index, LLVMValueRef dynamic_index, + enum ac_descriptor_type desc_type, bool image, + bool write) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); + LLVMValueRef index = dynamic_index; + + assert(!descriptor_set); + + if (!index) + index = ctx->ac.i32_0; + + index = LLVMBuildAdd(builder, index, + LLVMConstInt(ctx->ac.i32, base_index + constant_index, false), + ""); + + if (image) { + assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER); + assert(base_index + constant_index < ctx->num_images); + + if (dynamic_index) + index = si_llvm_bound_index(ctx, index, ctx->num_images); + + index = LLVMBuildSub(ctx->gallivm.builder, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0), + index, ""); + + /* TODO: be smarter about when we use dcc_off */ + return si_load_image_desc(ctx, list, index, desc_type, write); + } + + assert(base_index + constant_index < ctx->num_samplers); + + if (dynamic_index) + index = si_llvm_bound_index(ctx, index, ctx->num_samplers); + + index = LLVMBuildAdd(ctx->gallivm.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), ""); + + return si_load_sampler_desc(ctx, list, index, desc_type); +} + +bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) +{ + struct tgsi_shader_info *info = &ctx->shader->selector->info; + + unsigned fs_attr_idx = 0; + nir_foreach_variable(variable, &nir->inputs) { + unsigned attrib_count = glsl_count_attribute_slots(variable->type, + nir->info.stage == MESA_SHADER_VERTEX); + unsigned input_idx = variable->data.driver_location; + + for (unsigned i = 0; i < attrib_count; ++i) { + LLVMValueRef data[4]; + + if (nir->info.stage == MESA_SHADER_VERTEX) + declare_nir_input_vs(ctx, variable, i, data); + else if (nir->info.stage == MESA_SHADER_FRAGMENT) + declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data); + + for (unsigned chan = 0; chan < 4; chan++) { + ctx->inputs[input_idx + chan] = + LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, ""); + } + } + } + + ctx->abi.inputs = &ctx->inputs[0]; + ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; + ctx->abi.clamp_shadow_reference = true; + + ctx->num_samplers = util_last_bit(info->samplers_declared); + ctx->num_images = util_last_bit(info->images_declared); + + ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL); + + return true; +} diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c new file mode 100644 index 000000000..8d98d6d0d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c @@ -0,0 +1,448 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* This file handles register programming of primitive binning. */ + +#include "si_pipe.h" +#include "sid.h" +#include "gfx9d.h" +#include "radeon/r600_cs.h" + +struct uvec2 { + unsigned x, y; +}; + +struct si_bin_size_map { + unsigned start; + unsigned bin_size_x; + unsigned bin_size_y; +}; + +typedef struct si_bin_size_map si_bin_size_subtable[3][9]; + +/* Find the bin size where sum is >= table[i].start and < table[i + 1].start. */ +static struct uvec2 si_find_bin_size(struct si_screen *sscreen, + const si_bin_size_subtable table[], + unsigned sum) +{ + unsigned log_num_rb_per_se = + util_logbase2_ceil(sscreen->b.info.num_render_backends / + sscreen->b.info.max_se); + unsigned log_num_se = util_logbase2_ceil(sscreen->b.info.max_se); + unsigned i; + + /* Get the chip-specific subtable. */ + const struct si_bin_size_map *subtable = + &table[log_num_rb_per_se][log_num_se][0]; + + for (i = 0; subtable[i].start != UINT_MAX; i++) { + if (sum >= subtable[i].start && sum < subtable[i + 1].start) + break; + } + + struct uvec2 size = {subtable[i].bin_size_x, subtable[i].bin_size_y}; + return size; +} + +static struct uvec2 si_get_color_bin_size(struct si_context *sctx, + unsigned cb_target_enabled_4bit) +{ + unsigned nr_samples = sctx->framebuffer.nr_samples; + unsigned sum = 0; + + /* Compute the sum of all Bpp. */ + for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { + if (!(cb_target_enabled_4bit & (0xf << (i * 4)))) + continue; + + struct r600_texture *rtex = + (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; + sum += rtex->surface.bpe; + } + + /* Multiply the sum by some function of the number of samples. */ + if (nr_samples >= 2) { + if (sctx->ps_iter_samples >= 2) + sum *= nr_samples; + else + sum *= 2; + } + + static const si_bin_size_subtable table[] = { + { + /* One RB / SE */ + { + /* One shader engine */ + { 0, 128, 128 }, + { 1, 64, 128 }, + { 2, 32, 128 }, + { 3, 16, 128 }, + { 17, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Two shader engines */ + { 0, 128, 128 }, + { 2, 64, 128 }, + { 3, 32, 128 }, + { 5, 16, 128 }, + { 17, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Four shader engines */ + { 0, 128, 128 }, + { 3, 64, 128 }, + { 5, 16, 128 }, + { 17, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + }, + { + /* Two RB / SE */ + { + /* One shader engine */ + { 0, 128, 128 }, + { 2, 64, 128 }, + { 3, 32, 128 }, + { 5, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Two shader engines */ + { 0, 128, 128 }, + { 3, 64, 128 }, + { 5, 32, 128 }, + { 9, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Four shader engines */ + { 0, 256, 256 }, + { 2, 128, 256 }, + { 3, 128, 128 }, + { 5, 64, 128 }, + { 9, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + }, + { + /* Four RB / SE */ + { + /* One shader engine */ + { 0, 128, 256 }, + { 2, 128, 128 }, + { 3, 64, 128 }, + { 5, 32, 128 }, + { 9, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Two shader engines */ + { 0, 256, 256 }, + { 2, 128, 256 }, + { 3, 128, 128 }, + { 5, 64, 128 }, + { 9, 32, 128 }, + { 17, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + /* Four shader engines */ + { 0, 256, 512 }, + { 2, 256, 256 }, + { 3, 128, 256 }, + { 5, 128, 128 }, + { 9, 64, 128 }, + { 17, 16, 128 }, + { 33, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + }, + }; + + return si_find_bin_size(sctx->screen, table, sum); +} + +static struct uvec2 si_get_depth_bin_size(struct si_context *sctx) +{ + struct si_state_dsa *dsa = sctx->queued.named.dsa; + + if (!sctx->framebuffer.state.zsbuf || + (!dsa->depth_enabled && !dsa->stencil_enabled)) { + /* Return the max size. */ + struct uvec2 size = {512, 512}; + return size; + } + + struct r600_texture *rtex = + (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture; + unsigned depth_coeff = dsa->depth_enabled ? 5 : 0; + unsigned stencil_coeff = rtex->surface.has_stencil && + dsa->stencil_enabled ? 1 : 0; + unsigned sum = 4 * (depth_coeff + stencil_coeff) * + sctx->framebuffer.nr_samples; + + static const si_bin_size_subtable table[] = { + { + // One RB / SE + { + // One shader engine + { 0, 128, 256 }, + { 2, 128, 128 }, + { 4, 64, 128 }, + { 7, 32, 128 }, + { 13, 16, 128 }, + { 49, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + // Two shader engines + { 0, 256, 256 }, + { 2, 128, 256 }, + { 4, 128, 128 }, + { 7, 64, 128 }, + { 13, 32, 128 }, + { 25, 16, 128 }, + { 49, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + // Four shader engines + { 0, 256, 512 }, + { 2, 256, 256 }, + { 4, 128, 256 }, + { 7, 128, 128 }, + { 13, 64, 128 }, + { 25, 16, 128 }, + { 49, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + }, + { + // Two RB / SE + { + // One shader engine + { 0, 256, 256 }, + { 2, 128, 256 }, + { 4, 128, 128 }, + { 7, 64, 128 }, + { 13, 32, 128 }, + { 25, 16, 128 }, + { 97, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + // Two shader engines + { 0, 256, 512 }, + { 2, 256, 256 }, + { 4, 128, 256 }, + { 7, 128, 128 }, + { 13, 64, 128 }, + { 25, 32, 128 }, + { 49, 16, 128 }, + { 97, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + { + // Four shader engines + { 0, 512, 512 }, + { 2, 256, 512 }, + { 4, 256, 256 }, + { 7, 128, 256 }, + { 13, 128, 128 }, + { 25, 64, 128 }, + { 49, 16, 128 }, + { 97, 0, 0 }, + { UINT_MAX, 0, 0 }, + }, + }, + { + // Four RB / SE + { + // One shader engine + { 0, 256, 512 }, + { 2, 256, 256 }, + { 4, 128, 256 }, + { 7, 128, 128 }, + { 13, 64, 128 }, + { 25, 32, 128 }, + { 49, 16, 128 }, + { UINT_MAX, 0, 0 }, + }, + { + // Two shader engines + { 0, 512, 512 }, + { 2, 256, 512 }, + { 4, 256, 256 }, + { 7, 128, 256 }, + { 13, 128, 128 }, + { 25, 64, 128 }, + { 49, 32, 128 }, + { 97, 16, 128 }, + { UINT_MAX, 0, 0 }, + }, + { + // Four shader engines + { 0, 512, 512 }, + { 4, 256, 512 }, + { 7, 256, 256 }, + { 13, 128, 256 }, + { 25, 128, 128 }, + { 49, 64, 128 }, + { 97, 16, 128 }, + { UINT_MAX, 0, 0 }, + }, + }, + }; + + return si_find_bin_size(sctx->screen, table, sum); +} + +static void si_emit_dpbb_disable(struct si_context *sctx) +{ + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + + radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | + S_028C44_DISABLE_START_OF_PRIM(1)); + radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF)); +} + +void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state) +{ + struct si_screen *sscreen = sctx->screen; + struct si_state_blend *blend = sctx->queued.named.blend; + struct si_state_dsa *dsa = sctx->queued.named.dsa; + unsigned db_shader_control = sctx->ps_db_shader_control; + + assert(sctx->b.chip_class >= GFX9); + + if (!sscreen->dpbb_allowed || !blend || !dsa) { + si_emit_dpbb_disable(sctx); + return; + } + + bool ps_can_kill = G_02880C_KILL_ENABLE(db_shader_control) || + G_02880C_MASK_EXPORT_ENABLE(db_shader_control) || + G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) || + blend->alpha_to_coverage; + + /* This is ported from Vulkan, but it doesn't make much sense to me. + * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this. + */ + bool ps_can_reject_z_trivially = + !G_02880C_Z_EXPORT_ENABLE(db_shader_control) || + G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control); + + /* Disable binning if PS can kill trivially with DB writes. + * Ported from Vulkan. (heuristic?) + */ + if (ps_can_kill && + ps_can_reject_z_trivially && + sctx->framebuffer.state.zsbuf && + dsa->db_can_write) { + si_emit_dpbb_disable(sctx); + return; + } + + /* Compute the bin size. */ + /* TODO: We could also look at enabled pixel shader outputs. */ + unsigned cb_target_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit & + blend->cb_target_enabled_4bit; + struct uvec2 color_bin_size = + si_get_color_bin_size(sctx, cb_target_enabled_4bit); + struct uvec2 depth_bin_size = si_get_depth_bin_size(sctx); + + unsigned color_area = color_bin_size.x * color_bin_size.y; + unsigned depth_area = depth_bin_size.x * depth_bin_size.y; + + struct uvec2 bin_size = color_area < depth_area ? color_bin_size + : depth_bin_size; + + if (!bin_size.x || !bin_size.y) { + si_emit_dpbb_disable(sctx); + return; + } + + /* Enable DFSM if it's preferred. */ + unsigned punchout_mode = V_028060_FORCE_OFF; + bool disable_start_of_prim = true; + + if (sscreen->dfsm_allowed && + cb_target_enabled_4bit && + !G_02880C_KILL_ENABLE(db_shader_control) && + /* These two also imply that DFSM is disabled when PS writes to memory. */ + !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) && + !G_02880C_EXEC_ON_NOOP(db_shader_control) && + G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) { + punchout_mode = V_028060_AUTO; + disable_start_of_prim = (cb_target_enabled_4bit & + blend->blend_enable_4bit) != 0; + } + + /* Tunable parameters. Also test with DFSM enabled/disabled. */ + unsigned context_states_per_bin; /* allowed range: [0, 5] */ + unsigned persistent_states_per_bin; /* allowed range: [0, 31] */ + unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */ + + switch (sctx->b.family) { + case CHIP_VEGA10: + case CHIP_RAVEN: + /* Tuned for Raven. Vega might need different values. */ + context_states_per_bin = 5; + persistent_states_per_bin = 31; + fpovs_per_batch = 63; + break; + default: + assert(0); + } + + /* Emit registers. */ + struct uvec2 bin_size_extend = {}; + if (bin_size.x >= 32) + bin_size_extend.x = util_logbase2(bin_size.x) - 5; + if (bin_size.y >= 32) + bin_size_extend.y = util_logbase2(bin_size.y) - 5; + + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | + S_028C44_BIN_SIZE_X(bin_size.x == 16) | + S_028C44_BIN_SIZE_Y(bin_size.y == 16) | + S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) | + S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) | + S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) | + S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) | + S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) | + S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) | + S_028C44_OPTIMAL_BIN_SELECTION(1)); + radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + S_028060_PUNCHOUT_MODE(punchout_mode)); +} diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c new file mode 100644 index 000000000..133f1e4aa --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c @@ -0,0 +1,209 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +#include "si_pipe.h" +#include "sid.h" +#include "radeon/r600_cs.h" + +/* For MSAA sample positions. */ +#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ + (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \ + (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \ + (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ + (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) + +/* 2xMSAA + * There are two locations (4, 4), (-4, -4). */ +static const uint32_t sample_locs_2x[4] = { + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), + FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4), +}; +/* 4xMSAA + * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */ +static const uint32_t sample_locs_4x[4] = { + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), + FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6), +}; + +/* Cayman 8xMSAA */ +static const uint32_t sample_locs_8x[] = { + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), + FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), +}; +/* Cayman 16xMSAA */ +static const uint32_t sample_locs_16x[] = { + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), + FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8), +}; + +static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count, + unsigned sample_index, float *out_value) +{ + int offset, index; + struct { + int idx:4; + } val; + + switch (sample_count) { + case 1: + default: + out_value[0] = out_value[1] = 0.5; + break; + case 2: + offset = 4 * (sample_index * 2); + val.idx = (sample_locs_2x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 4: + offset = 4 * (sample_index * 2); + val.idx = (sample_locs_4x[0] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 8: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (sample_locs_8x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + case 16: + offset = 4 * (sample_index % 4 * 2); + index = (sample_index / 4) * 4; + val.idx = (sample_locs_16x[index] >> offset) & 0xf; + out_value[0] = (float)(val.idx + 8) / 16.0f; + val.idx = (sample_locs_16x[index] >> (offset + 4)) & 0xf; + out_value[1] = (float)(val.idx + 8) / 16.0f; + break; + } +} + +void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples) +{ + switch (nr_samples) { + default: + case 1: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); + break; + case 2: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]); + break; + case 4: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]); + break; + case 8: + radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); + radeon_emit(cs, sample_locs_8x[0]); + radeon_emit(cs, sample_locs_8x[4]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, sample_locs_8x[1]); + radeon_emit(cs, sample_locs_8x[5]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, sample_locs_8x[2]); + radeon_emit(cs, sample_locs_8x[6]); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, sample_locs_8x[3]); + radeon_emit(cs, sample_locs_8x[7]); + break; + case 16: + radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16); + radeon_emit(cs, sample_locs_16x[0]); + radeon_emit(cs, sample_locs_16x[4]); + radeon_emit(cs, sample_locs_16x[8]); + radeon_emit(cs, sample_locs_16x[12]); + radeon_emit(cs, sample_locs_16x[1]); + radeon_emit(cs, sample_locs_16x[5]); + radeon_emit(cs, sample_locs_16x[9]); + radeon_emit(cs, sample_locs_16x[13]); + radeon_emit(cs, sample_locs_16x[2]); + radeon_emit(cs, sample_locs_16x[6]); + radeon_emit(cs, sample_locs_16x[10]); + radeon_emit(cs, sample_locs_16x[14]); + radeon_emit(cs, sample_locs_16x[3]); + radeon_emit(cs, sample_locs_16x[7]); + radeon_emit(cs, sample_locs_16x[11]); + radeon_emit(cs, sample_locs_16x[15]); + break; + } +} + +void si_init_msaa_functions(struct si_context *sctx) +{ + int i; + + sctx->b.b.get_sample_position = si_get_sample_position; + + si_get_sample_position(&sctx->b.b, 1, 0, sctx->sample_locations_1x[0]); + + for (i = 0; i < 2; i++) + si_get_sample_position(&sctx->b.b, 2, i, sctx->sample_locations_2x[i]); + for (i = 0; i < 4; i++) + si_get_sample_position(&sctx->b.b, 4, i, sctx->sample_locations_4x[i]); + for (i = 0; i < 8; i++) + si_get_sample_position(&sctx->b.b, 8, i, sctx->sample_locations_8x[i]); + for (i = 0; i < 16; i++) + si_get_sample_position(&sctx->b.b, 16, i, sctx->sample_locations_16x[i]); +} diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c new file mode 100644 index 000000000..9971bc815 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -0,0 +1,423 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák <maraeo@gmail.com> + * + */ + +#include "si_pipe.h" +#include "si_state.h" +#include "sid.h" +#include "radeon/r600_cs.h" + +#include "util/u_memory.h" + +static void si_set_streamout_enable(struct si_context *sctx, bool enable); + +static inline void si_so_target_reference(struct si_streamout_target **dst, + struct pipe_stream_output_target *src) +{ + pipe_so_target_reference((struct pipe_stream_output_target**)dst, src); +} + +static struct pipe_stream_output_target * +si_create_so_target(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_streamout_target *t; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; + + t = CALLOC_STRUCT(si_streamout_target); + if (!t) { + return NULL; + } + + u_suballocator_alloc(sctx->b.allocator_zeroed_memory, 4, 4, + &t->buf_filled_size_offset, + (struct pipe_resource**)&t->buf_filled_size); + if (!t->buf_filled_size) { + FREE(t); + return NULL; + } + + t->b.reference.count = 1; + t->b.context = ctx; + pipe_resource_reference(&t->b.buffer, buffer); + t->b.buffer_offset = buffer_offset; + t->b.buffer_size = buffer_size; + + util_range_add(&rbuffer->valid_buffer_range, buffer_offset, + buffer_offset + buffer_size); + return &t->b; +} + +static void si_so_target_destroy(struct pipe_context *ctx, + struct pipe_stream_output_target *target) +{ + struct si_streamout_target *t = (struct si_streamout_target*)target; + pipe_resource_reference(&t->b.buffer, NULL); + r600_resource_reference(&t->buf_filled_size, NULL); + FREE(t); +} + +void si_streamout_buffers_dirty(struct si_context *sctx) +{ + if (!sctx->streamout.enabled_mask) + return; + + si_mark_atom_dirty(sctx, &sctx->streamout.begin_atom); + si_set_streamout_enable(sctx, true); +} + +static void si_set_streamout_targets(struct pipe_context *ctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_buffer_resources *buffers = &sctx->rw_buffers; + struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + unsigned old_num_targets = sctx->streamout.num_targets; + unsigned i, bufidx; + + /* We are going to unbind the buffers. Mark which caches need to be flushed. */ + if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) { + /* Since streamout uses vector writes which go through TC L2 + * and most other clients can use TC L2 as well, we don't need + * to flush it. + * + * The only cases which requires flushing it is VGT DMA index + * fetching (on <= CIK) and indirect draw data, which are rare + * cases. Thus, flag the TC L2 dirtiness in the resource and + * handle it at draw call time. + */ + for (i = 0; i < sctx->streamout.num_targets; i++) + if (sctx->streamout.targets[i]) + r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true; + + /* Invalidate the scalar cache in case a streamout buffer is + * going to be used as a constant buffer. + * + * Invalidate TC L1, because streamout bypasses it (done by + * setting GLC=1 in the store instruction), but it can contain + * outdated data of streamout buffers. + * + * VS_PARTIAL_FLUSH is required if the buffers are going to be + * used as an input immediately. + */ + sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_VS_PARTIAL_FLUSH; + } + + /* All readers of the streamout targets need to be finished before we can + * start writing to the targets. + */ + if (num_targets) + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH; + + /* Streamout buffers must be bound in 2 places: + * 1) in VGT by setting the VGT_STRMOUT registers + * 2) as shader resources + */ + + /* Stop streamout. */ + if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) + si_emit_streamout_end(sctx); + + /* Set the new targets. */ + unsigned enabled_mask = 0, append_bitmask = 0; + for (i = 0; i < num_targets; i++) { + si_so_target_reference(&sctx->streamout.targets[i], targets[i]); + if (!targets[i]) + continue; + + r600_context_add_resource_size(ctx, targets[i]->buffer); + enabled_mask |= 1 << i; + + if (offsets[i] == ((unsigned)-1)) + append_bitmask |= 1 << i; + } + + for (; i < sctx->streamout.num_targets; i++) + si_so_target_reference(&sctx->streamout.targets[i], NULL); + + sctx->streamout.enabled_mask = enabled_mask; + sctx->streamout.num_targets = num_targets; + sctx->streamout.append_bitmask = append_bitmask; + + /* Update dirty state bits. */ + if (num_targets) { + si_streamout_buffers_dirty(sctx); + } else { + si_set_atom_dirty(sctx, &sctx->streamout.begin_atom, false); + si_set_streamout_enable(sctx, false); + } + + /* Set the shader resources.*/ + for (i = 0; i < num_targets; i++) { + bufidx = SI_VS_STREAMOUT_BUF0 + i; + + if (targets[i]) { + struct pipe_resource *buffer = targets[i]->buffer; + uint64_t va = r600_resource(buffer)->gpu_address; + + /* Set the descriptor. + * + * On VI, the format must be non-INVALID, otherwise + * the buffer will be considered not bound and store + * instructions will be no-ops. + */ + uint32_t *desc = descs->list + bufidx*4; + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); + desc[2] = 0xffffffff; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + /* Set the resource. */ + pipe_resource_reference(&buffers->buffers[bufidx], + buffer); + radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, + (struct r600_resource*)buffer, + buffers->shader_usage, + RADEON_PRIO_SHADER_RW_BUFFER, + true); + r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; + + buffers->enabled_mask |= 1u << bufidx; + } else { + /* Clear the descriptor and unset the resource. */ + memset(descs->list + bufidx*4, 0, + sizeof(uint32_t) * 4); + pipe_resource_reference(&buffers->buffers[bufidx], + NULL); + buffers->enabled_mask &= ~(1u << bufidx); + } + } + for (; i < old_num_targets; i++) { + bufidx = SI_VS_STREAMOUT_BUF0 + i; + /* Clear the descriptor and unset the resource. */ + memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4); + pipe_resource_reference(&buffers->buffers[bufidx], NULL); + buffers->enabled_mask &= ~(1u << bufidx); + } + + sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; +} + +static void si_flush_vgt_streamout(struct si_context *sctx) +{ + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + unsigned reg_strmout_cntl; + + /* The register is at different places on different ASICs. */ + if (sctx->b.chip_class >= CIK) { + reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; + radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); + } else { + reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; + radeon_set_config_reg(cs, reg_strmout_cntl, 0); + } + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); + + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ + radeon_emit(cs, 0); + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ + radeon_emit(cs, 4); /* poll interval */ +} + +static void si_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom) +{ + struct si_context *sctx = (struct si_context*)rctx; + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + struct si_streamout_target **t = sctx->streamout.targets; + uint16_t *stride_in_dw = sctx->streamout.stride_in_dw; + unsigned i; + + si_flush_vgt_streamout(sctx); + + for (i = 0; i < sctx->streamout.num_targets; i++) { + if (!t[i]) + continue; + + t[i]->stride_in_dw = stride_in_dw[i]; + + /* SI binds streamout buffers as shader resources. + * VGT only counts primitives and tells the shader + * through SGPRs what to do. */ + radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); + radeon_emit(cs, (t[i]->b.buffer_offset + + t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */ + + if (sctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) { + uint64_t va = t[i]->buf_filled_size->gpu_address + + t[i]->buf_filled_size_offset; + + /* Append. */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); /* src address lo */ + radeon_emit(cs, va >> 32); /* src address hi */ + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + t[i]->buf_filled_size, + RADEON_USAGE_READ, + RADEON_PRIO_SO_FILLED_SIZE); + } else { + /* Start from the beginning. */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */ + radeon_emit(cs, 0); /* unused */ + } + } + + sctx->streamout.begin_emitted = true; +} + +void si_emit_streamout_end(struct si_context *sctx) +{ + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + struct si_streamout_target **t = sctx->streamout.targets; + unsigned i; + uint64_t va; + + si_flush_vgt_streamout(sctx); + + for (i = 0; i < sctx->streamout.num_targets; i++) { + if (!t[i]) + continue; + + va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset; + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | + STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ + radeon_emit(cs, va); /* dst address lo */ + radeon_emit(cs, va >> 32); /* dst address hi */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + t[i]->buf_filled_size, + RADEON_USAGE_WRITE, + RADEON_PRIO_SO_FILLED_SIZE); + + /* Zero the buffer size. The counters (primitives generated, + * primitives emitted) may be enabled even if there is not + * buffer bound. This ensures that the primitives-emitted query + * won't increment. */ + radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + + t[i]->buf_filled_size_valid = true; + } + + sctx->streamout.begin_emitted = false; + sctx->b.flags |= R600_CONTEXT_STREAMOUT_FLUSH; +} + +/* STREAMOUT CONFIG DERIVED STATE + * + * Streamout must be enabled for the PRIMITIVES_GENERATED query to work. + * The buffer mask is an independent state, so no writes occur if there + * are no buffers bound. + */ + +static void si_emit_streamout_enable(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + struct si_context *sctx = (struct si_context*)rctx; + + radeon_set_context_reg_seq(sctx->b.gfx.cs, R_028B94_VGT_STRMOUT_CONFIG, 2); + radeon_emit(sctx->b.gfx.cs, + S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) | + S_028B94_RAST_STREAM(0) | + S_028B94_STREAMOUT_1_EN(si_get_strmout_en(sctx)) | + S_028B94_STREAMOUT_2_EN(si_get_strmout_en(sctx)) | + S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx))); + radeon_emit(sctx->b.gfx.cs, + sctx->streamout.hw_enabled_mask & + sctx->streamout.enabled_stream_buffers_mask); +} + +static void si_set_streamout_enable(struct si_context *sctx, bool enable) +{ + bool old_strmout_en = si_get_strmout_en(sctx); + unsigned old_hw_enabled_mask = sctx->streamout.hw_enabled_mask; + + sctx->streamout.streamout_enabled = enable; + + sctx->streamout.hw_enabled_mask = sctx->streamout.enabled_mask | + (sctx->streamout.enabled_mask << 4) | + (sctx->streamout.enabled_mask << 8) | + (sctx->streamout.enabled_mask << 12); + + if ((old_strmout_en != si_get_strmout_en(sctx)) || + (old_hw_enabled_mask != sctx->streamout.hw_enabled_mask)) + si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom); +} + +void si_update_prims_generated_query_state(struct si_context *sctx, + unsigned type, int diff) +{ + if (type == PIPE_QUERY_PRIMITIVES_GENERATED) { + bool old_strmout_en = si_get_strmout_en(sctx); + + sctx->streamout.num_prims_gen_queries += diff; + assert(sctx->streamout.num_prims_gen_queries >= 0); + + sctx->streamout.prims_gen_query_enabled = + sctx->streamout.num_prims_gen_queries != 0; + + if (old_strmout_en != si_get_strmout_en(sctx)) + si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom); + } +} + +void si_init_streamout_functions(struct si_context *sctx) +{ + sctx->b.b.create_stream_output_target = si_create_so_target; + sctx->b.b.stream_output_target_destroy = si_so_target_destroy; + sctx->b.b.set_stream_output_targets = si_set_streamout_targets; + sctx->streamout.begin_atom.emit = si_emit_streamout_begin; + sctx->streamout.enable_atom.emit = si_emit_streamout_enable; +} diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c new file mode 100644 index 000000000..f41655847 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -0,0 +1,445 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "si_pipe.h" +#include "sid.h" +#include "radeon/r600_cs.h" +#include "util/u_viewport.h" +#include "tgsi/tgsi_scan.h" + +#define SI_MAX_SCISSOR 16384 + +static void si_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *state) +{ + struct si_context *ctx = (struct si_context *)pctx; + int i; + + for (i = 0; i < num_scissors; i++) + ctx->scissors.states[start_slot + i] = state[i]; + + if (!ctx->queued.named.rasterizer || + !ctx->queued.named.rasterizer->scissor_enable) + return; + + ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; + si_mark_atom_dirty(ctx, &ctx->scissors.atom); +} + +/* Since the guard band disables clipping, we have to clip per-pixel + * using a scissor. + */ +static void si_get_scissor_from_viewport(struct si_context *ctx, + const struct pipe_viewport_state *vp, + struct si_signed_scissor *scissor) +{ + float tmp, minx, miny, maxx, maxy; + + /* Convert (-1, -1) and (1, 1) from clip space into window space. */ + minx = -vp->scale[0] + vp->translate[0]; + miny = -vp->scale[1] + vp->translate[1]; + maxx = vp->scale[0] + vp->translate[0]; + maxy = vp->scale[1] + vp->translate[1]; + + /* Handle inverted viewports. */ + if (minx > maxx) { + tmp = minx; + minx = maxx; + maxx = tmp; + } + if (miny > maxy) { + tmp = miny; + miny = maxy; + maxy = tmp; + } + + /* Convert to integer and round up the max bounds. */ + scissor->minx = minx; + scissor->miny = miny; + scissor->maxx = ceilf(maxx); + scissor->maxy = ceilf(maxy); +} + +static void si_clamp_scissor(struct si_context *ctx, + struct pipe_scissor_state *out, + struct si_signed_scissor *scissor) +{ + out->minx = CLAMP(scissor->minx, 0, SI_MAX_SCISSOR); + out->miny = CLAMP(scissor->miny, 0, SI_MAX_SCISSOR); + out->maxx = CLAMP(scissor->maxx, 0, SI_MAX_SCISSOR); + out->maxy = CLAMP(scissor->maxy, 0, SI_MAX_SCISSOR); +} + +static void si_clip_scissor(struct pipe_scissor_state *out, + struct pipe_scissor_state *clip) +{ + out->minx = MAX2(out->minx, clip->minx); + out->miny = MAX2(out->miny, clip->miny); + out->maxx = MIN2(out->maxx, clip->maxx); + out->maxy = MIN2(out->maxy, clip->maxy); +} + +static void si_scissor_make_union(struct si_signed_scissor *out, + struct si_signed_scissor *in) +{ + out->minx = MIN2(out->minx, in->minx); + out->miny = MIN2(out->miny, in->miny); + out->maxx = MAX2(out->maxx, in->maxx); + out->maxy = MAX2(out->maxy, in->maxy); +} + +static void si_emit_one_scissor(struct si_context *ctx, + struct radeon_winsys_cs *cs, + struct si_signed_scissor *vp_scissor, + struct pipe_scissor_state *scissor) +{ + struct pipe_scissor_state final; + + if (ctx->vs_disables_clipping_viewport) { + final.minx = final.miny = 0; + final.maxx = final.maxy = SI_MAX_SCISSOR; + } else { + si_clamp_scissor(ctx, &final, vp_scissor); + } + + if (scissor) + si_clip_scissor(&final, scissor); + + radeon_emit(cs, S_028250_TL_X(final.minx) | + S_028250_TL_Y(final.miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(final.maxx) | + S_028254_BR_Y(final.maxy)); +} + +/* the range is [-MAX, MAX] */ +#define GET_MAX_VIEWPORT_RANGE(rctx) (32768) + +static void si_emit_guardband(struct si_context *ctx, + struct si_signed_scissor *vp_as_scissor) +{ + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct pipe_viewport_state vp; + float left, top, right, bottom, max_range, guardband_x, guardband_y; + float discard_x, discard_y; + + /* Reconstruct the viewport transformation from the scissor. */ + vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0; + vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0; + vp.scale[0] = vp_as_scissor->maxx - vp.translate[0]; + vp.scale[1] = vp_as_scissor->maxy - vp.translate[1]; + + /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */ + if (vp_as_scissor->minx == vp_as_scissor->maxx) + vp.scale[0] = 0.5; + if (vp_as_scissor->miny == vp_as_scissor->maxy) + vp.scale[1] = 0.5; + + /* Find the biggest guard band that is inside the supported viewport + * range. The guard band is specified as a horizontal and vertical + * distance from (0,0) in clip space. + * + * This is done by applying the inverse viewport transformation + * on the viewport limits to get those limits in clip space. + * + * Use a limit one pixel smaller to allow for some precision error. + */ + max_range = GET_MAX_VIEWPORT_RANGE(ctx) - 1; + left = (-max_range - vp.translate[0]) / vp.scale[0]; + right = ( max_range - vp.translate[0]) / vp.scale[0]; + top = (-max_range - vp.translate[1]) / vp.scale[1]; + bottom = ( max_range - vp.translate[1]) / vp.scale[1]; + + assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); + + guardband_x = MIN2(-left, right); + guardband_y = MIN2(-top, bottom); + + discard_x = 1.0; + discard_y = 1.0; + + if (unlikely(ctx->current_rast_prim < PIPE_PRIM_TRIANGLES) && + ctx->queued.named.rasterizer) { + /* When rendering wide points or lines, we need to be more + * conservative about when to discard them entirely. */ + const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer; + float pixels; + + if (ctx->current_rast_prim == PIPE_PRIM_POINTS) + pixels = rs->max_point_size; + else + pixels = rs->line_width; + + /* Add half the point size / line width */ + discard_x += pixels / (2.0 * vp.scale[0]); + discard_y += pixels / (2.0 * vp.scale[1]); + + /* Discard primitives that would lie entirely outside the clip + * region. */ + discard_x = MIN2(discard_x, guardband_x); + discard_y = MIN2(discard_y, guardband_y); + } + + /* If any of the GB registers is updated, all of them must be updated. */ + radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4); + + radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */ + radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */ + radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */ + radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */ +} + +static void si_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom) +{ + struct si_context *ctx = (struct si_context *)rctx; + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct pipe_scissor_state *states = ctx->scissors.states; + unsigned mask = ctx->scissors.dirty_mask; + bool scissor_enabled = false; + struct si_signed_scissor max_vp_scissor; + int i; + + if (ctx->queued.named.rasterizer) + scissor_enabled = ctx->queued.named.rasterizer->scissor_enable; + + /* The simple case: Only 1 viewport is active. */ + if (!ctx->vs_writes_viewport_index) { + struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0]; + + if (!(mask & 1)) + return; + + radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); + si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL); + si_emit_guardband(ctx, vp); + ctx->scissors.dirty_mask &= ~1; /* clear one bit */ + return; + } + + /* Shaders can draw to any viewport. Make a union of all viewports. */ + max_vp_scissor = ctx->viewports.as_scissor[0]; + for (i = 1; i < SI_MAX_VIEWPORTS; i++) + si_scissor_make_union(&max_vp_scissor, + &ctx->viewports.as_scissor[i]); + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + + start * 4 * 2, count * 2); + for (i = start; i < start+count; i++) { + si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i], + scissor_enabled ? &states[i] : NULL); + } + } + si_emit_guardband(ctx, &max_vp_scissor); + ctx->scissors.dirty_mask = 0; +} + +static void si_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *state) +{ + struct si_context *ctx = (struct si_context *)pctx; + unsigned mask; + int i; + + for (i = 0; i < num_viewports; i++) { + unsigned index = start_slot + i; + + ctx->viewports.states[index] = state[i]; + si_get_scissor_from_viewport(ctx, &state[i], + &ctx->viewports.as_scissor[index]); + } + + mask = ((1 << num_viewports) - 1) << start_slot; + ctx->viewports.dirty_mask |= mask; + ctx->viewports.depth_range_dirty_mask |= mask; + ctx->scissors.dirty_mask |= mask; + si_mark_atom_dirty(ctx, &ctx->viewports.atom); + si_mark_atom_dirty(ctx, &ctx->scissors.atom); +} + +static void si_emit_one_viewport(struct si_context *ctx, + struct pipe_viewport_state *state) +{ + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + + radeon_emit(cs, fui(state->scale[0])); + radeon_emit(cs, fui(state->translate[0])); + radeon_emit(cs, fui(state->scale[1])); + radeon_emit(cs, fui(state->translate[1])); + radeon_emit(cs, fui(state->scale[2])); + radeon_emit(cs, fui(state->translate[2])); +} + +static void si_emit_viewports(struct si_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct pipe_viewport_state *states = ctx->viewports.states; + unsigned mask = ctx->viewports.dirty_mask; + + /* The simple case: Only 1 viewport is active. */ + if (!ctx->vs_writes_viewport_index) { + if (!(mask & 1)) + return; + + radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); + si_emit_one_viewport(ctx, &states[0]); + ctx->viewports.dirty_mask &= ~1; /* clear one bit */ + return; + } + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + + start * 4 * 6, count * 6); + for (i = start; i < start+count; i++) + si_emit_one_viewport(ctx, &states[i]); + } + ctx->viewports.dirty_mask = 0; +} + +static inline void +si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz, + bool window_space_position, float *zmin, float *zmax) +{ + if (window_space_position) { + *zmin = 0; + *zmax = 1; + return; + } + util_viewport_zmin_zmax(vp, halfz, zmin, zmax); +} + +static void si_emit_depth_ranges(struct si_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct pipe_viewport_state *states = ctx->viewports.states; + unsigned mask = ctx->viewports.depth_range_dirty_mask; + bool clip_halfz = false; + bool window_space = ctx->vs_disables_clipping_viewport; + float zmin, zmax; + + if (ctx->queued.named.rasterizer) + clip_halfz = ctx->queued.named.rasterizer->clip_halfz; + + /* The simple case: Only 1 viewport is active. */ + if (!ctx->vs_writes_viewport_index) { + if (!(mask & 1)) + return; + + si_viewport_zmin_zmax(&states[0], clip_halfz, window_space, + &zmin, &zmax); + + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); + radeon_emit(cs, fui(zmin)); + radeon_emit(cs, fui(zmax)); + ctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */ + return; + } + + while (mask) { + int start, count, i; + + u_bit_scan_consecutive_range(&mask, &start, &count); + + radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + + start * 4 * 2, count * 2); + for (i = start; i < start+count; i++) { + si_viewport_zmin_zmax(&states[i], clip_halfz, window_space, + &zmin, &zmax); + radeon_emit(cs, fui(zmin)); + radeon_emit(cs, fui(zmax)); + } + } + ctx->viewports.depth_range_dirty_mask = 0; +} + +static void si_emit_viewport_states(struct r600_common_context *rctx, + struct r600_atom *atom) +{ + struct si_context *ctx = (struct si_context *)rctx; + si_emit_viewports(ctx); + si_emit_depth_ranges(ctx); +} + +/** + * This reacts to 2 state changes: + * - VS.writes_viewport_index + * - VS output position in window space (enable/disable) + * + * Normally, we only emit 1 viewport and 1 scissor if no shader is using + * the VIEWPORT_INDEX output, and emitting the other viewports and scissors + * is delayed. When a shader with VIEWPORT_INDEX appears, this should be + * called to emit the rest. + */ +void si_update_vs_viewport_state(struct si_context *ctx) +{ + struct tgsi_shader_info *info = si_get_vs_info(ctx); + bool vs_window_space; + + if (!info) + return; + + /* When the VS disables clipping and viewport transformation. */ + vs_window_space = + info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; + + if (ctx->vs_disables_clipping_viewport != vs_window_space) { + ctx->vs_disables_clipping_viewport = vs_window_space; + ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(ctx, &ctx->scissors.atom); + si_mark_atom_dirty(ctx, &ctx->viewports.atom); + } + + /* Viewport index handling. */ + ctx->vs_writes_viewport_index = info->writes_viewport_index; + if (!ctx->vs_writes_viewport_index) + return; + + if (ctx->scissors.dirty_mask) + si_mark_atom_dirty(ctx, &ctx->scissors.atom); + + if (ctx->viewports.dirty_mask || + ctx->viewports.depth_range_dirty_mask) + si_mark_atom_dirty(ctx, &ctx->viewports.atom); +} + +void si_init_viewport_functions(struct si_context *ctx) +{ + ctx->scissors.atom.emit = si_emit_scissors; + ctx->viewports.atom.emit = si_emit_viewport_states; + + ctx->b.b.set_scissor_states = si_set_scissor_states; + ctx->b.b.set_viewport_states = si_set_viewport_states; +} diff --git a/lib/mesa/src/gallium/drivers/softpipe/sp_query.c b/lib/mesa/src/gallium/drivers/softpipe/sp_query.c index bec0116a5..267c99977 100644 --- a/lib/mesa/src/gallium/drivers/softpipe/sp_query.c +++ b/lib/mesa/src/gallium/drivers/softpipe/sp_query.c @@ -60,9 +60,11 @@ softpipe_create_query(struct pipe_context *pipe, assert(type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE || + type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE || type == PIPE_QUERY_TIME_ELAPSED || type == PIPE_QUERY_SO_STATISTICS || type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || + type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || type == PIPE_QUERY_PRIMITIVES_EMITTED || type == PIPE_QUERY_PRIMITIVES_GENERATED || type == PIPE_QUERY_PIPELINE_STATISTICS || @@ -92,6 +94,7 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: sq->start = softpipe->occlusion_count; break; case PIPE_QUERY_TIME_ELAPSED: @@ -102,7 +105,9 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - sq->end = FALSE; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written; + sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed; break; case PIPE_QUERY_PRIMITIVES_EMITTED: sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written; @@ -144,6 +149,7 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: sq->end = softpipe->occlusion_count; break; case PIPE_QUERY_TIMESTAMP: @@ -153,6 +159,7 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) sq->end = os_time_get_nano(); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written - sq->so.num_primitives_written; sq->so.primitives_storage_needed = @@ -230,6 +237,7 @@ softpipe_get_query_result(struct pipe_context *pipe, vresult->b = TRUE; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: vresult->b = sq->end != 0; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: { @@ -247,6 +255,7 @@ softpipe_get_query_result(struct pipe_context *pipe, *result = sq->so.primitives_storage_needed; break; case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: vresult->b = sq->end - sq->start != 0; break; default: diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c index b968fb030..19d5e5031 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c @@ -286,25 +286,25 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl, gen_type, gen_nr, gen_size, gen_func, &gen_buf); - if (ret != PIPE_OK) - goto done; - - pipe_debug_message(&svga->debug.callback, PERF_INFO, - "generating temporary index buffer for drawing %s", - u_prim_name(prim)); - - ret = svga_hwtnl_simple_draw_range_elements(hwtnl, - gen_buf, - gen_size, - start, - 0, - count - 1, - gen_prim, 0, gen_nr, - start_instance, - instance_count); -done: - if (gen_buf) + if (ret == PIPE_OK) { + pipe_debug_message(&svga->debug.callback, PERF_INFO, + "generating temporary index buffer for drawing %s", + u_prim_name(prim)); + + ret = svga_hwtnl_simple_draw_range_elements(hwtnl, + gen_buf, + gen_size, + start, + 0, + count - 1, + gen_prim, 0, gen_nr, + start_instance, + instance_count); + } + + if (gen_buf) { pipe_resource_reference(&gen_buf, NULL); + } } SVGA_STATS_TIME_POP(svga_sws(svga)); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c index f9bb13664..b1db87107 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c @@ -242,21 +242,21 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, prim, gen_prim, count, gen_nr, gen_size, gen_func, &gen_buf); - if (ret != PIPE_OK) - goto done; - - ret = svga_hwtnl_simple_draw_range_elements(hwtnl, - gen_buf, - gen_size, - index_bias, - min_index, - max_index, - gen_prim, 0, gen_nr, - start_instance, - instance_count); -done: - if (gen_buf) + if (ret == PIPE_OK) { + ret = svga_hwtnl_simple_draw_range_elements(hwtnl, + gen_buf, + gen_size, + index_bias, + min_index, + max_index, + gen_prim, 0, gen_nr, + start_instance, + instance_count); + } + + if (gen_buf) { pipe_resource_reference(&gen_buf, NULL); + } } SVGA_STATS_TIME_POP(svga_sws(hwtnl->svga)); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h index 38e5e66fd..2a60038e9 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h +++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h @@ -42,7 +42,7 @@ struct u_upload_mgr; * handled by the svga device. Other types will be converted to * these types by the index/translation code. */ -static const unsigned svga_hw_prims = +static const unsigned svga_hw_prims = ((1 << PIPE_PRIM_POINTS) | (1 << PIPE_PRIM_LINES) | (1 << PIPE_PRIM_LINE_STRIP) | @@ -73,23 +73,23 @@ svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count) case PIPE_PRIM_LINES: *prim_count = vcount / 2; - return SVGA3D_PRIMITIVE_LINELIST; + return SVGA3D_PRIMITIVE_LINELIST; case PIPE_PRIM_LINE_STRIP: *prim_count = vcount - 1; - return SVGA3D_PRIMITIVE_LINESTRIP; + return SVGA3D_PRIMITIVE_LINESTRIP; case PIPE_PRIM_TRIANGLES: *prim_count = vcount / 3; - return SVGA3D_PRIMITIVE_TRIANGLELIST; + return SVGA3D_PRIMITIVE_TRIANGLELIST; case PIPE_PRIM_TRIANGLE_STRIP: *prim_count = vcount - 2; - return SVGA3D_PRIMITIVE_TRIANGLESTRIP; + return SVGA3D_PRIMITIVE_TRIANGLESTRIP; case PIPE_PRIM_TRIANGLE_FAN: *prim_count = vcount - 2; - return SVGA3D_PRIMITIVE_TRIANGLEFAN; + return SVGA3D_PRIMITIVE_TRIANGLEFAN; case PIPE_PRIM_LINES_ADJACENCY: *prim_count = vcount / 4; @@ -119,8 +119,7 @@ struct index_cache { u_generate_func generate; unsigned gen_nr; - /* If non-null, this buffer is filled by calling - * generate(nr, map(buffer)) + /* If non-null, this buffer is filled by calling generate(nr, map(buffer)) */ struct pipe_resource *buffer; }; @@ -160,7 +159,7 @@ struct svga_hwtnl { * vertex buffers. */ int index_bias; - + /* Provoking vertex information (for flat shading). */ unsigned api_pv; /**< app-requested PV mode (PV_FIRST or PV_LAST) */ unsigned hw_pv; /**< device-supported PV mode (PV_FIRST or PV_LAST) */ @@ -220,27 +219,26 @@ svga_need_unfilled_fallback(const struct svga_hwtnl *hwtnl, } -enum pipe_error -svga_hwtnl_prim( struct svga_hwtnl *hwtnl, - const SVGA3dPrimitiveRange *range, - unsigned vcount, - unsigned min_index, - unsigned max_index, - struct pipe_resource *ib, - unsigned start_instance, unsigned instance_count); - enum pipe_error -svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, - struct pipe_resource *indexBuffer, - unsigned index_size, - int index_bias, - unsigned min_index, - unsigned max_index, - enum pipe_prim_type prim, - unsigned start, - unsigned count, - unsigned start_instance, - unsigned instance_count); +svga_hwtnl_prim(struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned vcount, + unsigned min_index, + unsigned max_index, + struct pipe_resource *ib, + unsigned start_instance, unsigned instance_count); +enum pipe_error +svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl, + struct pipe_resource *indexBuffer, + unsigned index_size, + int index_bias, + unsigned min_index, + unsigned max_index, + enum pipe_prim_type prim, + unsigned start, + unsigned count, + unsigned start_instance, + unsigned instance_count); #endif diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c index 408e175fe..a29fbd3ac 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c @@ -331,6 +331,7 @@ svga_create_blend_state(struct pipe_context *pipe, blend->independent_blend_enable = templ->independent_blend_enable; blend->alpha_to_coverage = templ->alpha_to_coverage; + blend->alpha_to_one = templ->alpha_to_one; if (svga_have_vgpu10(svga)) { define_blend_state_object(svga, blend); diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c index 7cbd51669..0b2d8af64 100644 --- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c +++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c @@ -209,6 +209,12 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga, goto fail; } + if (emit.info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + debug_printf( + "svga: indirect indexing of temporary registers is not supported.\n"); + goto fail; + } + emit.in_main_func = TRUE; if (!svga_shader_emit_header(&emit)) { diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h new file mode 100644 index 000000000..d81f7d019 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h @@ -0,0 +1,152 @@ +/****************************************************************************** +* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +* +* @file ${filename}.h +* +* @brief Dynamic Knobs for Core. +* +* ======================= AUTO GENERATED: DO NOT EDIT !!! ==================== +* +* Generation Command Line: +* ${'\n* '.join(cmdline)} +* +******************************************************************************/ +<% calc_max_knob_len(knobs) %> +#pragma once +#include <string> + +struct KnobBase +{ +private: + // Update the input string. + static void autoExpandEnvironmentVariables(std::string &text); + +protected: + // Leave input alone and return new string. + static std::string expandEnvironmentVariables(std::string const &input) + { + std::string text = input; + autoExpandEnvironmentVariables(text); + return text; + } + + template <typename T> + static T expandEnvironmentVariables(T const &input) + { + return input; + } +}; + +template <typename T> +struct Knob : KnobBase +{ +public: + const T& Value() const { return m_Value; } + const T& Value(T const &newValue) + { + m_Value = expandEnvironmentVariables(newValue); + return Value(); + } + +private: + T m_Value; +}; + +#define DEFINE_KNOB(_name, _type, _default) \\ + + struct Knob_##_name : Knob<_type> \\ + + { \\ + + static const char* Name() { return "KNOB_" #_name; } \\ + + static _type DefaultValue() { return (_default); } \\ + + } _name; + +#define GET_KNOB(_name) g_GlobalKnobs._name.Value() +#define SET_KNOB(_name, _newValue) g_GlobalKnobs._name.Value(_newValue) + +struct GlobalKnobs +{ + % for knob in knobs: + //----------------------------------------------------------- + // KNOB_${knob[0]} + // + % for line in knob[1]['desc']: + // ${line} + % endfor + % if knob[1].get('choices'): + <% + choices = knob[1].get('choices') + _max_len = calc_max_name_len(choices) %>// + % for i in range(len(choices)): + // ${choices[i]['name']}${space_name(choices[i]['name'], _max_len)} = ${format(choices[i]['value'], '#010x')} + % endfor + % endif + // + % if knob[1]['type'] == 'std::string': + DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, "${repr(knob[1]['default'])[1:-1]}"); + % else: + DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, ${knob[1]['default']}); + % endif + + % endfor + + std::string ToString(const char* optPerLinePrefix=""); + GlobalKnobs(); +}; +extern GlobalKnobs g_GlobalKnobs; + +#undef DEFINE_KNOB + +% for knob in knobs: +#define KNOB_${knob[0]}${space_knob(knob[0])} GET_KNOB(${knob[0]}) +% endfor + +<%! + # Globally available python + max_len = 0 + def calc_max_knob_len(knobs): + global max_len + max_len = 0 + for knob in knobs: + if len(knob[0]) > max_len: max_len = len(knob[0]) + max_len += len('KNOB_ ') + if max_len % 4: max_len += 4 - (max_len % 4) + + def space_knob(knob): + knob_len = len('KNOB_' + knob) + return ' '*(max_len - knob_len) + + def calc_max_name_len(choices_array): + _max_len = 0 + for choice in choices_array: + if len(choice['name']) > _max_len: _max_len = len(choice['name']) + + if _max_len % 4: _max_len += 4 - (_max_len % 4) + return _max_len + + def space_name(name, max_len): + name_len = len(name) + return ' '*(max_len - name_len) +%> diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl new file mode 100644 index 000000000..a4ecd09f1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl @@ -0,0 +1,193 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD128 AVX (512) implementation +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + +#define SIMD_WRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_1I_(op, intrin, mask) \ + template<int ImmT> \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_2I(op) \ + template<int ImmT>\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\ + } + +#define SIMD_WRAPPER_3_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\ + } +#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf)) + +#define SIMD_DWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ + template<int ImmT> \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_2I(op) \ + template<int ImmT>\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\ + } + +#define SIMD_IWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_1I_(op, intrin, mask) \ + template<int ImmT> \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_2I(op) \ + template<int ImmT>\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\ + } + +SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) +SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) +SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 +SIMD_IWRAPPER_2_16(unpackhi_epi16); +SIMD_IWRAPPER_2_64(unpackhi_epi64); +SIMD_IWRAPPER_2_8(unpackhi_epi8); +SIMD_IWRAPPER_2_16(unpacklo_epi16); +SIMD_IWRAPPER_2_64(unpacklo_epi64); +SIMD_IWRAPPER_2_8(unpacklo_epi8); + +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +{ + __mmask64 m = 0xffffull; + return static_cast<uint32_t>( + _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +} + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_1I_ +#undef SIMD_WRAPPER_1I +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_3 +#undef SIMD_DWRAPPER_1_ +#undef SIMD_DWRAPPER_1 +#undef SIMD_DWRAPPER_1I_ +#undef SIMD_DWRAPPER_1I +#undef SIMD_DWRAPPER_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_IWRAPPER_1_ +#undef SIMD_IWRAPPER_1_8 +#undef SIMD_IWRAPPER_1_16 +#undef SIMD_IWRAPPER_1_32 +#undef SIMD_IWRAPPER_1_64 +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_1I_8 +#undef SIMD_IWRAPPER_1I_16 +#undef SIMD_IWRAPPER_1I_32 +#undef SIMD_IWRAPPER_1I_64 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2_8 +#undef SIMD_IWRAPPER_2_16 +#undef SIMD_IWRAPPER_2_32 +#undef SIMD_IWRAPPER_2_64 +#undef SIMD_IWRAPPER_2I +//#undef SIMD_IWRAPPER_2I_8 +//#undef SIMD_IWRAPPER_2I_16 +//#undef SIMD_IWRAPPER_2I_32 +//#undef SIMD_IWRAPPER_2I_64 diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl new file mode 100644 index 000000000..b0cae5034 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl @@ -0,0 +1,35 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD128 AVX (512) implementation for Knights Family +// +// Since this implementation inherits from the AVX512Base implementation, +// the only operations below ones that replace AVX512F / AVX512CD operations +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl new file mode 100644 index 000000000..6ffe7c2a0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl @@ -0,0 +1,127 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD256 AVX (512) implementation for Core processors +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + +#define SIMD_DWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf)) + +#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ + template<int ImmT> \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf)) + +#define SIMD_DWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_1I_(op, intrin, mask) \ + template<int ImmT> \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf)) + + +SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) +SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) +SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 +SIMD_IWRAPPER_2_16(unpackhi_epi16); +SIMD_IWRAPPER_2_64(unpackhi_epi64); +SIMD_IWRAPPER_2_8(unpackhi_epi8); +SIMD_IWRAPPER_2_16(unpacklo_epi16); +SIMD_IWRAPPER_2_64(unpacklo_epi64); +SIMD_IWRAPPER_2_8(unpacklo_epi8); + +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +{ + __mmask64 m = 0xffffffffull; + return static_cast<uint32_t>( + _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +} + +#undef SIMD_DWRAPPER_1_ +#undef SIMD_DWRAPPER_1 +#undef SIMD_DWRAPPER_1I_ +#undef SIMD_DWRAPPER_1I +#undef SIMD_DWRAPPER_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_IWRAPPER_1_ +#undef SIMD_IWRAPPER_1_8 +#undef SIMD_IWRAPPER_1_16 +#undef SIMD_IWRAPPER_1_64 +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_1I_8 +#undef SIMD_IWRAPPER_1I_16 +#undef SIMD_IWRAPPER_1I_64 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2_8 +#undef SIMD_IWRAPPER_2_16 +#undef SIMD_IWRAPPER_2_64 diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl new file mode 100644 index 000000000..acd8ffd96 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl @@ -0,0 +1,35 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD256 AVX (512) implementation for Knights Family +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl new file mode 100644 index 000000000..fed6307f4 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl @@ -0,0 +1,217 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD16 AVX512 (F) implementation for Core processors +// +//============================================================================ + +#define SIMD_WRAPPER_1_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return intrin(a);\ + } + +#define SIMD_WRAPPER_1(op) \ + SIMD_WRAPPER_1_(op, _mm512_##op) + +#define SIMD_WRAPPER_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) + +#define SIMD_WRAPPERI_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_castsi512_ps(_mm512_##intrin(\ + _mm512_castps_si512(a), _mm512_castps_si512(b)));\ + } + +#define SIMD_DWRAPPER_2(op) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##op(a, b);\ + } + +#define SIMD_WRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) + +#define SIMD_DWRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) + +#define SIMD_WRAPPER_3(op) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return _mm512_##op(a, b, c);\ + } + +#define SIMD_IWRAPPER_1(op) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return _mm512_##op(a);\ + } +#define SIMD_IWRAPPER_1_8(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1_4(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1I_(op, intrin) \ + template<int ImmT> \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return intrin(a, ImmT);\ + } +#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) + +#define SIMD_IWRAPPER_2_(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) + +#define SIMD_IWRAPPER_2_CMP(op, cmp) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return cmp(a, b);\ + } + +#define SIMD_IFWRAPPER_2(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\ + } + +#define SIMD_IWRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) + +private: + static SIMDINLINE Integer vmask(__mmask32 m) + { + return _mm512_maskz_set1_epi16(m, -1); + } + static SIMDINLINE Integer vmask(__mmask64 m) + { + return _mm512_maskz_set1_epi8(m, -1); + } +public: + +SIMD_IWRAPPER_2(add_epi8); // return a + b (int8) +SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) +SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) + +SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int) +SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int) +SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int) +SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int) + +SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16) + +template<CompareTypeInt CmpTypeT> +static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b) +{ + // Legacy vector mask generator + __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT)); + return vmask(result); +} +template<CompareTypeInt CmpTypeT> +static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b) +{ + // Legacy vector mask generator + __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT)); + return vmask(result); +} + +SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8) +SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16) +SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8) +SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16) + +SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 +SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 +SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 +SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 + +SIMD_IWRAPPER_2(unpackhi_epi8); // See documentation for _mm512_unpackhi_epi8 +SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16 +SIMD_IWRAPPER_2(unpacklo_epi8); // See documentation for _mm512_unpacklo_epi8 + +SIMD_IWRAPPER_2(shuffle_epi8); + +static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a) +{ + __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si()); + return static_cast<uint64_t>(m); +} + + + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPERI_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_WRAPPER_2I_ +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3 +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_2 +#undef SIMD_IFWRAPPER_2 +#undef SIMD_IWRAPPER_2I +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_1I +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_2 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2I + diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl new file mode 100644 index 000000000..690ab386b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl @@ -0,0 +1,161 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD16 AVX512 (F) implementation for Knights Family Processors +// +//============================================================================ + +#define SIMD_WRAPPER_1_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return intrin(a);\ + } + +#define SIMD_WRAPPER_1(op) \ + SIMD_WRAPPER_1_(op, _mm512_##op) + +#define SIMD_WRAPPER_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) + +#define SIMD_WRAPPERI_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_castsi512_ps(_mm512_##intrin(\ + _mm512_castps_si512(a), _mm512_castps_si512(b)));\ + } + +#define SIMD_DWRAPPER_2(op) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##op(a, b);\ + } + +#define SIMD_WRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) + +#define SIMD_DWRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) + +#define SIMD_WRAPPER_3(op) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return _mm512_##op(a, b, c);\ + } + +#define SIMD_IWRAPPER_1(op) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return _mm512_##op(a);\ + } +#define SIMD_IWRAPPER_1_8(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1_4(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1I_(op, intrin) \ + template<int ImmT> \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return intrin(a, ImmT);\ + } +#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) + +#define SIMD_IWRAPPER_2_(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) + +#define SIMD_IWRAPPER_2_CMP(op, cmp) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return cmp(a, b);\ + } + +#define SIMD_IFWRAPPER_2(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\ + } + +#define SIMD_IWRAPPER_2I_(op, intrin) \ + template<int ImmT>\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) + +SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int) +SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int) +SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int) +SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int) + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPERI_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_WRAPPER_2I_ +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3 +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_2 +#undef SIMD_IFWRAPPER_2 +#undef SIMD_IWRAPPER_2I +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_1I +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_2 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2I + diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl new file mode 100644 index 000000000..3e36ce5bd --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl @@ -0,0 +1,27 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +// Implement mask-enabled SIMD functions diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl new file mode 100644 index 000000000..3e36ce5bd --- /dev/null +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl @@ -0,0 +1,27 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +// Implement mask-enabled SIMD functions diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h index 1d8546959..00c3a87c1 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h @@ -109,6 +109,7 @@ template <> struct ConservativeRastFETraits<StandardRastT> { typedef std::false_type IsConservativeT; + typedef std::integral_constant<uint32_t, 0> BoundingBoxOffsetT; }; ////////////////////////////////////////////////////////////////////////// diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h index 590c56903..fafc36d1d 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h @@ -30,7 +30,7 @@ #include "format_conversion.h" INLINE -void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps) +void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps) { simdscalari stencil = _simd_castps_si(stencilps); @@ -81,7 +81,7 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds template<SWR_FORMAT depthFormatT> -simdscalar QuantizeDepth(simdscalar depth) +simdscalar QuantizeDepth(simdscalar const &depth) { SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); @@ -117,7 +117,7 @@ simdscalar QuantizeDepth(simdscalar depth) INLINE simdscalar DepthStencilTest(const API_STATE* pState, - bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, + bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask, uint8_t *pStencilBase, simdscalar* pStencilMask) { static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format"); @@ -132,7 +132,7 @@ simdscalar DepthStencilTest(const API_STATE* pState, // clamp Z to viewport [minZ..maxZ] simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); - interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); + simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); if (pDSState->depthTestEnable) { @@ -215,7 +215,7 @@ simdscalar DepthStencilTest(const API_STATE* pState, INLINE void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState, - bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, + bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, uint8_t *pStencilBase, const simdscalar& stencilMask) { if (pDSState->depthWriteEnable) @@ -223,7 +223,7 @@ void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_ST // clamp Z to viewport [minZ..maxZ] simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); - interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); + simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ)); simdscalar vMask = _simd_and_ps(depthMask, coverageMask); _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ); diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h index ba2df2292..12c2a3031 100644 --- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h +++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h @@ -91,16 +91,18 @@ static inline void ConvertEnvToKnob(const char* pOverride, std::string& knobValu template <typename T> static inline void InitKnob(T& knob) { - - // TODO, read registry first - - // Second, read environment variables + // Read environment variables const char* pOverride = getenv(knob.Name()); if (pOverride) { - auto knobValue = knob.Value(); + auto knobValue = knob.DefaultValue(); ConvertEnvToKnob(pOverride, knobValue); knob.Value(knobValue); } + else + { + // Set default value + knob.Value(knob.DefaultValue()); + } } diff --git a/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h b/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h index b38d63eac..7f969a303 100644 --- a/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h +++ b/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h @@ -50,7 +50,7 @@ trace_dump_query_type(unsigned value) if (!trace_dumping_enabled_locked()) return; - trace_dump_enum(util_dump_query_type(value, FALSE)); + trace_dump_enum(util_str_query_type(value, FALSE)); } diff --git a/lib/mesa/src/gallium/drivers/vc4/Automake.inc b/lib/mesa/src/gallium/drivers/vc4/Automake.inc index 5664c2ab1..b1aa9726b 100644 --- a/lib/mesa/src/gallium/drivers/vc4/Automake.inc +++ b/lib/mesa/src/gallium/drivers/vc4/Automake.inc @@ -4,6 +4,7 @@ TARGET_DRIVERS += vc4 TARGET_CPPFLAGS += -DGALLIUM_VC4 TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \ - $(top_builddir)/src/gallium/drivers/vc4/libvc4.la + $(top_builddir)/src/gallium/drivers/vc4/libvc4.la \ + $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la endif diff --git a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c index b926d35a6..2da797899 100644 --- a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c +++ b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -255,8 +255,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, uint8_t max_y_tile = args->max_y_tile; uint8_t xtiles = max_x_tile - min_x_tile + 1; uint8_t ytiles = max_y_tile - min_y_tile + 1; - uint8_t x, y; + uint8_t xi, yi; uint32_t size, loop_body_size; + bool positive_x = true; + bool positive_y = true; + + if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) { + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)) + positive_x = false; + if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) + positive_y = false; + } size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; @@ -348,10 +357,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, rcl_u32(setup, 0); /* no address, since we're in None mode */ } - for (y = min_y_tile; y <= max_y_tile; y++) { - for (x = min_x_tile; x <= max_x_tile; x++) { - bool first = (x == min_x_tile && y == min_y_tile); - bool last = (x == max_x_tile && y == max_y_tile); + for (yi = 0; yi < ytiles; yi++) { + int y = positive_y ? min_y_tile + yi : max_y_tile - yi; + for (xi = 0; xi < xtiles; xi++) { + int x = positive_x ? min_x_tile + xi : max_x_tile - xi; + bool first = (xi == 0 && yi == 0); + bool last = (xi == xtiles - 1 && yi == ytiles - 1); emit_tile(exec, setup, x, y, first, last); } diff --git a/lib/mesa/src/gallium/drivers/vc5/Automake.inc b/lib/mesa/src/gallium/drivers/vc5/Automake.inc new file mode 100644 index 000000000..57c8a28ef --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/Automake.inc @@ -0,0 +1,14 @@ +if HAVE_GALLIUM_VC5 + +TARGET_DRIVERS += vc5 +TARGET_CPPFLAGS += -DGALLIUM_VC5 +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/vc5/drm/libvc5drm.la \ + $(top_builddir)/src/gallium/drivers/vc5/libvc5.la \ + $(top_builddir)/src/broadcom/libbroadcom.la + +if !HAVE_GALLIUM_VC4 +TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la +endif + +endif diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.am b/lib/mesa/src/gallium/drivers/vc5/Makefile.am new file mode 100644 index 000000000..42d4be73d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.am @@ -0,0 +1,40 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/broadcom \ + $(LIBDRM_CFLAGS) \ + $(VC5_SIMULATOR_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $() + +noinst_LTLIBRARIES = libvc5.la + +libvc5_la_SOURCES = $(C_SOURCES) + +libvc5_la_LDFLAGS = \ + $(VC5_SIMULATOR_LIBS) \ + $(NULL) diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.in b/lib/mesa/src/gallium/drivers/vc5/Makefile.in new file mode 100644 index 000000000..a19c97f88 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.in @@ -0,0 +1,939 @@ +# Makefile.in generated by automake 1.15.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2017 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@HAVE_LIBDRM_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) + +@HAVE_DRISW_TRUE@am__append_2 = \ +@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la + +@HAVE_DRISW_KMS_TRUE@am__append_3 = \ +@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ +@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) + +subdir = src/gallium/drivers/vc5 +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_gnu_make.m4 \ + $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ + $(top_srcdir)/m4/ax_gcc_builtin.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_flex.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/VERSION $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libvc5_la_LIBADD = +am__objects_1 = vc5_blit.lo vc5_bufmgr.lo vc5_cl.lo vc5_context.lo \ + vc5_draw.lo vc5_emit.lo vc5_fence.lo vc5_formats.lo vc5_job.lo \ + vc5_program.lo vc5_query.lo vc5_rcl.lo vc5_resource.lo \ + vc5_screen.lo vc5_simulator.lo vc5_state.lo vc5_tiling.lo \ + vc5_uniforms.lo +am_libvc5_la_OBJECTS = $(am__objects_1) +libvc5_la_OBJECTS = $(am_libvc5_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libvc5_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libvc5_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libvc5_la_SOURCES) +DIST_SOURCES = $(libvc5_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ + $(top_srcdir)/bin/depcomp \ + $(top_srcdir)/src/gallium/Automake.inc +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ +AMDGPU_LIBS = @AMDGPU_LIBS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +ANDROID_CFLAGS = @ANDROID_CFLAGS@ +ANDROID_LIBS = @ANDROID_LIBS@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BSYMBOLIC = @BSYMBOLIC@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ +CLOCK_LIB = @CLOCK_LIB@ +CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ +DEFINES = @DEFINES@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLOPEN_LIBS = @DLOPEN_LIBS@ +DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ +DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRIGL_CFLAGS = @DRIGL_CFLAGS@ +DRIGL_LIBS = @DRIGL_LIBS@ +DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ +DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ +DRI_LIB_DEPS = @DRI_LIB_DEPS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGL_CFLAGS = @EGL_CFLAGS@ +EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGREP = @EGREP@ +ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ +ETNAVIV_LIBS = @ETNAVIV_LIBS@ +EXEEXT = @EXEEXT@ +EXPAT_CFLAGS = @EXPAT_CFLAGS@ +EXPAT_LIBS = @EXPAT_LIBS@ +FGREP = @FGREP@ +FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ +FREEDRENO_LIBS = @FREEDRENO_LIBS@ +GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ +GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ +GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ +GC_SECTIONS = @GC_SECTIONS@ +GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ +GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ +GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ +GLPROTO_LIBS = @GLPROTO_LIBS@ +GLVND_CFLAGS = @GLVND_CFLAGS@ +GLVND_LIBS = @GLVND_LIBS@ +GLX_TLS = @GLX_TLS@ +GL_LIB = @GL_LIB@ +GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GREP = @GREP@ +HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +I915_CFLAGS = @I915_CFLAGS@ +I915_LIBS = @I915_LIBS@ +INDENT = @INDENT@ +INDENT_FLAGS = @INDENT_FLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ +LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ +LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ +LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ +LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBELF_CFLAGS = @LIBELF_CFLAGS@ +LIBELF_LIBS = @LIBELF_LIBS@ +LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIB_DIR = @LIB_DIR@ +LIB_EXT = @LIB_EXT@ +LIPO = @LIPO@ +LLVM_CFLAGS = @LLVM_CFLAGS@ +LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ +LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ +LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBS = @LLVM_LIBS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ +MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ +NINE_MAJOR = @NINE_MAJOR@ +NINE_MINOR = @NINE_MINOR@ +NINE_TINY = @NINE_TINY@ +NINE_VERSION = @NINE_VERSION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ +NOUVEAU_LIBS = @NOUVEAU_LIBS@ +NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ +NVVIEUX_LIBS = @NVVIEUX_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@ +OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@ +OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@ +OPENCL_LIBNAME = @OPENCL_LIBNAME@ +OPENCL_VERSION = @OPENCL_VERSION@ +OSMESA_LIB = @OSMESA_LIB@ +OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ +OSMESA_PC_REQ = @OSMESA_PC_REQ@ +OSMESA_VERSION = @OSMESA_VERSION@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSIX_SHELL = @POSIX_SHELL@ +PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ +PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +PWR8_CFLAGS = @PWR8_CFLAGS@ +PYTHON2 = @PYTHON2@ +RADEON_CFLAGS = @RADEON_CFLAGS@ +RADEON_LIBS = @RADEON_LIBS@ +RANLIB = @RANLIB@ +RM = @RM@ +SED = @SED@ +SELINUX_CFLAGS = @SELINUX_CFLAGS@ +SELINUX_LIBS = @SELINUX_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ +SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ +SSE41_CFLAGS = @SSE41_CFLAGS@ +STRIP = @STRIP@ +SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ +SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ +SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ +SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ +SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VA_CFLAGS = @VA_CFLAGS@ +VA_LIBS = @VA_LIBS@ +VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ +VA_MAJOR = @VA_MAJOR@ +VA_MINOR = @VA_MINOR@ +VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@ +VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@ +VDPAU_CFLAGS = @VDPAU_CFLAGS@ +VDPAU_LIBS = @VDPAU_LIBS@ +VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ +VDPAU_MAJOR = @VDPAU_MAJOR@ +VDPAU_MINOR = @VDPAU_MINOR@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +VL_CFLAGS = @VL_CFLAGS@ +VL_LIBS = @VL_LIBS@ +VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ +WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@ +WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@ +WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ +WAYLAND_SCANNER = @WAYLAND_SCANNER@ +WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ +WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ +WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@ +WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@ +WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@ +X11_INCLUDES = @X11_INCLUDES@ +XA_MAJOR = @XA_MAJOR@ +XA_MINOR = @XA_MINOR@ +XA_TINY = @XA_TINY@ +XA_VERSION = @XA_VERSION@ +XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ +XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@ +XCB_DRI3_LIBS = @XCB_DRI3_LIBS@ +XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ +XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ +XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ +XLIBGL_LIBS = @XLIBGL_LIBS@ +XVMC_CFLAGS = @XVMC_CFLAGS@ +XVMC_LIBS = @XVMC_LIBS@ +XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ +XVMC_MAJOR = @XVMC_MAJOR@ +XVMC_MINOR = @XVMC_MINOR@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acv_mako_found = @acv_mako_found@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +ifGNUmake = @ifGNUmake@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +C_SOURCES := \ + vc5_blit.c \ + vc5_bufmgr.c \ + vc5_bufmgr.h \ + vc5_cl.c \ + vc5_cl.h \ + vc5_context.c \ + vc5_context.h \ + vc5_draw.c \ + vc5_drm.h \ + vc5_emit.c \ + vc5_fence.c \ + vc5_formats.c \ + vc5_job.c \ + vc5_program.c \ + vc5_query.c \ + vc5_rcl.c \ + vc5_resource.c \ + vc5_resource.h \ + vc5_screen.c \ + vc5_screen.h \ + vc5_simulator.c \ + vc5_state.c \ + vc5_tiling.c \ + vc5_tiling.h \ + vc5_uniforms.c \ + $() + +GALLIUM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + + +# src/gallium/auxiliary must appear before src/gallium/drivers +# because there are stupidly two rbug_context.h files in +# different directories, and which one is included by the +# preprocessor is determined by the ordering of the -I flags. +GALLIUM_DRIVER_CFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_DRIVER_CXXFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CXXFLAGS) + +GALLIUM_TARGET_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/loader \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(top_builddir)/src/util/ \ + -I$(top_builddir)/src/gallium/drivers/ \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) \ + $(LIBDRM_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ + $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) +GALLIUM_WINSYS_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_2) $(am__append_3) +AM_CFLAGS = \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/broadcom \ + $(LIBDRM_CFLAGS) \ + $(VC5_SIMULATOR_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $() + +noinst_LTLIBRARIES = libvc5.la +libvc5_la_SOURCES = $(C_SOURCES) +libvc5_la_LDFLAGS = \ + $(VC5_SIMULATOR_LIBS) \ + $(NULL) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/vc5/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/gallium/drivers/vc5/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libvc5.la: $(libvc5_la_OBJECTS) $(libvc5_la_DEPENDENCIES) $(EXTRA_libvc5_la_DEPENDENCIES) + $(AM_V_CCLD)$(libvc5_la_LINK) $(libvc5_la_OBJECTS) $(libvc5_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_blit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_bufmgr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_cl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_context.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_draw.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_emit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_fence.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_formats.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_job.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_program.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_query.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_rcl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_resource.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_screen.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_simulator.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_state.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_tiling.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_uniforms.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.sources b/lib/mesa/src/gallium/drivers/vc5/Makefile.sources new file mode 100644 index 000000000..3fb6a0d09 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.sources @@ -0,0 +1,27 @@ +C_SOURCES := \ + vc5_blit.c \ + vc5_bufmgr.c \ + vc5_bufmgr.h \ + vc5_cl.c \ + vc5_cl.h \ + vc5_context.c \ + vc5_context.h \ + vc5_draw.c \ + vc5_drm.h \ + vc5_emit.c \ + vc5_fence.c \ + vc5_formats.c \ + vc5_job.c \ + vc5_program.c \ + vc5_query.c \ + vc5_rcl.c \ + vc5_resource.c \ + vc5_resource.h \ + vc5_screen.c \ + vc5_screen.h \ + vc5_simulator.c \ + vc5_state.c \ + vc5_tiling.c \ + vc5_tiling.h \ + vc5_uniforms.c \ + $() diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c b/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c new file mode 100644 index 000000000..64811416e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2015-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "vc5_context.h" + +#if 0 +static struct pipe_surface * +vc5_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return pctx->create_surface(pctx, prsc, &tmpl); +} + +static bool +is_tile_unaligned(unsigned size, unsigned tile_size) +{ + return size & (tile_size - 1); +} + +static bool +vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + bool msaa = (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1); + int tile_width = msaa ? 32 : 64; + int tile_height = msaa ? 32 : 64; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + return false; + + if (info->scissor_enable) + return false; + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != info->src.box.x || + info->dst.box.y != info->src.box.y || + info->dst.box.width != info->src.box.width || + info->dst.box.height != info->src.box.height) { + return false; + } + + int dst_surface_width = u_minify(info->dst.resource->width0, + info->dst.level); + int dst_surface_height = u_minify(info->dst.resource->height0, + info->dst.level); + if (is_tile_unaligned(info->dst.box.x, tile_width) || + is_tile_unaligned(info->dst.box.y, tile_height) || + (is_tile_unaligned(info->dst.box.width, tile_width) && + info->dst.box.x + info->dst.box.width != dst_surface_width) || + (is_tile_unaligned(info->dst.box.height, tile_height) && + info->dst.box.y + info->dst.box.height != dst_surface_height)) { + return false; + } + + /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the + * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our + * destination surface) to determine the stride. This may be wrong + * when reading from texture miplevels > 0, which are stored in + * POT-sized areas. For MSAA, the tile addresses are computed + * explicitly by the RCL, but still use the destination width to + * determine the stride (which could be fixed by explicitly supplying + * it in the ABI). + */ + struct vc5_resource *rsc = vc5_resource(info->src.resource); + + uint32_t stride; + + if (info->src.resource->nr_samples > 1) + stride = align(dst_surface_width, 32) * 4 * rsc->cpp; + /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T) + stride = align(dst_surface_width * rsc->cpp, 128); */ + else + stride = align(dst_surface_width * rsc->cpp, 16); + + if (stride != rsc->slices[info->src.level].stride) + return false; + + if (info->dst.resource->format != info->src.resource->format) + return false; + + if (false) { + fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", + info->src.box.x, + info->src.box.y, + info->dst.box.x, + info->dst.box.y, + info->dst.box.width, + info->dst.box.height); + } + + struct pipe_surface *dst_surf = + vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct pipe_surface *src_surf = + vc5_get_blit_surface(pctx, info->src.resource, info->src.level); + + vc5_flush_jobs_reading_resource(vc5, info->src.resource); + + struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL); + pipe_surface_reference(&job->color_read, src_surf); + + /* If we're resolving from MSAA to single sample, we still need to run + * the engine in MSAA mode for the load. + */ + if (!job->msaa && info->src.resource->nr_samples > 1) { + job->msaa = true; + job->tile_width = 32; + job->tile_height = 32; + } + + job->draw_min_x = info->dst.box.x; + job->draw_min_y = info->dst.box.y; + job->draw_max_x = info->dst.box.x + info->dst.box.width; + job->draw_max_y = info->dst.box.y + info->dst.box.height; + job->draw_width = dst_surf->width; + job->draw_height = dst_surf->height; + + job->tile_width = tile_width; + job->tile_height = tile_height; + job->msaa = msaa; + job->needs_flush = true; + job->resolve |= PIPE_CLEAR_COLOR; + + vc5_job_submit(vc5, job); + + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); + + return true; +} +#endif + +void +vc5_blitter_save(struct vc5_context *vc5) +{ + util_blitter_save_fragment_constant_buffer_slot(vc5->blitter, + vc5->constbuf[PIPE_SHADER_FRAGMENT].cb); + util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb); + util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx); + util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); + util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer); + util_blitter_save_viewport(vc5->blitter, &vc5->viewport); + util_blitter_save_scissor(vc5->blitter, &vc5->scissor); + util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs); + util_blitter_save_blend(vc5->blitter, vc5->blend); + util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa); + util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref); + util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask); + util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer); + util_blitter_save_fragment_sampler_states(vc5->blitter, + vc5->fragtex.num_samplers, + (void **)vc5->fragtex.samplers); + util_blitter_save_fragment_sampler_views(vc5->blitter, + vc5->fragtex.num_textures, vc5->fragtex.textures); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); +} + +static bool +vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + + if (!util_blitter_is_blit_supported(vc5->blitter, info)) { + fprintf(stderr, "blit unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + return false; + } + + vc5_blitter_save(vc5); + util_blitter_blit(vc5->blitter, info); + + return true; +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. + */ +void +vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + struct pipe_blit_info info = *blit_info; + +#if 0 + if (vc5_tile_blit(pctx, blit_info)) + return; +#endif + + vc5_render_blit(pctx, &info); +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c new file mode 100644 index 000000000..c6c06dcfd --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c @@ -0,0 +1,580 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <err.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/ralloc.h" + +#include "vc5_context.h" +#include "vc5_screen.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +static bool dump_stats = false; + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache); + +static void +vc5_bo_dump_stats(struct vc5_screen *screen) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + + fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); + fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); + fprintf(stderr, " BOs cached: %d\n", cache->bo_count); + fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); + + if (!list_empty(&cache->time_list)) { + struct vc5_bo *first = LIST_ENTRY(struct vc5_bo, + cache->time_list.next, + time_list); + struct vc5_bo *last = LIST_ENTRY(struct vc5_bo, + cache->time_list.prev, + time_list); + + fprintf(stderr, " oldest cache time: %ld\n", + (long)first->free_time); + fprintf(stderr, " newest cache time: %ld\n", + (long)last->free_time); + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + fprintf(stderr, " now: %ld\n", + time.tv_sec); + } +} + +static void +vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo) +{ + list_del(&bo->time_list); + list_del(&bo->size_list); + cache->bo_count--; + cache->bo_size -= bo->size; +} + +static struct vc5_bo * +vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = size / 4096 - 1; + + if (cache->size_list_size <= page_index) + return NULL; + + struct vc5_bo *bo = NULL; + mtx_lock(&cache->lock); + if (!list_empty(&cache->size_list[page_index])) { + bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next, + size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. + */ + if (!vc5_bo_wait(bo, 0, NULL)) { + mtx_unlock(&cache->lock); + return NULL; + } + + pipe_reference_init(&bo->reference, 1); + vc5_bo_remove_from_cache(cache, bo); + + bo->name = name; + } + mtx_unlock(&cache->lock); + return bo; +} + +struct vc5_bo * +vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo *bo; + int ret; + + size = align(size, 4096); + + bo = vc5_bo_from_cache(screen, size, name); + if (bo) { + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb from cache:\n", + name, size / 1024); + vc5_bo_dump_stats(screen); + } + return bo; + } + + bo = CALLOC_STRUCT(vc5_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->size = size; + bo->name = name; + bo->private = true; + + retry: + ; + + bool cleared_and_retried = false; + struct drm_vc5_create_bo create = { + .size = size + }; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_CREATE_BO, &create); + bo->handle = create.handle; + bo->offset = create.offset; + + if (ret != 0) { + if (!list_empty(&screen->bo_cache.time_list) && + !cleared_and_retried) { + cleared_and_retried = true; + vc5_bo_cache_free_all(&screen->bo_cache); + goto retry; + } + + free(bo); + return NULL; + } + + screen->bo_count++; + screen->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024); + vc5_bo_dump_stats(screen); + } + + return bo; +} + +void +vc5_bo_last_unreference(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + mtx_lock(&screen->bo_cache.lock); + vc5_bo_last_unreference_locked_timed(bo, time.tv_sec); + mtx_unlock(&screen->bo_cache.lock); +} + +static void +vc5_bo_free(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + if (bo->map) { + if (using_vc5_simulator && bo->name && + strcmp(bo->name, "winsys") == 0) { + free(bo->map); + } else { + munmap(bo->map, bo->size); + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + } + } + + struct drm_gem_close c; + memset(&c, 0, sizeof(c)); + c.handle = bo->handle; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); + if (ret != 0) + fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + + screen->bo_count--; + screen->bo_size -= bo->size; + + if (dump_stats) { + fprintf(stderr, "Freed %s%s%dkb:\n", + bo->name ? bo->name : "", + bo->name ? " " : "", + bo->size / 1024); + vc5_bo_dump_stats(screen); + } + + free(bo); +} + +static void +free_stale_bos(struct vc5_screen *screen, time_t time) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + bool freed_any = false; + + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + if (dump_stats && !freed_any) { + fprintf(stderr, "Freeing stale BOs:\n"); + vc5_bo_dump_stats(screen); + freed_any = true; + } + + /* If it's more than a second old, free it. */ + if (time - bo->free_time > 2) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } else { + break; + } + } + + if (dump_stats && freed_any) { + fprintf(stderr, "Freed stale BOs:\n"); + vc5_bo_dump_stats(screen); + } +} + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache) +{ + mtx_lock(&cache->lock); + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } + mtx_unlock(&cache->lock); +} + +void +vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time) +{ + struct vc5_screen *screen = bo->screen; + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = bo->size / 4096 - 1; + + if (!bo->private) { + vc5_bo_free(bo); + return; + } + + if (cache->size_list_size <= page_index) { + struct list_head *new_list = + ralloc_array(screen, struct list_head, page_index + 1); + + /* Move old list contents over (since the array has moved, and + * therefore the pointers to the list heads have to change). + */ + for (int i = 0; i < cache->size_list_size; i++) { + struct list_head *old_head = &cache->size_list[i]; + if (list_empty(old_head)) + list_inithead(&new_list[i]); + else { + new_list[i].next = old_head->next; + new_list[i].prev = old_head->prev; + new_list[i].next->prev = &new_list[i]; + new_list[i].prev->next = &new_list[i]; + } + } + for (int i = cache->size_list_size; i < page_index + 1; i++) + list_inithead(&new_list[i]); + + cache->size_list = new_list; + cache->size_list_size = page_index + 1; + } + + bo->free_time = time; + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); + cache->bo_count++; + cache->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Freed %s %dkb to cache:\n", + bo->name, bo->size / 1024); + vc5_bo_dump_stats(screen); + } + bo->name = NULL; + + free_stale_bos(screen, time); +} + +static struct vc5_bo * +vc5_bo_open_handle(struct vc5_screen *screen, + uint32_t winsys_stride, + uint32_t handle, uint32_t size) +{ + struct vc5_bo *bo; + + assert(size); + + mtx_lock(&screen->bo_handles_mutex); + + bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); + if (bo) { + pipe_reference(NULL, &bo->reference); + goto done; + } + + bo = CALLOC_STRUCT(vc5_bo); + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->handle = handle; + bo->size = size; + bo->name = "winsys"; + bo->private = false; + +#ifdef USE_VC5_SIMULATOR + vc5_simulator_open_from_handle(screen->fd, winsys_stride, + bo->handle, bo->size); + bo->map = malloc(bo->size); +#endif + + util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + +done: + mtx_unlock(&screen->bo_handles_mutex); + return bo; +} + +struct vc5_bo * +vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride) +{ + struct drm_gem_open o = { + .name = name + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); + if (ret) { + fprintf(stderr, "Failed to open bo %d: %s\n", + name, strerror(errno)); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size); +} + +struct vc5_bo * +vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride) +{ + uint32_t handle; + int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); + int size; + if (ret) { + fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd); + return NULL; + } + + /* Determine the size of the bo we were handed. */ + size = lseek(fd, 0, SEEK_END); + if (size == -1) { + fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, handle, size); +} + +int +vc5_bo_get_dmabuf(struct vc5_bo *bo) +{ + int fd; + int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle, + O_CLOEXEC, &fd); + if (ret != 0) { + fprintf(stderr, "Failed to export gem bo %d to dmabuf\n", + bo->handle); + return -1; + } + + mtx_lock(&bo->screen->bo_handles_mutex); + bo->private = false; + util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&bo->screen->bo_handles_mutex); + + return fd; +} + +bool +vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) +{ + struct drm_gem_flink flink = { + .handle = bo->handle, + }; + int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) { + fprintf(stderr, "Failed to flink bo %d: %s\n", + bo->handle, strerror(errno)); + free(bo); + return false; + } + + bo->private = false; + *name = flink.name; + + return true; +} + +static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns) +{ + struct drm_vc5_wait_seqno wait = { + .seqno = seqno, + .timeout_ns = timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason) +{ + if (screen->finished_seqno >= seqno) + return true; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) { + fprintf(stderr, "Blocking on seqno %lld for %s\n", + (long long)seqno, reason); + } + } + + int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + screen->finished_seqno = seqno; + return true; +} + +static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) +{ + struct drm_vc5_wait_bo wait = { + .handle = handle, + .timeout_ns = timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_BO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason) +{ + struct vc5_screen *screen = bo->screen; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) { + fprintf(stderr, "Blocking on %s BO for %s\n", + bo->name, reason); + } + } + + int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + return true; +} + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo) +{ + uint64_t offset; + int ret; + + if (bo->map) + return bo->map; + + struct drm_vc5_mmap_bo map; + memset(&map, 0, sizeof(map)); + map.handle = bo->handle; + ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_VC5_MMAP_BO, &map); + offset = map.offset; + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->screen->fd, offset); + if (bo->map == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + bo->handle, (long long)offset, bo->size); + abort(); + } + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false)); + + return bo->map; +} + +void * +vc5_bo_map(struct vc5_bo *bo) +{ + void *map = vc5_bo_map_unsynchronized(bo); + + bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map"); + if (!ok) { + fprintf(stderr, "BO wait for map failed\n"); + abort(); + } + + return map; +} + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_bo_cache *cache = &screen->bo_cache; + + vc5_bo_cache_free_all(cache); + + if (dump_stats) { + fprintf(stderr, "BO stats after screen destroy:\n"); + vc5_bo_dump_stats(screen); + } +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h new file mode 100644 index 000000000..cca2b2287 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_BUFMGR_H +#define VC5_BUFMGR_H + +#include <stdint.h> +#include "util/u_hash_table.h" +#include "util/u_inlines.h" +#include "util/list.h" +#include "vc5_screen.h" + +struct vc5_context; + +struct vc5_bo { + struct pipe_reference reference; + struct vc5_screen *screen; + void *map; + const char *name; + uint32_t handle; + uint32_t size; + + /* Address of the BO in our page tables. */ + uint32_t offset; + + /** Entry in the linked list of buffers freed, by age. */ + struct list_head time_list; + /** Entry in the per-page-count linked list of buffers freed (by age). */ + struct list_head size_list; + /** Approximate second when the bo was freed. */ + time_t free_time; + /** + * Whether only our process has a reference to the BO (meaning that + * it's safe to reuse it in the BO cache). + */ + bool private; +}; + +struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, + const char *name); +void vc5_bo_last_unreference(struct vc5_bo *bo); +void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time); +struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride); +struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, + uint32_t winsys_stride); +bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name); +int vc5_bo_get_dmabuf(struct vc5_bo *bo); + +static inline void +vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo) +{ + if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) + vc5_bo_last_unreference(*old_bo); + *old_bo = new_bo; +} + +static inline struct vc5_bo * +vc5_bo_reference(struct vc5_bo *bo) +{ + pipe_reference(NULL, &bo->reference); + return bo; +} + +static inline void +vc5_bo_unreference(struct vc5_bo **bo) +{ + struct vc5_screen *screen; + if (!*bo) + return; + + if ((*bo)->private) { + /* Avoid the mutex for private BOs */ + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference(*bo); + } else { + screen = (*bo)->screen; + mtx_lock(&screen->bo_handles_mutex); + + if (pipe_reference(&(*bo)->reference, NULL)) { + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)(*bo)->handle); + vc5_bo_last_unreference(*bo); + } + + mtx_unlock(&screen->bo_handles_mutex); + } + + *bo = NULL; +} + +static inline void +vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time) +{ + if (!*bo) + return; + + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference_locked_timed(*bo, time); + *bo = NULL; +} + +void * +vc5_bo_map(struct vc5_bo *bo); + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo); + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason); + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason); + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen); + +#endif /* VC5_BUFMGR_H */ + diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c new file mode 100644 index 000000000..37d96c436 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c @@ -0,0 +1,87 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +void +vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl) +{ + cl->base = NULL; + cl->next = cl->base; + cl->size = 0; + cl->job = job; +} + +uint32_t +vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment) +{ + uint32_t offset = align(cl_offset(cl), alignment); + + if (offset + space <= cl->size) { + cl->next = cl->base + offset; + return offset; + } + + vc5_bo_unreference(&cl->bo); + cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL"); + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; + + return 0; +} + +void +vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space) +{ + if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + return; + + struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL"); + assert(space <= new_bo->size); + + /* Chain to the new BO from the old one. */ + if (cl->bo) { + cl_emit(cl, BRANCH, branch) { + branch.address = cl_address(new_bo, 0); + } + vc5_bo_unreference(&cl->bo); + } else { + /* Root the first RCL/BCL BO in the job. */ + vc5_job_add_bo(cl->job, cl->bo); + } + + cl->bo = new_bo; + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; +} + +void +vc5_destroy_cl(struct vc5_cl *cl) +{ + vc5_bo_unreference(&cl->bo); +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h new file mode 100644 index 000000000..64ccac805 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h @@ -0,0 +1,251 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CL_H +#define VC5_CL_H + +#include <stdint.h> + +#include "util/u_math.h" +#include "util/macros.h" + +struct vc5_bo; +struct vc5_job; +struct vc5_cl; + +/** + * Undefined structure, used for typechecking that you're passing the pointers + * to these functions correctly. + */ +struct vc5_cl_out; + +/** A reference to a BO used in the CL packing functions */ +struct vc5_cl_reloc { + struct vc5_bo *bo; + uint32_t offset; +}; + +static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *); + +#define __gen_user_data struct vc5_cl +#define __gen_address_type struct vc5_cl_reloc +#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ + (reloc)->offset) +#define __gen_emit_reloc cl_pack_emit_reloc + +struct vc5_cl { + void *base; + struct vc5_job *job; + struct vc5_cl_out *next; + struct vc5_bo *bo; + uint32_t size; +}; + +void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl); +void vc5_destroy_cl(struct vc5_cl *cl); +void vc5_dump_cl(void *cl, uint32_t size, bool is_render); +uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo); + +struct PACKED unaligned_16 { uint16_t x; }; +struct PACKED unaligned_32 { uint32_t x; }; + +static inline uint32_t cl_offset(struct vc5_cl *cl) +{ + return (char *)cl->next - (char *)cl->base; +} + +static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl) +{ + return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) }; +} + +static inline void +cl_advance(struct vc5_cl_out **cl, uint32_t n) +{ + (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n); +} + +static inline struct vc5_cl_out * +cl_start(struct vc5_cl *cl) +{ + return cl->next; +} + +static inline void +cl_end(struct vc5_cl *cl, struct vc5_cl_out *next) +{ + cl->next = next; + assert(cl_offset(cl) <= cl->size); +} + + +static inline void +put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val) +{ + struct unaligned_32 *p = (void *)ptr; + p->x = val; +} + +static inline void +put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val) +{ + struct unaligned_16 *p = (void *)ptr; + p->x = val; +} + +static inline void +cl_u8(struct vc5_cl_out **cl, uint8_t n) +{ + *(uint8_t *)(*cl) = n; + cl_advance(cl, 1); +} + +static inline void +cl_u16(struct vc5_cl_out **cl, uint16_t n) +{ + put_unaligned_16(*cl, n); + cl_advance(cl, 2); +} + +static inline void +cl_u32(struct vc5_cl_out **cl, uint32_t n) +{ + put_unaligned_32(*cl, n); + cl_advance(cl, 4); +} + +static inline void +cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n) +{ + *(uint32_t *)(*cl) = n; + cl_advance(cl, 4); +} + +static inline void +cl_aligned_reloc(struct vc5_cl *cl, + struct vc5_cl_out **cl_out, + struct vc5_bo *bo, uint32_t offset) +{ + cl_aligned_u32(cl_out, bo->offset + offset); + vc5_job_add_bo(cl->job, bo); +} + +static inline void +cl_ptr(struct vc5_cl_out **cl, void *ptr) +{ + *(struct vc5_cl_out **)(*cl) = ptr; + cl_advance(cl, sizeof(void *)); +} + +static inline void +cl_f(struct vc5_cl_out **cl, float f) +{ + cl_u32(cl, fui(f)); +} + +static inline void +cl_aligned_f(struct vc5_cl_out **cl, float f) +{ + cl_aligned_u32(cl, fui(f)); +} + +/** + * Reference to a BO with its associated offset, used in the pack process. + */ +static inline struct vc5_cl_reloc +cl_address(struct vc5_bo *bo, uint32_t offset) +{ + struct vc5_cl_reloc reloc = { + .bo = bo, + .offset = offset, + }; + return reloc; +} + +uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align); +void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size); + +#define cl_packet_header(packet) V3D33_ ## packet ## _header +#define cl_packet_length(packet) V3D33_ ## packet ## _length +#define cl_packet_pack(packet) V3D33_ ## packet ## _pack +#define cl_packet_struct(packet) V3D33_ ## packet + +static inline void * +cl_get_emit_space(struct vc5_cl_out **cl, size_t size) +{ + void *addr = *cl; + cl_advance(cl, size); + return addr; +} + +/* Macro for setting up an emit of a CL struct. A temporary unpacked struct + * is created, which you get to set fields in of the form: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) { + * .flags.flat_shade_flags = 1 << 2, + * } + * + * or default values only can be emitted with just: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags); + * + * The trick here is that we make a for loop that will execute the body + * (either the block or the ';' after the macro invocation) exactly once. + */ +#define cl_emit(cl, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct vc5_cl_out *cl_out = cl_start(cl); \ + cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \ + cl_packet_length(packet))); \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + +#define cl_emit_prepacked(cl, packet) do { \ + memcpy((cl)->next, packet, sizeof(*packet)); \ + cl_advance(&(cl)->next, sizeof(*packet)); \ +} while (0) + +/** + * Helper function called by the XML-generated pack functions for filling in + * an address field in shader records. + * + * Since we have a private address space as of VC5, our BOs can have lifelong + * offsets, and all the kernel needs to know is which BOs need to be paged in + * for this exec. + */ +static inline void +cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc) +{ + if (reloc->bo) + vc5_job_add_bo(cl->job, reloc->bo); +} + +#endif /* VC5_CL_H */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_context.c b/lib/mesa/src/gallium/drivers/vc5/vc5_context.c new file mode 100644 index 000000000..f80020ab3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_context.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <xf86drm.h> +#include <err.h> + +#include "pipe/p_defines.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_blitter.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" +#include "pipe/p_screen.h" + +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_resource.h" + +void +vc5_flush(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +static void +vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (fence) { + struct pipe_screen *screen = pctx->screen; + struct vc5_fence *f = vc5_fence_create(vc5->screen, + vc5->last_emit_seqno); + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } +} + +static void +vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + + rsc->initialized_buffers = 0; + + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (!entry) + return; + + struct vc5_job *job = entry->data; + if (job->key.zsbuf && job->key.zsbuf->texture == prsc) + job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); +} + +static void +vc5_context_destroy(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (vc5->blitter) + util_blitter_destroy(vc5->blitter); + + if (vc5->primconvert) + util_primconvert_destroy(vc5->primconvert); + + if (vc5->uploader) + u_upload_destroy(vc5->uploader); + + slab_destroy_child(&vc5->transfer_pool); + + pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL); + pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL); + + vc5_program_fini(pctx); + + ralloc_free(vc5); +} + +struct pipe_context * +vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_context *vc5; + + /* Prevent dumping of the shaders built during context setup. */ + uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB; + V3D_DEBUG &= ~V3D_DEBUG_SHADERDB; + + vc5 = rzalloc(NULL, struct vc5_context); + if (!vc5) + return NULL; + struct pipe_context *pctx = &vc5->base; + + vc5->screen = screen; + + pctx->screen = pscreen; + pctx->priv = priv; + pctx->destroy = vc5_context_destroy; + pctx->flush = vc5_pipe_flush; + pctx->invalidate_resource = vc5_invalidate_resource; + + vc5_draw_init(pctx); + vc5_state_init(pctx); + vc5_program_init(pctx); + vc5_query_init(pctx); + vc5_resource_context_init(pctx); + + vc5_job_init(vc5); + + vc5->fd = screen->fd; + + slab_create_child(&vc5->transfer_pool, &screen->transfer_pool); + + vc5->uploader = u_upload_create_default(&vc5->base); + vc5->base.stream_uploader = vc5->uploader; + vc5->base.const_uploader = vc5->uploader; + + vc5->blitter = util_blitter_create(pctx); + if (!vc5->blitter) + goto fail; + + vc5->primconvert = util_primconvert_create(pctx, + (1 << PIPE_PRIM_QUADS) - 1); + if (!vc5->primconvert) + goto fail; + + V3D_DEBUG |= saved_shaderdb_flag; + + vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + + return &vc5->base; + +fail: + pctx->destroy(pctx); + return NULL; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_context.h b/lib/mesa/src/gallium/drivers/vc5/vc5_context.h new file mode 100644 index 000000000..a1017bd1a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_context.h @@ -0,0 +1,473 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CONTEXT_H +#define VC5_CONTEXT_H + +#include <stdio.h> + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/bitset.h" +#include "util/slab.h" +#include "xf86drm.h" +#include "vc5_drm.h" +#include "vc5_screen.h" + +struct vc5_job; +struct vc5_bo; +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); + +#define __user +#include "vc5_drm.h" +#include "vc5_bufmgr.h" +#include "vc5_resource.h" +#include "vc5_cl.h" + +#ifdef USE_VC5_SIMULATOR +#define using_vc5_simulator true +#else +#define using_vc5_simulator false +#endif + +#define VC5_DIRTY_BLEND (1 << 0) +#define VC5_DIRTY_RASTERIZER (1 << 1) +#define VC5_DIRTY_ZSA (1 << 2) +#define VC5_DIRTY_FRAGTEX (1 << 3) +#define VC5_DIRTY_VERTTEX (1 << 4) + +#define VC5_DIRTY_BLEND_COLOR (1 << 7) +#define VC5_DIRTY_STENCIL_REF (1 << 8) +#define VC5_DIRTY_SAMPLE_MASK (1 << 9) +#define VC5_DIRTY_FRAMEBUFFER (1 << 10) +#define VC5_DIRTY_STIPPLE (1 << 11) +#define VC5_DIRTY_VIEWPORT (1 << 12) +#define VC5_DIRTY_CONSTBUF (1 << 13) +#define VC5_DIRTY_VTXSTATE (1 << 14) +#define VC5_DIRTY_VTXBUF (1 << 15) +#define VC5_DIRTY_SCISSOR (1 << 17) +#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18) +#define VC5_DIRTY_PRIM_MODE (1 << 19) +#define VC5_DIRTY_CLIP (1 << 20) +#define VC5_DIRTY_UNCOMPILED_VS (1 << 21) +#define VC5_DIRTY_UNCOMPILED_FS (1 << 22) +#define VC5_DIRTY_COMPILED_CS (1 << 23) +#define VC5_DIRTY_COMPILED_VS (1 << 24) +#define VC5_DIRTY_COMPILED_FS (1 << 25) +#define VC5_DIRTY_FS_INPUTS (1 << 26) +#define VC5_DIRTY_STREAMOUT (1 << 27) + +#define VC5_MAX_FS_INPUTS 64 + +struct vc5_sampler_view { + struct pipe_sampler_view base; + uint32_t p0; + uint32_t p1; + /* Precomputed swizzles to pass in to the shader key. */ + uint8_t swizzle[4]; + + uint8_t texture_shader_state[32]; +}; + +struct vc5_sampler_state { + struct pipe_sampler_state base; + uint32_t p0; + uint32_t p1; + + uint8_t texture_shader_state[32]; +}; + +struct vc5_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; +}; + +struct vc5_shader_uniform_info { + enum quniform_contents *contents; + uint32_t *data; + uint32_t count; +}; + +struct vc5_uncompiled_shader { + /** A name for this program, so you can track it in shader-db output. */ + uint32_t program_id; + /** How many variants of this program were compiled, for shader-db. */ + uint32_t compiled_variant_count; + struct pipe_shader_state base; + uint32_t num_tf_outputs; + struct v3d_varying_slot *tf_outputs; + uint16_t tf_specs[PIPE_MAX_SO_BUFFERS]; + uint32_t num_tf_specs; +}; + +struct vc5_compiled_shader { + struct vc5_bo *bo; + + union { + struct v3d_prog_data *base; + struct v3d_vs_prog_data *vs; + struct v3d_fs_prog_data *fs; + } prog_data; + + /** + * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the + * uniforms have to be rewritten (and therefore the shader state + * reemitted). + */ + uint32_t uniform_dirty_bits; +}; + +struct vc5_program_stateobj { + struct vc5_uncompiled_shader *bind_vs, *bind_fs; + struct vc5_compiled_shader *cs, *vs, *fs; +}; + +struct vc5_constbuf_stateobj { + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertexbuf_stateobj { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertex_stateobj { + struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; + unsigned num_elements; + + uint8_t attrs[12 * VC5_MAX_ATTRIBUTES]; + struct vc5_bo *default_attribute_values; +}; + +struct vc5_streamout_stateobj { + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; +}; + +/* Hash table key for vc5->jobs */ +struct vc5_job_key { + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; +}; + +/** + * A complete bin/render job. + * + * This is all of the state necessary to submit a bin/render to the kernel. + * We want to be able to have multiple in progress at a time, so that we don't + * need to flush an existing CL just to switch to rendering to a new render + * target (which would mean reading back from the old render target when + * starting to render to it again). + */ +struct vc5_job { + struct vc5_context *vc5; + struct vc5_cl bcl; + struct vc5_cl rcl; + struct vc5_cl indirect; + struct vc5_bo *tile_alloc; + uint32_t shader_rec_count; + + struct drm_vc5_submit_cl submit; + + /** + * Set of all BOs referenced by the job. This will be used for making + * the list of BOs that the kernel will need to have paged in to + * execute our job. + */ + struct set *bos; + + struct set *write_prscs; + + /* Size of the submit.bo_handles array. */ + uint32_t bo_handles_size; + + /** @{ Surfaces to submit rendering for. */ + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; + /** @} */ + /** @{ + * Bounding box of the scissor across all queued drawing. + * + * Note that the max values are exclusive. + */ + uint32_t draw_min_x; + uint32_t draw_min_y; + uint32_t draw_max_x; + uint32_t draw_max_y; + /** @} */ + /** @{ + * Width/height of the color framebuffer being rendered to, + * for VC5_TILE_RENDERING_MODE_CONFIG. + */ + uint32_t draw_width; + uint32_t draw_height; + /** @} */ + /** @{ Tile information, depending on MSAA and float color buffer. */ + uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */ + uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */ + + uint32_t tile_width; /** @< Width of a tile. */ + uint32_t tile_height; /** @< Height of a tile. */ + /** maximum internal_bpp of all color render targets. */ + uint32_t internal_bpp; + + /** Whether the current rendering is in a 4X MSAA tile buffer. */ + bool msaa; + /** @} */ + + /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the + * first rendering. + */ + uint32_t cleared; + /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to + * (either clears or draws). + */ + uint32_t resolve; + uint32_t clear_color[4][4]; + float clear_z; + uint8_t clear_s; + + /** + * Set if some drawing (triangles, blits, or just a glClear()) has + * been done to the FBO, meaning that we need to + * DRM_IOCTL_VC5_SUBMIT_CL. + */ + bool needs_flush; + + bool uses_early_z; + + /** + * Number of draw calls (not counting full buffer clears) queued in + * the current job. + */ + uint32_t draw_calls_queued; + + struct vc5_job_key key; +}; + +struct vc5_context { + struct pipe_context base; + + int fd; + struct vc5_screen *screen; + + /** The 3D rendering job for the currently bound FBO. */ + struct vc5_job *job; + + /* Map from struct vc5_job_key to the job for that FBO. + */ + struct hash_table *jobs; + + /** + * Map from vc5_resource to a job writing to that resource. + * + * Primarily for flushing jobs rendering to textures that are now + * being read from. + */ + struct hash_table *write_jobs; + + struct slab_child_pool transfer_pool; + struct blitter_context *blitter; + + /** bitfield of VC5_DIRTY_* */ + uint32_t dirty; + + struct primconvert_context *primconvert; + + struct hash_table *fs_cache, *vs_cache; + uint32_t next_uncompiled_program_id; + uint64_t next_compiled_program_id; + + struct vc5_compiler_state *compiler_state; + + uint8_t prim_mode; + + /** Maximum index buffer valid for the current shader_rec. */ + uint32_t max_index; + + /** Seqno of the last CL flush's job. */ + uint64_t last_emit_seqno; + + struct u_upload_mgr *uploader; + + /** @{ Current pipeline state objects */ + struct pipe_scissor_state scissor; + struct pipe_blend_state *blend; + struct vc5_rasterizer_state *rasterizer; + struct vc5_depth_stencil_alpha_state *zsa; + + struct vc5_texture_stateobj verttex, fragtex; + + struct vc5_program_stateobj prog; + + struct vc5_vertex_stateobj *vtx; + + struct { + struct pipe_blend_color f; + uint16_t hf[4]; + } blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple stipple; + struct pipe_clip_state clip; + struct pipe_viewport_state viewport; + struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct vc5_vertexbuf_stateobj vertexbuf; + struct vc5_streamout_stateobj streamout; + /** @} */ +}; + +struct vc5_rasterizer_state { + struct pipe_rasterizer_state base; + + /* VC5_CONFIGURATION_BITS */ + uint8_t config_bits[3]; + + float point_size; + + /** + * Half-float (1/8/7 bits) value of polygon offset units for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_units; + /** + * Half-float (1/8/7 bits) value of polygon offset scale for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_factor; +}; + +struct vc5_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + bool early_z_enable; + + /** Uniforms for stencil state. + * + * Index 0 is either the front config, or the front-and-back config. + * Index 1 is the back config if doing separate back stencil. + * Index 2 is the writemask config if it's not a common mask value. + */ + uint32_t stencil_uniforms[3]; +}; + +#define perf_debug(...) do { \ + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +static inline struct vc5_context * +vc5_context(struct pipe_context *pcontext) +{ + return (struct vc5_context *)pcontext; +} + +static inline struct vc5_sampler_view * +vc5_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct vc5_sampler_view *)psview; +} + +static inline struct vc5_sampler_state * +vc5_sampler_state(struct pipe_sampler_state *psampler) +{ + return (struct vc5_sampler_state *)psampler; +} + +struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); +void vc5_draw_init(struct pipe_context *pctx); +void vc5_state_init(struct pipe_context *pctx); +void vc5_program_init(struct pipe_context *pctx); +void vc5_program_fini(struct pipe_context *pctx); +void vc5_query_init(struct pipe_context *pctx); + +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +int vc5_simulator_flush(struct vc5_context *vc5, + struct drm_vc5_submit_cl *args, + struct vc5_job *job); +int vc5_simulator_ioctl(int fd, unsigned long request, void *arg); +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size); + +static inline int +vc5_ioctl(int fd, unsigned long request, void *arg) +{ + if (using_vc5_simulator) + return vc5_simulator_ioctl(fd, request, arg); + else + return drmIoctl(fd, request, arg); +} + +void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader); +struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate); + +void vc5_flush(struct pipe_context *pctx); +void vc5_job_init(struct vc5_context *vc5); +struct vc5_job *vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, + struct pipe_surface *zsbuf); +struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5); +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); +void vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc); +void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job); +void vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_emit_state(struct pipe_context *pctx); +void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode); + +bool vc5_rt_format_supported(enum pipe_format f); +bool vc5_tex_format_supported(enum pipe_format f); +uint8_t vc5_get_rt_format(enum pipe_format f); +uint8_t vc5_get_tex_format(enum pipe_format f); +uint8_t vc5_get_tex_return_size(enum pipe_format f); +uint8_t vc5_get_tex_return_channels(enum pipe_format f); +const uint8_t *vc5_get_format_swizzle(enum pipe_format f); +void vc5_get_internal_type_bpp_for_output_format(uint32_t format, + uint32_t *type, + uint32_t *bpp); + +void vc5_init_query_functions(struct vc5_context *vc5); +void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); +void vc5_blitter_save(struct vc5_context *vc5); +void vc5_emit_rcl(struct vc5_job *job); + + +#endif /* VC5_CONTEXT_H */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c b/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c new file mode 100644 index 000000000..11d9e92a9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c @@ -0,0 +1,529 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_blitter.h" +#include "util/u_prim.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" +#include "util/u_prim_restart.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" + +#include "vc5_context.h" +#include "vc5_resource.h" +#include "vc5_cl.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +/** + * Does the initial bining command list setup for drawing to a given FBO. + */ +static void +vc5_start_draw(struct vc5_context *vc5) +{ + struct vc5_job *job = vc5->job; + + if (job->needs_flush) + return; + + /* Get space to emit our BCL state, using a branch to jump to a new BO + * if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + job->submit.bcl_start = job->bcl.bo->offset; + vc5_job_add_bo(job, job->bcl.bo); + + job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc"); + struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen, + job->draw_tiles_y * + job->draw_tiles_x * + 64, + "TSDA"); + + /* "Binning mode lists start with a Tile Binning Mode Configuration + * item (120)" + * + * Part1 signals the end of binning config setup. + */ + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) { + config.tile_allocation_memory_address = + cl_address(job->tile_alloc, 0); + config.tile_allocation_memory_size = job->tile_alloc->size; + } + + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) { + config.tile_state_data_array_base_address = + cl_address(tsda, 0); + + config.width_in_tiles = job->draw_tiles_x; + config.height_in_tiles = job->draw_tiles_y; + + /* Must be >= 1 */ + config.number_of_render_targets = + MAX2(vc5->framebuffer.nr_cbufs, 1); + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + vc5_bo_unreference(&tsda); + + /* There's definitely nothing in the VCD cache we want. */ + cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); + + /* "Binning mode lists must have a Start Tile Binning item (6) after + * any prefix state data before the binning list proper starts." + */ + cl_emit(&job->bcl, START_TILE_BINNING, bin); + + cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, fmt) { + fmt.data_type = LIST_INDEXED; + fmt.primitive_type = LIST_TRIANGLES; + } + + job->needs_flush = true; + job->draw_width = vc5->framebuffer.width; + job->draw_height = vc5->framebuffer.height; +} + +static void +vc5_predraw_check_textures(struct pipe_context *pctx, + struct vc5_texture_stateobj *stage_tex) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + for (int i = 0; i < stage_tex->num_textures; i++) { + struct pipe_sampler_view *view = stage_tex->textures[i]; + if (!view) + continue; + + vc5_flush_jobs_writing_resource(vc5, view->texture); + } +} + +static void +vc5_emit_gl_shader_state(struct vc5_context *vc5, + const struct pipe_draw_info *info) +{ + struct vc5_job *job = vc5->job; + /* VC5_DIRTY_VTXSTATE */ + struct vc5_vertex_stateobj *vtx = vc5->vtx; + /* VC5_DIRTY_VTXBUF */ + struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf; + + /* Upload the uniforms to the indirect CL first */ + struct vc5_cl_reloc fs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.fs, + &vc5->constbuf[PIPE_SHADER_FRAGMENT], + &vc5->fragtex); + struct vc5_cl_reloc vs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.vs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + struct vc5_cl_reloc cs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.cs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + + uint32_t shader_rec_offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(GL_SHADER_STATE_RECORD) + + vtx->num_elements * + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), + 32); + + cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { + shader.enable_clipping = true; + /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ + shader.point_size_in_shaded_vertex_data = + (info->mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + /* Must be set if the shader modifies Z, discards, or modifies + * the sample mask. For any of these cases, the fragment + * shader needs to write the Z value (even just discards). + */ + shader.fragment_shader_does_z_writes = + (vc5->prog.fs->prog_data.fs->writes_z || + vc5->prog.fs->prog_data.fs->discard); + + shader.number_of_varyings_in_fragment_shader = + vc5->prog.fs->prog_data.base->num_inputs; + + shader.propagate_nans = true; + + shader.coordinate_shader_code_address = + cl_address(vc5->prog.cs->bo, 0); + shader.vertex_shader_code_address = + cl_address(vc5->prog.vs->bo, 0); + shader.fragment_shader_code_address = + cl_address(vc5->prog.fs->bo, 0); + + /* XXX: Use combined input/output size flag in the common + * case. + */ + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; + shader.coordinate_shader_input_vpm_segment_size = + vc5->prog.cs->prog_data.vs->vpm_input_size; + shader.vertex_shader_input_vpm_segment_size = + vc5->prog.vs->prog_data.vs->vpm_input_size; + + shader.coordinate_shader_output_vpm_segment_size = + vc5->prog.cs->prog_data.vs->vpm_output_size; + shader.vertex_shader_output_vpm_segment_size = + vc5->prog.vs->prog_data.vs->vpm_output_size; + + shader.coordinate_shader_uniforms_address = cs_uniforms; + shader.vertex_shader_uniforms_address = vs_uniforms; + shader.fragment_shader_uniforms_address = fs_uniforms; + + shader.vertex_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_vid; + shader.instance_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_iid; + shader.vertex_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_vid; + shader.instance_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_iid; + + shader.address_of_default_attribute_values = + cl_address(vtx->default_attribute_values, 0); + } + + for (int i = 0; i < vtx->num_elements; i++) { + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &vertexbuf->vb[elem->vertex_buffer_index]; + struct vc5_resource *rsc = vc5_resource(vb->buffer.resource); + + struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = { + .stride = vb->stride, + .address = cl_address(rsc->bo, + vb->buffer_offset + + elem->src_offset), + .number_of_values_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->vattr_sizes[i], + .number_of_values_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->vattr_sizes[i], + }; + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + uint8_t attr_packed[size]; + V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(&job->indirect, + attr_packed, + &attr_unpacked); + for (int j = 0; j < size; j++) + attr_packed[j] |= vtx->attrs[i * size + j]; + cl_emit_prepacked(&job->indirect, &attr_packed); + } + + cl_emit(&job->bcl, GL_SHADER_STATE, state) { + state.address = cl_address(job->indirect.bo, shader_rec_offset); + state.number_of_attribute_arrays = vtx->num_elements; + } + + vc5_bo_unreference(&cs_uniforms.bo); + vc5_bo_unreference(&vs_uniforms.bo); + vc5_bo_unreference(&fs_uniforms.bo); + + job->shader_rec_count++; +} + +static void +vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + if (!info->count_from_stream_output && !info->indirect && + !info->primitive_restart && + !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) + return; + + /* Fall back for weird desktop GL primitive restart values. */ + if (info->primitive_restart && + info->index_size) { + uint32_t mask = ~0; + + switch (info->index_size) { + case 2: + mask = 0xffff; + break; + case 1: + mask = 0xff; + break; + } + + if (info->restart_index != mask) { + util_draw_vbo_without_prim_restart(pctx, info); + return; + } + } + + if (info->mode >= PIPE_PRIM_QUADS) { + util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base); + util_primconvert_draw_vbo(vc5->primconvert, info); + perf_debug("Fallback conversion for %d %s vertices\n", + info->count, u_prim_name(info->mode)); + return; + } + + /* Before setting up the draw, flush anything writing to the textures + * that we read from. + */ + vc5_predraw_check_textures(pctx, &vc5->verttex); + vc5_predraw_check_textures(pctx, &vc5->fragtex); + + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* Get space to emit our draw call into the BCL, using a branch to + * jump to a new BO if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + if (vc5->prim_mode != info->mode) { + vc5->prim_mode = info->mode; + vc5->dirty |= VC5_DIRTY_PRIM_MODE; + } + + vc5_start_draw(vc5); + vc5_update_compiled_shaders(vc5, info->mode); + + vc5_emit_state(pctx); + + if (vc5->dirty & (VC5_DIRTY_VTXBUF | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_COMPILED_CS | + VC5_DIRTY_COMPILED_VS | + VC5_DIRTY_COMPILED_FS | + vc5->prog.cs->uniform_dirty_bits | + vc5->prog.vs->uniform_dirty_bits | + vc5->prog.fs->uniform_dirty_bits)) { + vc5_emit_gl_shader_state(vc5, info); + } + + vc5->dirty = 0; + + /* The Base Vertex/Base Instance packet sets those values to nonzero + * for the next draw call only. + */ + if (info->index_bias || info->start_instance) { + cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { + base.base_instance = info->start_instance; + base.base_vertex = info->index_bias; + } + } + + /* The HW only processes transform feedback on primitives with the + * flag set. + */ + uint32_t prim_tf_enable = 0; + if (vc5->prog.bind_vs->num_tf_outputs) + prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); + + /* Note that the primitive type fields match with OpenGL/gallium + * definitions, up to but not including QUADS. + */ + if (info->index_size) { + uint32_t index_size = info->index_size; + uint32_t offset = info->start * index_size; + struct pipe_resource *prsc; + if (info->has_user_indices) { + prsc = NULL; + u_upload_data(vc5->uploader, 0, + info->count * info->index_size, 4, + info->index.user, + &offset, &prsc); + } else { + prsc = info->index.resource; + } + struct vc5_resource *rsc = vc5_resource(prsc); + + if (info->instance_count > 1) { + cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.length = info->count; + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + } + } + + job->draw_calls_queued++; + + if (info->has_user_indices) + pipe_resource_reference(&prsc, NULL); + } else { + if (info->instance_count > 1) { + cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.index_of_first_vertex = info->start; + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.length = info->count; + prim.index_of_first_vertex = info->start; + } + } + } + job->draw_calls_queued++; + + if (vc5->zsa && job->zsbuf && + (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + vc5_job_add_bo(job, rsc->bo); + + if (vc5->zsa->base.depth.enabled) { + job->resolve |= PIPE_CLEAR_DEPTH; + rsc->initialized_buffers = PIPE_CLEAR_DEPTH; + + if (vc5->zsa->early_z_enable) + job->uses_early_z = true; + } + + if (vc5->zsa->base.stencil[0].enabled) { + job->resolve |= PIPE_CLEAR_STENCIL; + rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + + if (job->resolve & bit || !job->cbufs[i]) + continue; + struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture); + + job->resolve |= bit; + vc5_job_add_bo(job, rsc->bo); + } + + if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) + vc5_flush(pctx); +} + +static void +vc5_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* We can't flag new buffers for clearing once we've queued draws. We + * could avoid this by using the 3d engine to clear. + */ + if (job->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); + vc5_job_submit(vc5, job); + job = vc5_get_job_for_fbo(vc5); + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(buffers & bit)) + continue; + + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + struct vc5_resource *rsc = + vc5_resource(cbuf->texture); + + union util_color uc; + util_pack_color(color->f, cbuf->format, &uc); + + memcpy(job->clear_color[i], uc.ui, + util_format_get_blocksize(cbuf->format)); + + rsc->initialized_buffers |= bit; + } + + unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; + if (zsclear) { + struct vc5_resource *rsc = + vc5_resource(vc5->framebuffer.zsbuf->texture); + + if (zsclear & PIPE_CLEAR_DEPTH) + job->clear_z = depth; + if (zsclear & PIPE_CLEAR_STENCIL) + job->clear_s = stencil; + + rsc->initialized_buffers |= zsclear; + } + + job->draw_min_x = 0; + job->draw_min_y = 0; + job->draw_max_x = vc5->framebuffer.width; + job->draw_max_y = vc5->framebuffer.height; + job->cleared |= buffers; + job->resolve |= buffers; + + vc5_start_draw(vc5); +} + +static void +vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear RT\n"); +} + +static void +vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear DS\n"); +} + +void +vc5_draw_init(struct pipe_context *pctx) +{ + pctx->draw_vbo = vc5_draw_vbo; + pctx->clear = vc5_clear; + pctx->clear_render_target = vc5_clear_render_target; + pctx->clear_depth_stencil = vc5_clear_depth_stencil; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h b/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h new file mode 100644 index 000000000..e70cf9d56 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h @@ -0,0 +1,191 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _VC5_DRM_H_ +#define _VC5_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_VC5_SUBMIT_CL 0x00 +#define DRM_VC5_WAIT_SEQNO 0x01 +#define DRM_VC5_WAIT_BO 0x02 +#define DRM_VC5_CREATE_BO 0x03 +#define DRM_VC5_MMAP_BO 0x04 +#define DRM_VC5_GET_PARAM 0x05 +#define DRM_VC5_GET_BO_OFFSET 0x06 + +#define DRM_IOCTL_VC5_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl) +#define DRM_IOCTL_VC5_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno) +#define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo) +#define DRM_IOCTL_VC5_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo) +#define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo) +#define DRM_IOCTL_VC5_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_PARAM, struct drm_vc5_get_param) +#define DRM_IOCTL_VC5_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset) + +/** + * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute an optional binner + * command list, and a render command list. + */ +struct drm_vc5_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u32 bcl_start; + + /** End address of the BCL (first byte after the BCL) */ + __u32 bcl_end; + + /* Offset of the render command list. + * + * This is the second set of commands executed, which will either + * execute the tiles that have been set up by the BCL, or a fixed set + * of tiles (in the case of RCL-only blits). + */ + __u32 rcl_start; + + /** End address of the RCL (first byte after the RCL) */ + __u32 rcl_end; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Pointer to an array of chunks of extra submit CL information. (the + * chunk struct is not yet defined) + */ + __u64 chunks; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + __u32 chunk_count; + + __u64 flags; +}; + +/** + * struct drm_vc5_wait_seqno - ioctl argument for waiting for + * DRM_VC5_SUBMIT_CL completion using its returned seqno. + * + * timeout_ns is the timeout in nanoseconds, where "0" means "don't + * block, just return the status." + */ +struct drm_vc5_wait_seqno { + __u64 seqno; + __u64 timeout_ns; +}; + +/** + * struct drm_vc5_wait_bo - ioctl argument for waiting for + * completion of the last DRM_VC5_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_vc5_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_vc5_create_bo - ioctl argument for creating VC5 BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc5_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the V3D address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + */ + __u32 offset; +}; + +/** + * struct drm_vc5_mmap_bo - ioctl argument for mapping VC5 BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc5_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_vc5_param { + DRM_VC5_PARAM_V3D_UIFCFG, + DRM_VC5_PARAM_V3D_HUB_IDENT1, + DRM_VC5_PARAM_V3D_HUB_IDENT2, + DRM_VC5_PARAM_V3D_HUB_IDENT3, + DRM_VC5_PARAM_V3D_CORE0_IDENT0, + DRM_VC5_PARAM_V3D_CORE0_IDENT1, + DRM_VC5_PARAM_V3D_CORE0_IDENT2, +}; + +struct drm_vc5_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the V3D address space for this DRM fd. + * This is the same value returned by drm_vc5_create_bo, if that was called + * from this DRM fd. + */ +struct drm_vc5_get_bo_offset { + __u32 handle; + __u32 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _VC5_DRM_H_ */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c b/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c new file mode 100644 index 000000000..3cb44feff --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c @@ -0,0 +1,464 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_half.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +static uint8_t +vc5_factor(enum pipe_blendfactor factor) +{ + /* We may get a bad blendfactor when blending is disabled. */ + if (factor == 0) + return V3D_BLEND_FACTOR_ZERO; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return V3D_BLEND_FACTOR_ZERO; + case PIPE_BLENDFACTOR_ONE: + return V3D_BLEND_FACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V3D_BLEND_FACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V3D_BLEND_FACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return V3D_BLEND_FACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V3D_BLEND_FACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V3D_BLEND_FACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V3D_BLEND_FACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return V3D_BLEND_FACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return V3D_BLEND_FACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_CONST_COLOR: + return V3D_BLEND_FACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V3D_BLEND_FACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V3D_BLEND_FACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V3D_BLEND_FACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: + unreachable("Bad blend factor"); + } +} + +static inline uint16_t +swizzled_border_color(struct pipe_sampler_state *sampler, + struct vc5_sampler_view *sview, + int chan) +{ + const struct util_format_description *desc = + util_format_description(sview->base.format); + uint8_t swiz = chan; + + /* If we're doing swizzling in the sampler, then only rearrange the + * border color for the mismatch between the VC5 texture format and + * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by + * the sampler's swizzle. + * + * For swizzling in the shader, we don't do any pre-swizzling of the + * border color. + */ + if (vc5_get_tex_return_size(sview->base.format) != 32) + swiz = desc->swizzle[swiz]; + + switch (swiz) { + case PIPE_SWIZZLE_0: + return util_float_to_half(0.0); + case PIPE_SWIZZLE_1: + return util_float_to_half(1.0); + default: + return util_float_to_half(sampler->border_color.f[swiz]); + } +} + +static void +emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, + int i) +{ + struct vc5_job *job = vc5->job; + struct pipe_sampler_state *psampler = stage_tex->samplers[i]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + struct pipe_sampler_view *psview = stage_tex->textures[i]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct pipe_resource *prsc = psview->texture; + struct vc5_resource *rsc = vc5_resource(prsc); + + stage_tex->texture_state[i].offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(TEXTURE_SHADER_STATE), + 32); + vc5_bo_set_reference(&stage_tex->texture_state[i].bo, + job->indirect.bo); + + struct V3D33_TEXTURE_SHADER_STATE unpacked = { + /* XXX */ + .border_color_red = swizzled_border_color(psampler, sview, 0), + .border_color_green = swizzled_border_color(psampler, sview, 1), + .border_color_blue = swizzled_border_color(psampler, sview, 2), + .border_color_alpha = swizzled_border_color(psampler, sview, 3), + + /* XXX: Disable min/maxlod for txf */ + .max_level_of_detail = MIN2(MIN2(psampler->max_lod, + VC5_MAX_MIP_LEVELS), + psview->u.tex.last_level), + + .texture_base_pointer = cl_address(rsc->bo, + rsc->slices[0].offset), + }; + + int min_img_filter = psampler->min_img_filter; + int min_mip_filter = psampler->min_mip_filter; + int mag_img_filter = psampler->mag_img_filter; + + if (vc5_get_tex_return_size(psview->format) == 32) { + min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST); + switch (min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + unpacked.minification_filter = 0 + min_nearest; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + unpacked.minification_filter = 2 + !min_nearest; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + unpacked.minification_filter = 4 + !min_nearest; + break; + } + unpacked.magnification_filter = (mag_img_filter == + PIPE_TEX_FILTER_NEAREST); + + uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; + cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); + + for (int i = 0; i < ARRAY_SIZE(packed); i++) + packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; + + cl_emit_prepacked(&job->indirect, &packed); +} + +static void +emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) + emit_one_texture(vc5, stage_tex, i); +} + +void +vc5_emit_state(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5->job; + + if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | + VC5_DIRTY_RASTERIZER)) { + float *vpscale = vc5->viewport.scale; + float *vptranslate = vc5->viewport.translate; + float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; + float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; + float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; + float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; + + /* Clip to the scissor if it's enabled, but still clip to the + * drawable regardless since that controls where the binner + * tries to put things. + * + * Additionally, always clip the rendering to the viewport, + * since the hardware does guardband clipping, meaning + * primitives would rasterize outside of the view volume. + */ + uint32_t minx, miny, maxx, maxy; + if (!vc5->rasterizer->base.scissor) { + minx = MAX2(vp_minx, 0); + miny = MAX2(vp_miny, 0); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); + } else { + minx = MAX2(vp_minx, vc5->scissor.minx); + miny = MAX2(vp_miny, vc5->scissor.miny); + maxx = MIN2(vp_maxx, vc5->scissor.maxx); + maxy = MIN2(vp_maxy, vc5->scissor.maxy); + } + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_height_in_pixels = maxy - miny; + clip.clip_window_width_in_pixels = maxx - minx; + clip.clip_window_height_in_pixels = maxy - miny; + } + + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); + } + + if (vc5->dirty & (VC5_DIRTY_RASTERIZER | + VC5_DIRTY_ZSA | + VC5_DIRTY_BLEND | + VC5_DIRTY_COMPILED_FS)) { + cl_emit(&job->bcl, CONFIGURATION_BITS, config) { + config.enable_forward_facing_primitive = + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_FRONT); + config.enable_reverse_facing_primitive = + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_BACK); + /* This seems backwards, but it's what gets the + * clipflat test to pass. + */ + config.clockwise_primitives = + vc5->rasterizer->base.front_ccw; + + config.enable_depth_offset = + vc5->rasterizer->base.offset_tri; + + config.rasterizer_oversample_mode = + vc5->rasterizer->base.multisample; + + config.direct3d_provoking_vertex = + vc5->rasterizer->base.flatshade_first; + + config.blend_enable = vc5->blend->rt[0].blend_enable; + + config.early_z_updates_enable = true; + if (vc5->zsa->base.depth.enabled) { + config.z_updates_enable = + vc5->zsa->base.depth.writemask; + config.early_z_enable = + vc5->zsa->early_z_enable; + config.depth_test_function = + vc5->zsa->base.depth.func; + } else { + config.depth_test_function = PIPE_FUNC_ALWAYS; + } + } + + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_factor = + vc5->rasterizer->offset_factor; + depth.depth_offset_units = + vc5->rasterizer->offset_units; + } + + cl_emit(&job->bcl, POINT_SIZE, point_size) { + point_size.point_size = vc5->rasterizer->point_size; + } + + cl_emit(&job->bcl, LINE_WIDTH, line_width) { + line_width.line_width = vc5->rasterizer->base.line_width; + } + } + + if (vc5->dirty & VC5_DIRTY_VIEWPORT) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = + vc5->viewport.scale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = + vc5->viewport.scale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc5->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc5->viewport.scale[2]; + } + if (0 /* XXX */) { + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + clip.minimum_zw = (vc5->viewport.translate[2] - + vc5->viewport.scale[2]); + clip.maximum_zw = (vc5->viewport.translate[2] + + vc5->viewport.scale[2]); + } + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = + vc5->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + vc5->viewport.translate[1]; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND) { + struct pipe_blend_state *blend = vc5->blend; + + cl_emit(&job->bcl, BLEND_CONFIG, config) { + struct pipe_rt_blend_state *rtblend = &blend->rt[0]; + + config.colour_blend_mode = rtblend->rgb_func; + config.colour_blend_dst_factor = + vc5_factor(rtblend->rgb_dst_factor); + config.colour_blend_src_factor = + vc5_factor(rtblend->rgb_src_factor); + + config.alpha_blend_mode = rtblend->alpha_func; + config.alpha_blend_dst_factor = + vc5_factor(rtblend->alpha_dst_factor); + config.alpha_blend_src_factor = + vc5_factor(rtblend->alpha_src_factor); + } + + cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { + if (blend->independent_blend_enable) { + mask.render_target_0_per_colour_component_write_masks = + (~blend->rt[0].colormask) & 0xf; + mask.render_target_1_per_colour_component_write_masks = + (~blend->rt[1].colormask) & 0xf; + mask.render_target_2_per_colour_component_write_masks = + (~blend->rt[2].colormask) & 0xf; + mask.render_target_3_per_colour_component_write_masks = + (~blend->rt[3].colormask) & 0xf; + } else { + uint8_t colormask = (~blend->rt[0].colormask) & 0xf; + mask.render_target_0_per_colour_component_write_masks = colormask; + mask.render_target_1_per_colour_component_write_masks = colormask; + mask.render_target_2_per_colour_component_write_masks = colormask; + mask.render_target_3_per_colour_component_write_masks = colormask; + } + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) { + cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { + /* XXX: format-dependent swizzling */ + colour.red_f16 = vc5->blend_color.hf[2]; + colour.green_f16 = vc5->blend_color.hf[1]; + colour.blue_f16 = vc5->blend_color.hf[0]; + colour.alpha_f16 = vc5->blend_color.hf[3]; + } + } + + if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { + struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; + struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; + + cl_emit(&job->bcl, STENCIL_CONFIG, config) { + config.front_config = true; + config.back_config = !back->enabled; + + config.stencil_write_mask = front->writemask; + config.stencil_test_mask = front->valuemask; + + config.stencil_test_function = front->func; + config.stencil_pass_op = front->zpass_op; + config.depth_test_fail_op = front->zfail_op; + config.stencil_test_fail_op = front->fail_op; + + config.stencil_ref_value = vc5->stencil_ref.ref_value[0]; + } + + if (back->enabled) { + cl_emit(&job->bcl, STENCIL_CONFIG, config) { + config.front_config = false; + config.back_config = true; + + config.stencil_write_mask = back->writemask; + config.stencil_test_mask = back->valuemask; + + config.stencil_test_function = back->func; + config.stencil_pass_op = back->zpass_op; + config.depth_test_fail_op = back->zfail_op; + config.stencil_test_fail_op = back->fail_op; + + config.stencil_ref_value = + vc5->stencil_ref.ref_value[1]; + } + } + } + + if (vc5->dirty & VC5_DIRTY_FRAGTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_VERTTEX) + emit_textures(vc5, &vc5->verttex); + + if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { + /* XXX: Need to handle more than 24 entries. */ + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = 0; + + flags.flat_shade_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff; + + if (vc5->rasterizer->base.flatshade) { + flags.flat_shade_flags_for_varyings_v024 |= + vc5->prog.fs->prog_data.fs->shade_model_flags[0] & 0xfffff; + } + } + } + + if (vc5->dirty & VC5_DIRTY_STREAMOUT) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + + if (so->num_targets) { + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { + tfe.number_of_32_bit_output_buffer_address_following = + so->num_targets; + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + }; + + for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { + cl_emit_prepacked(&job->bcl, + &vc5->prog.bind_vs->tf_specs[i]); + } + + for (int i = 0; i < so->num_targets; i++) { + const struct pipe_stream_output_target *target = + so->targets[i]; + struct vc5_resource *rsc = + vc5_resource(target->buffer); + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { + output.address = + cl_address(rsc->bo, + target->buffer_offset); + }; + + vc5_job_add_write_resource(vc5->job, + target->buffer); + /* XXX: buffer_size? */ + } + } else { + /* XXX? */ + } + } +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c b/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c new file mode 100644 index 000000000..08de9bca5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_fence.c + * + * Seqno-based fence management. + * + * We have two mechanisms for waiting in our kernel API: You can wait on a BO + * to have all rendering to from any process to be completed, or wait on a + * seqno for that particular seqno to be passed. The fence API we're + * implementing is based on waiting for all rendering in the context to have + * completed (with no reference to what other processes might be doing with + * the same BOs), so we can just use the seqno of the last rendering we'd + * fired off as our fence marker. + */ + +#include "util/u_inlines.h" + +#include "vc5_screen.h" +#include "vc5_bufmgr.h" + +struct vc5_fence { + struct pipe_reference reference; + uint64_t seqno; +}; + +static void +vc5_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pp, + struct pipe_fence_handle *pf) +{ + struct vc5_fence **p = (struct vc5_fence **)pp; + struct vc5_fence *f = (struct vc5_fence *)pf; + struct vc5_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + free(old); + } + *p = f; +} + +static boolean +vc5_fence_finish(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_fence_handle *pf, + uint64_t timeout_ns) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence *f = (struct vc5_fence *)pf; + + return vc5_wait_seqno(screen, f->seqno, timeout_ns, "fence wait"); +} + +struct vc5_fence * +vc5_fence_create(struct vc5_screen *screen, uint64_t seqno) +{ + struct vc5_fence *f = calloc(1, sizeof(*f)); + + if (!f) + return NULL; + + pipe_reference_init(&f->reference, 1); + f->seqno = seqno; + + return f; +} + +void +vc5_fence_init(struct vc5_screen *screen) +{ + screen->base.fence_reference = vc5_fence_reference; + screen->base.fence_finish = vc5_fence_finish; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c b/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c new file mode 100644 index 000000000..114f2d741 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c @@ -0,0 +1,416 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_formats.c + * + * Contains the table and accessors for VC5 texture and render target format + * support. + * + * The hardware has limited support for texture formats, and extremely limited + * support for render target formats. As a result, we emulate other formats + * in our shader code, and this stores the table for doing so. + */ + +#include "util/u_format.h" +#include "util/macros.h" + +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#define OUTPUT_IMAGE_FORMAT_NO 255 + +struct vc5_format { + /** Set if the pipe format is defined in the table. */ + bool present; + + /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + uint8_t rt_type; + + /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + uint8_t tex_type; + + /** + * Swizzle to apply to the RGBA shader output for storing to the tile + * buffer, to the RGBA tile buffer to produce shader input (for + * blending), and for turning the rgba8888 texture sampler return + * value into shader rgba values. + */ + uint8_t swizzle[4]; + + /* Whether the return value is 16F/I/UI or 32F/I/UI. */ + uint8_t return_size; + + /* If return_size == 32, how many channels are returned by texturing. + * 16 always returns 2 pairs of 16 bit values. + */ + uint8_t return_channels; +}; + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \ + [PIPE_FORMAT_##pipe] = { \ + true, \ + OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + return_channels, \ + } + +#define SWIZ_X001 SWIZ(X, 0, 0, 1) +#define SWIZ_XY01 SWIZ(X, Y, 0, 1) +#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) +#define SWIZ_XYZW SWIZ(X, Y, Z, W) +#define SWIZ_YZWX SWIZ(Y, Z, W, X) +#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) +#define SWIZ_ZYXW SWIZ(Z, Y, X, W) +#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) +#define SWIZ_XXXY SWIZ(X, X, X, Y) +#define SWIZ_XXX1 SWIZ(X, X, X, 1) +#define SWIZ_XXXX SWIZ(X, X, X, X) +#define SWIZ_000X SWIZ(0, 0, 0, X) + +static const struct vc5_format vc5_format_table[] = { + FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_UNORM, RGBX8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_SRGB, SRGBX8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_UNORM, RGBX8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), + FORMAT(B10G10R10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_ZYXW, 16, 0), + + FORMAT(B4G4R4A4_UNORM, ABGR4444, RGBA4, SWIZ_YZWX, 16, 0), + FORMAT(B4G4R4X4_UNORM, ABGR4444, RGBA4, SWIZ_YZW1, 16, 0), + + FORMAT(A1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, 0), + FORMAT(X1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZ1, 16, 0), + FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0), + + FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0), + FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0), + FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0), + FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0), + + FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1), + FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, 1), + FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0), + FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2), + FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2), + FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2), + + FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4), + + /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */ + FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1), + FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1), + FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1), + FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1), + FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1), + FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1), + FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2), + FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2), + + FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0), + FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0), + FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0), + FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0), + + FORMAT(R8_SINT, R8I, S8, SWIZ_X001, 16, 0), + FORMAT(R8_UINT, R8UI, S8, SWIZ_X001, 16, 0), + FORMAT(R8G8_SINT, RG8I, S16, SWIZ_XY01, 16, 0), + FORMAT(R8G8_UINT, RG8UI, S16, SWIZ_XY01, 16, 0), + FORMAT(R8G8B8A8_SINT, RGBA8I, R32F, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8A8_UINT, RGBA8UI, R32F, SWIZ_XYZW, 16, 0), + + FORMAT(R16_SINT, R16I, S16, SWIZ_X001, 16, 0), + FORMAT(R16_UINT, R16UI, S16, SWIZ_X001, 16, 0), + FORMAT(R16G16_SINT, RG16I, R32F, SWIZ_XY01, 16, 0), + FORMAT(R16G16_UINT, RG16UI, R32F, SWIZ_XY01, 16, 0), + FORMAT(R16G16B16A16_SINT, RGBA16I, RG32F, SWIZ_XYZW, 16, 0), + FORMAT(R16G16B16A16_UINT, RGBA16UI, RG32F, SWIZ_XYZW, 16, 0), + + FORMAT(R32_SINT, R32I, R32F, SWIZ_X001, 16, 0), + FORMAT(R32_UINT, R32UI, R32F, SWIZ_X001, 16, 0), + FORMAT(R32G32_SINT, RG32I, RG32F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_UINT, RG32UI, RG32F, SWIZ_XY01, 16, 0), + FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32F, SWIZ_XYZW, 16, 0), + + FORMAT(A8_SINT, R8I, S8, SWIZ_000X, 16, 0), + FORMAT(A8_UINT, R8UI, S8, SWIZ_000X, 16, 0), + FORMAT(A16_SINT, R16I, S16, SWIZ_000X, 16, 0), + FORMAT(A16_UINT, R16UI, S16, SWIZ_000X, 16, 0), + FORMAT(A32_SINT, R32I, R32F, SWIZ_000X, 16, 0), + FORMAT(A32_UINT, R32UI, R32F, SWIZ_000X, 16, 0), + + FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0), + FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0), + + FORMAT(S8_UINT_Z24_UNORM, DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_X001, 32, 1), + FORMAT(X8Z24_UNORM, DEPTH_COMPONENT24, DEPTH24_X8, SWIZ_X001, 32, 1), + FORMAT(S8X24_UINT, NO, R32F, SWIZ_X001, 32, 1), + FORMAT(Z32_FLOAT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1), + FORMAT(Z16_UNORM, DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_X001, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. */ + FORMAT(Z32_FLOAT_S8X24_UINT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0), + FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0), + + FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0), + FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZ1, 16, 0), + FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZ1, 16, 0), +}; + +static const struct vc5_format * +get_format(enum pipe_format f) +{ + if (f >= ARRAY_SIZE(vc5_format_table) || + !vc5_format_table[f].present) + return NULL; + else + return &vc5_format_table[f]; +} + +bool +vc5_rt_format_supported(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return false; + + return vf->rt_type != OUTPUT_IMAGE_FORMAT_NO; +} + +uint8_t +vc5_get_rt_format(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->rt_type; +} + +bool +vc5_tex_format_supported(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + return vf != NULL; +} + +uint8_t +vc5_get_tex_format(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->tex_type; +} + +uint8_t +vc5_get_tex_return_size(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->return_size; +} + +uint8_t +vc5_get_tex_return_channels(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + + if (!vf) + return 0; + + return vf->return_channels; +} + +const uint8_t * +vc5_get_format_swizzle(enum pipe_format f) +{ + const struct vc5_format *vf = get_format(f); + static const uint8_t fallback[] = {0, 1, 2, 3}; + + if (!vf) + return fallback; + + return vf->swizzle; +} + +void +vc5_get_internal_type_bpp_for_output_format(uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + switch (format) { + case OUTPUT_IMAGE_FORMAT_RGBA8: + case OUTPUT_IMAGE_FORMAT_RGBX8: + case OUTPUT_IMAGE_FORMAT_RGB8: + case OUTPUT_IMAGE_FORMAT_RG8: + case OUTPUT_IMAGE_FORMAT_R8: + case OUTPUT_IMAGE_FORMAT_ABGR4444: + case OUTPUT_IMAGE_FORMAT_BGR565: + case OUTPUT_IMAGE_FORMAT_ABGR1555: + *type = INTERNAL_TYPE_8; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA8I: + case OUTPUT_IMAGE_FORMAT_RG8I: + case OUTPUT_IMAGE_FORMAT_R8I: + *type = INTERNAL_TYPE_8I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA8UI: + case OUTPUT_IMAGE_FORMAT_RG8UI: + case OUTPUT_IMAGE_FORMAT_R8UI: + *type = INTERNAL_TYPE_8UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: + case OUTPUT_IMAGE_FORMAT_SRGB: + case OUTPUT_IMAGE_FORMAT_RGB10_A2: + case OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: + case OUTPUT_IMAGE_FORMAT_SRGBX8: + case OUTPUT_IMAGE_FORMAT_RGBA16F: + /* Note that sRGB RTs are stored in the tile buffer at 16F, + * and the conversion to sRGB happens at tilebuffer + * load/store. + */ + *type = INTERNAL_TYPE_16F; + *bpp = INTERNAL_BPP_64; + break; + + case OUTPUT_IMAGE_FORMAT_RG16F: + case OUTPUT_IMAGE_FORMAT_R16F: + *type = INTERNAL_TYPE_16F; + /* Use 64bpp to make sure the TLB doesn't throw away the alpha + * channel before alpha test happens. + */ + *bpp = INTERNAL_BPP_64; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA16I: + *type = INTERNAL_TYPE_16I; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_RG16I: + case OUTPUT_IMAGE_FORMAT_R16I: + *type = INTERNAL_TYPE_16I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA16UI: + *type = INTERNAL_TYPE_16UI; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_RG16UI: + case OUTPUT_IMAGE_FORMAT_R16UI: + *type = INTERNAL_TYPE_16UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32I: + *type = INTERNAL_TYPE_32I; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32UI: + *type = INTERNAL_TYPE_32UI; + *bpp = INTERNAL_BPP_32; + break; + + case OUTPUT_IMAGE_FORMAT_RGBA32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_128; + break; + case OUTPUT_IMAGE_FORMAT_RG32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_64; + break; + case OUTPUT_IMAGE_FORMAT_R32F: + *type = INTERNAL_TYPE_32F; + *bpp = INTERNAL_BPP_32; + break; + + default: + /* Provide some default values, as we'll be called at RB + * creation time, even if an RB with this format isn't + * supported. + */ + *type = INTERNAL_TYPE_8; + *bpp = INTERNAL_BPP_32; + break; + } +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_job.c b/lib/mesa/src/gallium/drivers/vc5/vc5_job.c new file mode 100644 index 000000000..ed1a64be8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_job.c @@ -0,0 +1,454 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_job.c + * + * Functions for submitting VC5 render jobs to the kernel. + */ + +#include <xf86drm.h> +#include "vc5_context.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "broadcom/clif/clif_dump.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +remove_from_ht(struct hash_table *ht, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); +} + +static void +vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) +{ + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + vc5_bo_unreference(&bo); + } + + remove_from_ht(vc5->jobs, &job->key); + + if (job->write_prscs) { + struct set_entry *entry; + + set_foreach(job->write_prscs, entry) { + const struct pipe_resource *prsc = entry->key; + + remove_from_ht(vc5->write_jobs, (void *)prsc); + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], NULL); + } + } + if (job->zsbuf) { + remove_from_ht(vc5->write_jobs, job->zsbuf->texture); + pipe_surface_reference(&job->zsbuf, NULL); + } + + if (vc5->job == job) + vc5->job = NULL; + + vc5_destroy_cl(&job->bcl); + vc5_destroy_cl(&job->rcl); + vc5_destroy_cl(&job->indirect); + vc5_bo_unreference(&job->tile_alloc); + + ralloc_free(job); +} + +static struct vc5_job * +vc5_job_create(struct vc5_context *vc5) +{ + struct vc5_job *job = rzalloc(vc5, struct vc5_job); + + job->vc5 = vc5; + + vc5_init_cl(job, &job->bcl); + vc5_init_cl(job, &job->rcl); + vc5_init_cl(job, &job->indirect); + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + job->bos = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + return job; +} + +void +vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo) +{ + if (!bo) + return; + + if (_mesa_set_search(job->bos, bo)) + return; + + vc5_bo_reference(bo); + _mesa_set_add(job->bos, bo); + + uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; + + if (job->submit.bo_handle_count >= job->bo_handles_size) { + job->bo_handles_size = MAX2(4, job->bo_handles_size * 2); + bo_handles = reralloc(job, bo_handles, + uint32_t, job->bo_handles_size); + job->submit.bo_handles = (uintptr_t)(void *)bo_handles; + } + bo_handles[job->submit.bo_handle_count++] = bo->handle; +} + +void +vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = job->vc5; + + if (!job->write_prscs) { + job->write_prscs = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + + _mesa_set_add(job->write_prscs, prsc); + _mesa_hash_table_insert(vc5->write_jobs, prsc, job); +} + +void +vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +void +vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + vc5_flush_jobs_writing_resource(vc5, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + + if (_mesa_set_search(job->bos, rsc->bo)) { + vc5_job_submit(vc5, job); + /* Reminder: vc5->jobs is safe to keep iterating even + * after deletion of an entry. + */ + continue; + } + } +} + +static void +vc5_job_set_tile_buffer_size(struct vc5_job *job) +{ + static const uint8_t tile_sizes[] = { + 64, 64, + 64, 32, + 32, 32, + 32, 16, + 16, 16, + }; + int tile_size_index = 0; + if (job->msaa) + tile_size_index += 2; + + if (job->cbufs[3] || job->cbufs[2]) + tile_size_index += 2; + else if (job->cbufs[1]) + tile_size_index++; + + int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + struct vc5_surface *surf = vc5_surface(job->cbufs[i]); + max_bpp = MAX2(max_bpp, surf->internal_bpp); + } + } + job->internal_bpp = max_bpp; + STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); + tile_size_index += max_bpp; + + assert(tile_size_index < ARRAY_SIZE(tile_sizes)); + job->tile_width = tile_sizes[tile_size_index * 2 + 0]; + job->tile_height = tile_sizes[tile_size_index * 2 + 1]; +} + +/** + * Returns a vc5_job struture for tracking V3D rendering to a particular FBO. + * + * If we've already started rendering to this FBO, then return old same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc5_job * +vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc5_job_key local_key = { + .cbufs = { + cbufs[0], + cbufs[1], + cbufs[2], + cbufs[3], + }, + .zsbuf = zsbuf, + }; + struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job. Make sure that any previous jobs reading or + * writing these buffers are flushed. + */ + struct vc5_job *job = vc5_job_create(vc5); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) { + vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], cbufs[i]); + + if (cbufs[i]->texture->nr_samples > 1) + job->msaa = true; + } + } + if (zsbuf) { + vc5_flush_jobs_reading_resource(vc5, zsbuf->texture); + pipe_surface_reference(&job->zsbuf, zsbuf); + if (zsbuf->texture->nr_samples > 1) + job->msaa = true; + } + + vc5_job_set_tile_buffer_size(job); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) + _mesa_hash_table_insert(vc5->write_jobs, + cbufs[i]->texture, job); + } + if (zsbuf) + _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job); + + memcpy(&job->key, &local_key, sizeof(local_key)); + _mesa_hash_table_insert(vc5->jobs, &job->key, job); + + return job; +} + +struct vc5_job * +vc5_get_job_for_fbo(struct vc5_context *vc5) +{ + if (vc5->job) + return vc5->job; + + struct pipe_surface **cbufs = vc5->framebuffer.cbufs; + struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf; + struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf); + + /* The dirty flags are tracking what's been updated while vc5->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. + */ + vc5->dirty = ~0; + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. + */ + for (int i = 0; i < 4; i++) { + if (cbufs[i]) { + struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0 << i; + } + } + + if (zsbuf) { + struct vc5_resource *rsc = vc5_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height, + job->tile_height); + + vc5->job = job; + + return job; +} + +static bool +vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr) +{ + struct vc5_job *job = data; + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (void *)entry->key; + + if (addr >= bo->offset && + addr < bo->offset + bo->size) { + vc5_bo_map(bo); + *vaddr = bo->map + addr - bo->offset; + return true; + } + } + + return false; +} + +static void +vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!(V3D_DEBUG & V3D_DEBUG_CL)) + return; + + struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo, + stderr, vc5_clif_dump_lookup, + job); + + fprintf(stderr, "BCL: 0x%08x..0x%08x\n", + job->submit.bcl_start, job->submit.bcl_end); + + clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end); + + fprintf(stderr, "RCL: 0x%08x..0x%08x\n", + job->submit.rcl_start, job->submit.rcl_end); + clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end); +} + +/** + * Submits the job to the kernel and then reinitializes it. + */ +void +vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!job->needs_flush) + goto done; + + /* The RCL setup would choke if the draw bounds cause no drawing, so + * just drop the drawing if that's the case. + */ + if (job->draw_max_x <= job->draw_min_x || + job->draw_max_y <= job->draw_min_y) { + goto done; + } + + vc5_emit_rcl(job); + + if (cl_offset(&job->bcl) > 0) { + vc5_cl_ensure_space_with_branch(&job->bcl, 2); + + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act + * until the FLUSH completes. + */ + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); + + /* The FLUSH caps all of our bin lists with a + * VC5_PACKET_RETURN. + */ + cl_emit(&job->bcl, FLUSH, flush); + } + + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); + job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); + + vc5_clif_dump(vc5, job); + + if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { + int ret; + +#ifndef USE_VC5_SIMULATOR + ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit); +#else + ret = vc5_simulator_flush(vc5, &job->submit, job); +#endif + static bool warned = false; + if (ret && !warned) { + fprintf(stderr, "Draw call returned %s. " + "Expect corruption.\n", strerror(errno)); + warned = true; + } + } + + if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) { + if (!vc5_wait_seqno(vc5->screen, + vc5->last_emit_seqno - 5, + PIPE_TIMEOUT_INFINITE, + "job throttling")) { + fprintf(stderr, "Job throttling failed\n"); + } + } + +done: + vc5_job_free(vc5, job); +} + +static bool +vc5_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc5_job_key)) == 0; +} + +static uint32_t +vc5_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc5_job_key)); +} + +void +vc5_job_init(struct vc5_context *vc5) +{ + vc5->jobs = _mesa_hash_table_create(vc5, + vc5_job_hash, + vc5_job_compare); + vc5->write_jobs = _mesa_hash_table_create(vc5, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_program.c b/lib/mesa/src/gallium/drivers/vc5/vc5_program.c new file mode 100644 index 000000000..8e9af1ad8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_program.c @@ -0,0 +1,594 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/ralloc.h" +#include "util/hash_table.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "nir/tgsi_to_nir.h" +#include "compiler/v3d_compiler.h" +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static gl_varying_slot +vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) +{ + nir_foreach_variable(var, &s->outputs) { + if (var->data.driver_location == driver_location) { + return var->data.location; + } + } + + return -1; +} + +static void +vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, + const struct pipe_stream_output_info *stream_output) +{ + if (!stream_output->num_outputs) + return; + + struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; + int slot_count = 0; + + for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { + uint32_t buffer_offset = 0; + uint32_t vpm_start = slot_count; + + for (int i = 0; i < stream_output->num_outputs; i++) { + const struct pipe_stream_output *output = + &stream_output->output[i]; + + if (output->output_buffer != buffer) + continue; + + /* We assume that the SO outputs appear in increasing + * order in the buffer. + */ + assert(output->dst_offset >= buffer_offset); + + /* Pad any undefined slots in the output */ + for (int j = buffer_offset; j < output->dst_offset; j++) { + slots[slot_count] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); + slot_count++; + } + + /* Set the coordinate shader up to output the + * components of this varying. + */ + for (int j = 0; j < output->num_components; j++) { + gl_varying_slot slot = + vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index); + + slots[slot_count] = + v3d_slot_from_slot_and_component(slot, + output->start_component + j); + slot_count++; + } + } + + uint32_t vpm_size = slot_count - vpm_start; + if (!vpm_size) + continue; + + struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + /* We need the offset from the coordinate shader's VPM + * output block, which has the [X, Y, Z, W, Xs, Ys] + * values at the start. Note that this will need some + * shifting when PSIZ is also present. + */ + .first_shaded_vertex_value_to_output = vpm_start + 6, + .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = vpm_size - 1, + .output_buffer_to_write_to = buffer, + }; + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs[so->num_tf_specs++], + &unpacked); + } + + so->num_tf_outputs = slot_count; + so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, + slot_count); + memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); +} + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void * +vc5_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader); + if (!so) + return NULL; + + so->program_id = vc5->next_uncompiled_program_id++; + + nir_shader *s; + + if (cso->type == PIPE_SHADER_IR_NIR) { + /* The backend takes ownership of the NIR shader on state + * creation. + */ + s = cso->ir.nir; + + NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + if (V3D_DEBUG & V3D_DEBUG_TGSI) { + fprintf(stderr, "prog %d TGSI:\n", + so->program_id); + tgsi_dump(cso->tokens, 0); + fprintf(stderr, "\n"); + } + s = tgsi_to_nir(cso->tokens, &v3d_nir_options); + } + + NIR_PASS_V(s, nir_opt_global_to_local); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + NIR_PASS_V(s, nir_normalize_cubemap_coords); + + NIR_PASS_V(s, nir_lower_load_const_to_scalar); + + v3d_optimize_nir(s); + + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + + /* Garbage collect dead instructions */ + nir_sweep(s); + + so->base.type = PIPE_SHADER_IR_NIR; + so->base.ir.nir = s; + + vc5_set_transform_feedback_outputs(so, &cso->stream_output); + + if (V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage(s->info.stage))) { + fprintf(stderr, "%s prog %d NIR:\n", + gl_shader_stage_name(s->info.stage), + so->program_id); + nir_print_shader(s, stderr); + fprintf(stderr, "\n"); + } + + return so; +} + +static struct vc5_compiled_shader * +vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key) +{ + struct vc5_uncompiled_shader *shader_state = key->shader_state; + nir_shader *s = shader_state->base.ir.nir; + + struct hash_table *ht; + uint32_t key_size; + if (s->info.stage == MESA_SHADER_FRAGMENT) { + ht = vc5->fs_cache; + key_size = sizeof(struct v3d_fs_key); + } else { + ht = vc5->vs_cache; + key_size = sizeof(struct v3d_vs_key); + } + + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + if (entry) + return entry->data; + + struct vc5_compiled_shader *shader = + rzalloc(NULL, struct vc5_compiled_shader); + + int program_id = shader_state->program_id; + int variant_id = + p_atomic_inc_return(&shader_state->compiled_variant_count); + uint64_t *qpu_insts; + uint32_t shader_size; + + switch (s->info.stage) { + case MESA_SHADER_VERTEX: + shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); + + qpu_insts = v3d_compile_vs(vc5->screen->compiler, + (struct v3d_vs_key *)key, + shader->prog_data.vs, s, + program_id, variant_id, + &shader_size); + break; + case MESA_SHADER_FRAGMENT: + shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); + + qpu_insts = v3d_compile_fs(vc5->screen->compiler, + (struct v3d_fs_key *)key, + shader->prog_data.fs, s, + program_id, variant_id, + &shader_size); + break; + default: + unreachable("bad stage"); + } + + vc5_set_shader_uniform_dirty_flags(shader); + + shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader"); + vc5_bo_map(shader->bo); + memcpy(shader->bo->map, qpu_insts, shader_size); + + free(qpu_insts); + + struct vc5_key *dup_key; + dup_key = ralloc_size(shader, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, shader); + + return shader; +} + +static void +vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key, + struct vc5_texture_stateobj *texstate) +{ + for (int i = 0; i < texstate->num_textures; i++) { + struct pipe_sampler_view *sampler = texstate->textures[i]; + struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler); + struct pipe_sampler_state *sampler_state = + texstate->samplers[i]; + + if (!sampler) + continue; + + key->tex[i].return_size = + vc5_get_tex_return_size(sampler->format); + + /* For 16-bit, we set up the sampler to always return 2 + * channels (meaning no recompiles for most statechanges), + * while for 32 we actually scale the returns with channels. + */ + if (key->tex[i].return_size == 16) { + key->tex[i].return_channels = 2; + } else { + key->tex[i].return_channels = + vc5_get_tex_return_channels(sampler->format); + } + + if (vc5_get_tex_return_size(sampler->format) == 32) { + memcpy(key->tex[i].swizzle, + vc5_sampler->swizzle, + sizeof(vc5_sampler->swizzle)); + } else { + /* For 16-bit returns, we let the sampler state handle + * the swizzle. + */ + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } + + if (sampler->texture->nr_samples > 1) { + key->tex[i].msaa_width = sampler->texture->width0; + key->tex[i].msaa_height = sampler->texture->height0; + } else if (sampler){ + key->tex[i].compare_mode = sampler_state->compare_mode; + key->tex[i].compare_func = sampler_state->compare_func; + key->tex[i].wrap_s = sampler_state->wrap_s; + key->tex[i].wrap_t = sampler_state->wrap_t; + } + } + + key->ucp_enables = vc5->rasterizer->base.clip_plane_enable; +} + +static void +vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct vc5_job *job = vc5->job; + struct v3d_fs_key local_key; + struct v3d_fs_key *key = &local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_BLEND | + VC5_DIRTY_FRAMEBUFFER | + VC5_DIRTY_ZSA | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_SAMPLE_MASK | + VC5_DIRTY_FRAGTEX | + VC5_DIRTY_UNCOMPILED_FS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex); + key->base.shader_state = vc5->prog.bind_fs; + key->is_points = (prim_mode == PIPE_PRIM_POINTS); + key->is_lines = (prim_mode >= PIPE_PRIM_LINES && + prim_mode <= PIPE_PRIM_LINE_STRIP); + key->clamp_color = vc5->rasterizer->base.clamp_fragment_color; + if (vc5->blend->logicop_enable) { + key->logicop_func = vc5->blend->logicop_func; + } else { + key->logicop_func = PIPE_LOGICOP_COPY; + } + if (job->msaa) { + key->msaa = vc5->rasterizer->base.multisample; + key->sample_coverage = (vc5->rasterizer->base.multisample && + vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage; + key->sample_alpha_to_one = vc5->blend->alpha_to_one; + } + + key->depth_enabled = (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled); + if (vc5->zsa->base.alpha.enabled) { + key->alpha_test = true; + key->alpha_test_func = vc5->zsa->base.alpha.func; + } + + /* gl_FragColor's propagation to however many bound color buffers + * there are means that the buffer count needs to be in the key. + */ + key->nr_cbufs = vc5->framebuffer.nr_cbufs; + + for (int i = 0; i < key->nr_cbufs; i++) { + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + const struct util_format_description *desc = + util_format_description(cbuf->format); + + if (desc->swizzle[0] == PIPE_SWIZZLE_Z) + key->swap_color_rb |= 1 << i; + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } + } + + if (key->is_points) { + key->point_sprite_mask = + vc5->rasterizer->base.sprite_coord_enable; + key->point_coord_upper_left = + (vc5->rasterizer->base.sprite_coord_mode == + PIPE_SPRITE_COORD_UPPER_LEFT); + } + + key->light_twoside = vc5->rasterizer->base.light_twoside; + + struct vc5_compiled_shader *old_fs = vc5->prog.fs; + vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base); + if (vc5->prog.fs == old_fs) + return; + + vc5->dirty |= VC5_DIRTY_COMPILED_FS; + + if (old_fs && + (vc5->prog.fs->prog_data.fs->flat_shade_flags != + old_fs->prog_data.fs->flat_shade_flags || + (vc5->rasterizer->base.flatshade && + vc5->prog.fs->prog_data.fs->shade_model_flags != + old_fs->prog_data.fs->shade_model_flags))) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots, + old_fs->prog_data.fs->input_slots, + sizeof(vc5->prog.fs->prog_data.fs->input_slots))) { + vc5->dirty |= VC5_DIRTY_FS_INPUTS; + } +} + +static void +vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct v3d_vs_key local_key; + struct v3d_vs_key *key = &local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_VERTTEX | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_UNCOMPILED_VS | + VC5_DIRTY_FS_INPUTS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->verttex); + key->base.shader_state = vc5->prog.bind_vs; + key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs; + STATIC_ASSERT(sizeof(key->fs_inputs) == + sizeof(vc5->prog.fs->prog_data.fs->input_slots)); + memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots, + sizeof(key->fs_inputs)); + key->clamp_color = vc5->rasterizer->base.clamp_vertex_color; + + key->per_vertex_point_size = + (prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + struct vc5_compiled_shader *vs = + vc5_get_compiled_shader(vc5, &key->base); + if (vs != vc5->prog.vs) { + vc5->prog.vs = vs; + vc5->dirty |= VC5_DIRTY_COMPILED_VS; + } + + key->is_coord = true; + /* Coord shaders only output varyings used by transform feedback. */ + struct vc5_uncompiled_shader *shader_state = key->base.shader_state; + memcpy(key->fs_inputs, shader_state->tf_outputs, + sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_fs_inputs) { + memset(&key->fs_inputs[shader_state->num_tf_outputs], + 0, + sizeof(*key->fs_inputs) * (key->num_fs_inputs - + shader_state->num_tf_outputs)); + } + key->num_fs_inputs = shader_state->num_tf_outputs; + + struct vc5_compiled_shader *cs = + vc5_get_compiled_shader(vc5, &key->base); + if (cs != vc5->prog.cs) { + vc5->prog.cs = cs; + vc5->dirty |= VC5_DIRTY_COMPILED_CS; + } +} + +void +vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode) +{ + vc5_update_compiled_fs(vc5, prim_mode); + vc5_update_compiled_vs(vc5, prim_mode); +} + +static uint32_t +fs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); +} + +static uint32_t +vs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); +} + +static bool +fs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; +} + +static bool +vs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; +} + +static void +delete_from_cache_if_matches(struct hash_table *ht, + struct vc5_compiled_shader **last_compile, + struct hash_entry *entry, + struct vc5_uncompiled_shader *so) +{ + const struct v3d_key *key = entry->key; + + if (key->shader_state == so) { + struct vc5_compiled_shader *shader = entry->data; + _mesa_hash_table_remove(ht, entry); + vc5_bo_unreference(&shader->bo); + + if (shader == *last_compile) + *last_compile = NULL; + + ralloc_free(shader); + } +} + +static void +vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = hwcso; + + struct hash_entry *entry; + hash_table_foreach(vc5->fs_cache, entry) { + delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs, + entry, so); + } + hash_table_foreach(vc5->vs_cache, entry) { + delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs, + entry, so); + } + + ralloc_free(so->base.ir.nir); + free(so); +} + +static void +vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_fs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS; +} + +static void +vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_vs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS; +} + +void +vc5_program_init(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + pctx->create_vs_state = vc5_shader_state_create; + pctx->delete_vs_state = vc5_shader_state_delete; + + pctx->create_fs_state = vc5_shader_state_create; + pctx->delete_fs_state = vc5_shader_state_delete; + + pctx->bind_fs_state = vc5_fp_state_bind; + pctx->bind_vs_state = vc5_vp_state_bind; + + vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, + fs_cache_compare); + vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, + vs_cache_compare); +} + +void +vc5_program_fini(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->fs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->fs_cache, entry); + } + + hash_table_foreach(vc5->vs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->vs_cache, entry); + } +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_query.c b/lib/mesa/src/gallium/drivers/vc5/vc5_query.c new file mode 100644 index 000000000..c114e76ee --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_query.c @@ -0,0 +1,91 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * Stub support for occlusion queries. + * + * Since we expose support for GL 2.0, we have to expose occlusion queries, + * but the spec allows you to expose 0 query counter bits, so we just return 0 + * as the result of all our queries. + */ +#include "vc5_context.h" + +struct vc5_query +{ + uint8_t pad; +}; + +static struct pipe_query * +vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct vc5_query *query = calloc(1, sizeof(*query)); + + /* Note that struct pipe_query isn't actually defined anywhere. */ + return (struct pipe_query *)query; +} + +static void +vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + free(query); +} + +static boolean +vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +vc5_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static boolean +vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query, + boolean wait, union pipe_query_result *vresult) +{ + uint64_t *result = &vresult->u64; + + *result = 0; + + return true; +} + +static void +vc5_set_active_query_state(struct pipe_context *pipe, boolean enable) +{ +} + +void +vc5_query_init(struct pipe_context *pctx) +{ + pctx->create_query = vc5_create_query; + pctx->destroy_query = vc5_destroy_query; + pctx->begin_query = vc5_begin_query; + pctx->end_query = vc5_end_query; + pctx->get_query_result = vc5_get_query_result; + pctx->set_active_query_state = vc5_set_active_query_state; +} + diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c b/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c new file mode 100644 index 000000000..4ef2d8379 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c @@ -0,0 +1,297 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "vc5_context.h" +#include "vc5_tiling.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job) +{ + /* Emit the generic list in our indirect state -- the rcl will just + * have pointers into it. + */ + struct vc5_cl *cl = &job->indirect; + vc5_cl_ensure_space(cl, 200, 1); + struct vc5_cl_reloc tile_list_start = cl_get_address(cl); + + const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 | + PIPE_CLEAR_COLOR1 | + PIPE_CLEAR_COLOR2 | + PIPE_CLEAR_COLOR3); + const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1); + + uint32_t read_but_not_cleared = job->resolve & ~job->cleared; + + /* The initial reload will be queued until we get the + * tile coordinates. + */ + if (read_but_not_cleared) { + cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) { + load.disable_colour_buffer_load = + (~read_but_not_cleared & pipe_clear_color_buffers) >> + first_color_buffer_bit; + load.enable_z_load = + read_but_not_cleared & PIPE_CLEAR_DEPTH; + load.enable_stencil_load = + read_but_not_cleared & PIPE_CLEAR_STENCIL; + } + } + + /* Tile Coordinates triggers the reload and sets where the stores + * go. There must be one per store packet. + */ + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { + uint32_t color_write_enables = + job->resolve >> first_color_buffer_bit; + + store.disable_color_buffer_write = (~color_write_enables) & 0xf; + store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH; + store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL; + + store.disable_colour_buffers_clear_on_write = + (job->cleared & pipe_clear_color_buffers) == 0; + store.disable_z_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_DEPTH); + store.disable_stencil_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_STENCIL); + }; + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = cl_get_address(cl); + } +} + +#define div_round_up(a, b) (((a) + (b) - 1) / b) + +void +vc5_emit_rcl(struct vc5_job *job) +{ + /* The RCL list should be empty. */ + assert(!job->rcl.bo); + + vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + job->submit.rcl_start = job->rcl.bo->offset; + vc5_job_add_bo(job, job->rcl.bo); + + int nr_cbufs = 0; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) + nr_cbufs = i + 1; + } + + /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION + * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are + * optional updates to the previous HW state. + */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION, + config) { + config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH; + config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL; + + config.early_z_disable = !job->uses_early_z; + + config.image_width_pixels = job->draw_width; + config.image_height_pixels = job->draw_height; + + config.number_of_render_targets_minus_1 = + MAX2(nr_cbufs, 1) - 1; + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + for (int i = 0; i < nr_cbufs; i++) { + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf) + continue; + + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + rt.address = cl_address(rsc->bo, surf->offset); + rt.internal_type = surf->internal_type; + rt.output_image_format = surf->format; + rt.memory_format = surf->tiling; + rt.internal_bpp = surf->internal_bpp; + rt.render_target_number = i; + + if (job->resolve & PIPE_CLEAR_COLOR0 << i) + rsc->writes++; + } + + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1, + clear) { + clear.clear_color_low_32_bits = job->clear_color[i][0]; + clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; + clear.render_target_number = i; + }; + + if (util_format_get_blocksize(psurf->format) > 7) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2, + clear) { + clear.clear_color_mid_low_32_bits = + ((job->clear_color[i][1] >> 24) | + (job->clear_color[i][2] << 8)); + clear.clear_color_mid_high_24_bits = + ((job->clear_color[i][2] >> 24) | + ((job->clear_color[i][3] & 0xffff) << 8)); + clear.render_target_number = i; + }; + } + + if (util_format_get_blocksize(psurf->format) > 14) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3, + clear) { + clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; + clear.render_target_number = i; + }; + } + } + + /* TODO: Don't bother emitting if we don't load/clear Z/S. */ + if (job->zsbuf) { + struct pipe_surface *psurf = job->zsbuf; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) { + zs.address = cl_address(rsc->bo, surf->offset); + + zs.internal_type = surf->internal_type; + zs.output_image_format = surf->format; + + struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level]; + /* XXX */ + zs.padded_height_of_output_image_in_uif_blocks = + (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp)); + + assert(surf->tiling != VC5_TILING_RASTER); + zs.memory_format = surf->tiling; + } + + if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL) + rsc->writes++; + } + + /* Ends rendering mode config. */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES, + clear) { + clear.z_clear_value = job->clear_z; + clear.stencil_vg_mask_clear_value = job->clear_s; + }; + + /* Always set initial block size before the first branch, which needs + * to match the value from binning mode config. + */ + cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + uint32_t supertile_w = 1, supertile_h = 1; + + /* If doing multicore binning, we would need to initialize each core's + * tile list here. + */ + cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = cl_address(job->tile_alloc, 0); + } + + cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) { + uint32_t frame_w_in_supertiles, frame_h_in_supertiles; + const uint32_t max_supertiles = 256; + + /* Size up our supertiles until we get under the limit. */ + for (;;) { + frame_w_in_supertiles = div_round_up(job->draw_tiles_x, + supertile_w); + frame_h_in_supertiles = div_round_up(job->draw_tiles_y, + supertile_h); + if (frame_w_in_supertiles * frame_h_in_supertiles < + max_supertiles) { + break; + } + + if (supertile_w < supertile_h) + supertile_w++; + else + supertile_h++; + } + + config.total_frame_width_in_tiles = job->draw_tiles_x; + config.total_frame_height_in_tiles = job->draw_tiles_y; + + config.supertile_width_in_tiles_minus_1 = supertile_w - 1; + config.supertile_height_in_tiles_minus_1 = supertile_h - 1; + + config.total_frame_width_in_supertiles = frame_w_in_supertiles; + config.total_frame_height_in_supertiles = frame_h_in_supertiles; + } + + /* Start by clearing the tile buffer. */ + cl_emit(&job->rcl, TILE_COORDINATES, coords) { + coords.tile_column_number = 0; + coords.tile_row_number = 0; + } + + cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + + cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); + + vc5_rcl_emit_generic_per_tile_list(job); + + cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem); + + /* XXX: Use Morton order */ + uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; + uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; + uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; + uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; + uint32_t max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; + uint32_t max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; + + for (int y = min_y_supertile; y <= max_y_supertile; y++) { + for (int x = min_x_supertile; x <= max_x_supertile; x++) { + cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } + + cl_emit(&job->rcl, END_OF_RENDERING, end); +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c new file mode 100644 index 000000000..42d58791f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c @@ -0,0 +1,751 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_blit.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_upload_mgr.h" + +#include "drm_fourcc.h" +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_resource.h" +#include "vc5_tiling.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1) +#endif + +static bool +vc5_resource_bo_alloc(struct vc5_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + struct pipe_screen *pscreen = prsc->screen; + struct vc5_bo *bo; + int layers = (prsc->target == PIPE_TEXTURE_3D ? + prsc->depth0 : prsc->array_size); + + bo = vc5_bo_alloc(vc5_screen(pscreen), + rsc->slices[0].offset + + rsc->slices[0].size + + rsc->cube_map_stride * layers - 1, + "resource"); + if (bo) { + DBG(V3D_DEBUG_SURFACE, "alloc %p @ 0x%08x:\n", rsc, bo->offset); + vc5_bo_unreference(&rsc->bo); + rsc->bo = bo; + return true; + } else { + return false; + } +} + +static void +vc5_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_transfer *trans = vc5_transfer(ptrans); + + if (trans->map) { + struct vc5_resource *rsc; + struct vc5_resource_slice *slice; + if (trans->ss_resource) { + rsc = vc5_resource(trans->ss_resource); + slice = &rsc->slices[0]; + } else { + rsc = vc5_resource(ptrans->resource); + slice = &rsc->slices[ptrans->level]; + } + + if (ptrans->usage & PIPE_TRANSFER_WRITE) { + vc5_store_tiled_image(rsc->bo->map + slice->offset + + ptrans->box.z * rsc->cube_map_stride, + slice->stride, + trans->map, ptrans->stride, + slice->tiling, rsc->cpp, + rsc->base.height0, + &ptrans->box); + } + free(trans->map); + } + + if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) { + struct pipe_blit_info blit; + memset(&blit, 0, sizeof(blit)); + + blit.src.resource = trans->ss_resource; + blit.src.format = trans->ss_resource->format; + blit.src.box.width = trans->ss_box.width; + blit.src.box.height = trans->ss_box.height; + blit.src.box.depth = 1; + + blit.dst.resource = ptrans->resource; + blit.dst.format = ptrans->resource->format; + blit.dst.level = ptrans->level; + blit.dst.box = trans->ss_box; + + blit.mask = util_format_get_mask(ptrans->resource->format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + pctx->blit(pctx, &blit); + + pipe_resource_reference(&trans->ss_resource, NULL); + } + + pipe_resource_reference(&ptrans->resource, NULL); + slab_free(&vc5->transfer_pool, ptrans); +} + +static struct pipe_resource * +vc5_get_temp_resource(struct pipe_context *pctx, + struct pipe_resource *prsc, + const struct pipe_box *box) +{ + struct pipe_resource temp_setup; + + memset(&temp_setup, 0, sizeof(temp_setup)); + temp_setup.target = prsc->target; + temp_setup.format = prsc->format; + temp_setup.width0 = box->width; + temp_setup.height0 = box->height; + temp_setup.depth0 = 1; + temp_setup.array_size = 1; + + return pctx->screen->resource_create(pctx->screen, &temp_setup); +} + +static void * +vc5_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_transfer *trans; + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. + */ + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && + prsc->last_level == 0 && + prsc->width0 == box->width && + prsc->height0 == box->height && + prsc->depth0 == box->depth && + prsc->array_size == 1 && + rsc->bo->private) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + if (vc5_resource_bo_alloc(rsc)) { + /* If it might be bound as one of our vertex buffers + * or UBOs, make sure we re-emit vertex buffer state + * or uniforms. + */ + if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) + vc5->dirty |= VC5_DIRTY_VTXBUF; + if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER) + vc5->dirty |= VC5_DIRTY_CONSTBUF; + } else { + /* If we failed to reallocate, flush users so that we + * don't violate any syncing requirements. + */ + vc5_flush_jobs_reading_resource(vc5, prsc); + } + } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + /* If we're writing and the buffer is being used by the CL, we + * have to flush the CL first. If we're only reading, we need + * to flush if the CL has written our buffer. + */ + if (usage & PIPE_TRANSFER_WRITE) + vc5_flush_jobs_reading_resource(vc5, prsc); + else + vc5_flush_jobs_writing_resource(vc5, prsc); + } + + if (usage & PIPE_TRANSFER_WRITE) { + rsc->writes++; + rsc->initialized_buffers = ~0; + } + + trans = slab_alloc(&vc5->transfer_pool); + if (!trans) + return NULL; + + /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ + + /* slab_alloc_st() doesn't zero: */ + memset(trans, 0, sizeof(*trans)); + ptrans = &trans->base; + + pipe_resource_reference(&ptrans->resource, prsc); + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + + /* If the resource is multisampled, we need to resolve to single + * sample. This seems like it should be handled at a higher layer. + */ + if (prsc->nr_samples > 1) { + trans->ss_resource = vc5_get_temp_resource(pctx, prsc, box); + if (!trans->ss_resource) + goto fail; + assert(!trans->ss_resource->nr_samples); + + /* The ptrans->box gets modified for tile alignment, so save + * the original box for unmap time. + */ + trans->ss_box = *box; + + if (usage & PIPE_TRANSFER_READ) { + struct pipe_blit_info blit; + memset(&blit, 0, sizeof(blit)); + + blit.src.resource = ptrans->resource; + blit.src.format = ptrans->resource->format; + blit.src.level = ptrans->level; + blit.src.box = trans->ss_box; + + blit.dst.resource = trans->ss_resource; + blit.dst.format = trans->ss_resource->format; + blit.dst.box.width = trans->ss_box.width; + blit.dst.box.height = trans->ss_box.height; + blit.dst.box.depth = 1; + + blit.mask = util_format_get_mask(prsc->format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + + pctx->blit(pctx, &blit); + vc5_flush_jobs_writing_resource(vc5, blit.dst.resource); + } + + /* The rest of the mapping process should use our temporary. */ + prsc = trans->ss_resource; + rsc = vc5_resource(prsc); + ptrans->box.x = 0; + ptrans->box.y = 0; + ptrans->box.z = 0; + } + + /* Note that the current kernel implementation is synchronous, so no + * need to do syncing stuff here yet. + */ + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = vc5_bo_map_unsynchronized(rsc->bo); + else + buf = vc5_bo_map(rsc->bo); + if (!buf) { + fprintf(stderr, "Failed to map bo\n"); + goto fail; + } + + *pptrans = ptrans; + + struct vc5_resource_slice *slice = &rsc->slices[level]; + if (rsc->tiled) { + /* No direct mappings of tiled, since we need to manually + * tile/untile. + */ + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + ptrans->stride = ptrans->box.width * rsc->cpp; + ptrans->layer_stride = ptrans->stride * ptrans->box.height; + + trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); + + if (usage & PIPE_TRANSFER_READ) { + vc5_load_tiled_image(trans->map, ptrans->stride, + buf + slice->offset + + ptrans->box.z * rsc->cube_map_stride, + slice->stride, + slice->tiling, rsc->cpp, + rsc->base.height0, + &ptrans->box); + } + return trans->map; + } else { + ptrans->stride = slice->stride; + ptrans->layer_stride = ptrans->stride; + + return buf + slice->offset + + ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride + + ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp + + ptrans->box.z * rsc->cube_map_stride; + } + + +fail: + vc5_resource_transfer_unmap(pctx, ptrans); + return NULL; +} + +static void +vc5_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + vc5_bo_unreference(&rsc->bo); + free(rsc); +} + +static boolean +vc5_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *pctx, + struct pipe_resource *prsc, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_bo *bo = rsc->bo; + + whandle->stride = rsc->slices[0].stride; + + /* If we're passing some reference to our BO out to some other part of + * the system, then we can't do any optimizations about only us being + * the ones seeing it (like BO caching). + */ + bo->private = false; + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + return vc5_bo_flink(bo, &whandle->handle); + case DRM_API_HANDLE_TYPE_KMS: + whandle->handle = bo->handle; + return TRUE; + case DRM_API_HANDLE_TYPE_FD: + whandle->handle = vc5_bo_get_dmabuf(bo); + return whandle->handle != -1; + } + + return FALSE; +} + +static void +vc5_setup_slices(struct vc5_resource *rsc, const char *caller) +{ + struct pipe_resource *prsc = &rsc->base; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t pot_width = util_next_power_of_two(width); + uint32_t pot_height = util_next_power_of_two(height); + uint32_t offset = 0; + uint32_t utile_w = vc5_utile_width(rsc->cpp); + uint32_t utile_h = vc5_utile_height(rsc->cpp); + uint32_t uif_block_w = utile_w * 2; + uint32_t uif_block_h = utile_h * 2; + bool uif_top = false; + + for (int i = prsc->last_level; i >= 0; i--) { + struct vc5_resource_slice *slice = &rsc->slices[i]; + + uint32_t level_width, level_height; + if (i < 2) { + level_width = u_minify(width, i); + level_height = u_minify(height, i); + } else { + level_width = u_minify(pot_width, i); + level_height = u_minify(pot_height, i); + } + + if (!rsc->tiled) { + slice->tiling = VC5_TILING_RASTER; + if (prsc->nr_samples > 1) { + /* MSAA (4x) surfaces are stored as raw tile buffer contents. */ + level_width = align(level_width, 32); + level_height = align(level_height, 32); + } + } else { + if ((i != 0 || !uif_top) && + (level_width <= utile_w || + level_height <= utile_h)) { + slice->tiling = VC5_TILING_LINEARTILE; + level_width = align(level_width, utile_w); + level_height = align(level_height, utile_h); + } else if ((i != 0 || !uif_top) && + level_width <= uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; + level_width = align(level_width, uif_block_w); + level_height = align(level_height, uif_block_h); + } else if ((i != 0 || !uif_top) && + level_width <= 2 * uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; + level_width = align(level_width, 2 * uif_block_w); + level_height = align(level_height, uif_block_h); + } else { + slice->tiling = VC5_TILING_UIF_NO_XOR; + + level_width = align(level_width, + 4 * uif_block_w); + level_height = align(level_height, + 4 * uif_block_h); + } + } + + slice->offset = offset; + slice->stride = (level_width * rsc->cpp * + MAX2(prsc->nr_samples, 1)); + slice->size = level_height * slice->stride; + + offset += slice->size; + + if (V3D_DEBUG & V3D_DEBUG_SURFACE) { + static const char *const tiling_descriptions[] = { + [VC5_TILING_RASTER] = "R", + [VC5_TILING_LINEARTILE] = "LT", + [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1", + [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2", + [VC5_TILING_UIF_NO_XOR] = "UIF", + [VC5_TILING_UIF_XOR] = "UIF^", + }; + + fprintf(stderr, + "rsc %s %p (format %s), %dx%d: " + "level %d (%s) %dx%d -> %dx%d, stride %d@0x%08x\n", + caller, rsc, + util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + i, tiling_descriptions[slice->tiling], + u_minify(prsc->width0, i), + u_minify(prsc->height0, i), + level_width, level_height, + slice->stride, slice->offset); + } + } + + /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only + * needs to be aligned to utile boundaries. Since tiles are laid out + * from small to big in memory, we need to align the later UIF slices + * to UIF blocks, if they were preceded by non-UIF-block-aligned LT + * slices. + * + * We additionally align to 4k, which improves UIF XOR performance. + */ + uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) - + rsc->slices[0].offset); + if (page_align_offset) { + for (int i = 0; i <= prsc->last_level; i++) + rsc->slices[i].offset += page_align_offset; + } + + /* Arrays, cubes, and 3D textures have a stride which is the distance + * from one full mipmap tree to the next (64b aligned). + */ + rsc->cube_map_stride = align(rsc->slices[0].offset + + rsc->slices[0].size, 64); +} + +static struct vc5_resource * +vc5_resource_setup(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource); + if (!rsc) + return NULL; + struct pipe_resource *prsc = &rsc->base; + + *prsc = *tmpl; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + if (prsc->nr_samples <= 1) + rsc->cpp = util_format_get_blocksize(tmpl->format); + else + rsc->cpp = sizeof(uint32_t); + + assert(rsc->cpp); + + return rsc; +} + +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + +static struct pipe_resource * +vc5_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, + int count) +{ + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + /* Use a tiled layout if we can, for better 3D performance. */ + bool should_tile = true; + + /* VBOs/PBOs are untiled (and 1 height). */ + if (tmpl->target == PIPE_BUFFER) + should_tile = false; + + /* Cursors are always linear, and the user can request linear as well. + */ + if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) + should_tile = false; + + /* Scanout BOs for simulator need to be linear for interaction with + * i965. + */ + if (using_vc5_simulator && + tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) + should_tile = false; + + /* No user-specified modifier; determine our own. */ + if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { + linear_ok = true; + rsc->tiled = should_tile; + } else if (should_tile && + find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, + modifiers, count)) { + rsc->tiled = true; + } else if (linear_ok) { + rsc->tiled = false; + } else { + fprintf(stderr, "Unsupported modifier requested\n"); + return NULL; + } + + if (tmpl->target != PIPE_BUFFER) + rsc->tex_format = vc5_get_tex_format(prsc->format); + + vc5_setup_slices(rsc, "create"); + if (!vc5_resource_bo_alloc(rsc)) + goto fail; + + return prsc; +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +struct pipe_resource * +vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + +static struct pipe_resource * +vc5_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + struct vc5_resource_slice *slice = &rsc->slices[0]; + + if (!rsc) + return NULL; + + switch (whandle->modifier) { + case DRM_FORMAT_MOD_LINEAR: + rsc->tiled = false; + break; + /* XXX: UIF */ + default: + fprintf(stderr, + "Attempt to import unsupported modifier 0x%llx\n", + (long long)whandle->modifier); + goto fail; + } + + if (whandle->offset != 0) { + fprintf(stderr, + "Attempt to import unsupported winsys offset %u\n", + whandle->offset); + goto fail; + } + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + rsc->bo = vc5_bo_open_name(screen, + whandle->handle, whandle->stride); + break; + case DRM_API_HANDLE_TYPE_FD: + rsc->bo = vc5_bo_open_dmabuf(screen, + whandle->handle, whandle->stride); + break; + default: + fprintf(stderr, + "Attempt to import unsupported handle type %d\n", + whandle->type); + goto fail; + } + + if (!rsc->bo) + goto fail; + + vc5_setup_slices(rsc, "import"); + + rsc->tex_format = vc5_get_tex_format(prsc->format); + + DBG(V3D_DEBUG_SURFACE, + "rsc import %p (format %s), %dx%d: " + "level 0 (R) -> stride %d@0x%08x\n", + rsc, util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + slice->stride, slice->offset); + + if (whandle->stride != slice->stride) { + static bool warned = false; + if (!warned) { + warned = true; + fprintf(stderr, + "Attempting to import %dx%d %s with " + "unsupported stride %d instead of %d\n", + prsc->width0, prsc->height0, + util_format_short_name(prsc->format), + whandle->stride, + slice->stride); + } + goto fail; + } + + return prsc; + +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +static struct pipe_surface * +vc5_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl) +{ + struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface); + struct vc5_resource *rsc = vc5_resource(ptex); + + if (!surface) + return NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + struct pipe_surface *psurf = &surface->base; + unsigned level = surf_tmpl->u.tex.level; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, ptex); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(ptex->width0, level); + psurf->height = u_minify(ptex->height0, level); + psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + surface->offset = (rsc->slices[level].offset + + psurf->u.tex.first_layer * rsc->cube_map_stride); + surface->tiling = rsc->slices[level].tiling; + surface->format = vc5_get_rt_format(psurf->format); + + if (util_format_is_depth_or_stencil(psurf->format)) { + switch (psurf->format) { + case PIPE_FORMAT_Z16_UNORM: + surface->internal_type = INTERNAL_TYPE_DEPTH_16; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + surface->internal_type = INTERNAL_TYPE_DEPTH_32F; + break; + default: + surface->internal_type = INTERNAL_TYPE_DEPTH_24; + } + } else { + uint32_t bpp, type; + vc5_get_internal_type_bpp_for_output_format(surface->format, + &type, &bpp); + surface->internal_type = type; + surface->internal_bpp = bpp; + } + + return &surface->base; +} + +static void +vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + pipe_resource_reference(&psurf->texture, NULL); + FREE(psurf); +} + +static void +vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) +{ + /* All calls to flush_resource are followed by a flush of the context, + * so there's nothing to do. + */ +} + +void +vc5_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create_with_modifiers = + vc5_resource_create_with_modifiers; + pscreen->resource_create = vc5_resource_create; + pscreen->resource_from_handle = vc5_resource_from_handle; + pscreen->resource_get_handle = vc5_resource_get_handle; + pscreen->resource_destroy = vc5_resource_destroy; +} + +void +vc5_resource_context_init(struct pipe_context *pctx) +{ + pctx->transfer_map = vc5_resource_transfer_map; + pctx->transfer_flush_region = u_default_transfer_flush_region; + pctx->transfer_unmap = vc5_resource_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->create_surface = vc5_create_surface; + pctx->surface_destroy = vc5_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = vc5_blit; + pctx->flush_resource = vc5_flush_resource; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h new file mode 100644 index 000000000..e65cb8c8f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h @@ -0,0 +1,158 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_RESOURCE_H +#define VC5_RESOURCE_H + +#include "vc5_screen.h" +#include "util/u_transfer.h" + +/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These + * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB + * page. Those pages are then arranged left-to-right, top-to-bottom, to cover + * an image. + * + * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte + * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp). + */ + +/** + * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory + * Format field of render target and Z/Stencil config. + */ +enum vc5_tiling_mode { + /* Untiled resources. Not valid as texture inputs. */ + VC5_TILING_RASTER, + + /* Single line of u-tiles. */ + VC5_TILING_LINEARTILE, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_1_COLUMN, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_2_COLUMN, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_NO_XOR, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_XOR, +}; + +struct vc5_transfer { + struct pipe_transfer base; + void *map; + + struct pipe_resource *ss_resource; + struct pipe_box ss_box; +}; + +struct vc5_resource_slice { + uint32_t offset; + uint32_t stride; + uint32_t size; + enum vc5_tiling_mode tiling; +}; + +struct vc5_surface { + struct pipe_surface base; + uint32_t offset; + enum vc5_tiling_mode tiling; + /** + * Output image format for TILE_RENDERING_MODE_CONFIGURATION + */ + uint8_t format; + + /** + * Internal format of the tile buffer for + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_type; + + /** + * internal bpp value (0=32bpp, 2=128bpp) for color buffers in + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_bpp; +}; + +struct vc5_resource { + struct pipe_resource base; + struct vc5_bo *bo; + struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS]; + uint32_t cube_map_stride; + int cpp; + bool tiled; + /** One of V3D_TEXTURE_DATA_FORMAT_* */ + uint8_t tex_format; + + /** + * Number of times the resource has been written to. + * + * This is used to track whether we need to load the surface on first + * rendering. + */ + uint64_t writes; + + /** + * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL + * for which parts of the resource are defined. + * + * Used for avoiding fallback to quad clears for clearing just depth, + * when the stencil contents have never been initialized. Note that + * we're lazy and fields not present in the buffer (DEPTH in a color + * buffer) may get marked. + */ + uint32_t initialized_buffers; +}; + +static inline struct vc5_resource * +vc5_resource(struct pipe_resource *prsc) +{ + return (struct vc5_resource *)prsc; +} + +static inline struct vc5_surface * +vc5_surface(struct pipe_surface *psurf) +{ + return (struct vc5_surface *)psurf; +} + +static inline struct vc5_transfer * +vc5_transfer(struct pipe_transfer *ptrans) +{ + return (struct vc5_transfer *)ptrans; +} + +void vc5_resource_screen_init(struct pipe_screen *pscreen); +void vc5_resource_context_init(struct pipe_context *pctx); +struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl); + +#endif /* VC5_RESOURCE_H */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c new file mode 100644 index 000000000..66180d27e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c @@ -0,0 +1,620 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "os/os_misc.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/ralloc.h" + +#include <xf86drm.h> +#include "vc5_drm.h" +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_resource.h" +#include "compiler/v3d_compiler.h" + +static const char * +vc5_screen_get_name(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (!screen->name) { + screen->name = ralloc_asprintf(screen, + "VC5 V3D %d.%d", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + } + + return screen->name; +} + +static const char * +vc5_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "Broadcom"; +} + +static void +vc5_screen_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + util_hash_table_destroy(screen->bo_handles); + vc5_bufmgr_destroy(pscreen); + slab_destroy_parent(&screen->transfer_pool); + + if (using_vc5_simulator) + vc5_simulator_destroy(screen); + + v3d_compiler_free(screen->compiler); + + close(screen->fd); + ralloc_free(pscreen); +} + +static int +vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + switch (param) { + /* Supported features (boolean caps). */ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_SM3: + case PIPE_CAP_INDEP_BLEND_ENABLE: /* XXX */ + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + return 1; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 400; + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + return 1; + + + /* Stream output. */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 64; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 7; + + /* Unsupported features. */ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_MAX_VERTEX_STREAMS: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_CLIP_HALFZ: + case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + case PIPE_CAP_CULL_DISTANCE: + case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: + case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_MAX_WINDOW_RECTANGLES: + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_INT64: + case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_DOUBLES: + case PIPE_CAP_BINDLESS_TEXTURE: + case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_BALLOT: + case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: + case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: + case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_NATIVE_FENCE_FD: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: + case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: + case PIPE_CAP_TILE_RASTER_ORDER: + return 0; + + /* Geometry shader output, unsupported. */ + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return VC5_MAX_MIP_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 256; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + + /* Queries. */ + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_TIMESTAMP: + return 0; + + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + + case PIPE_CAP_VENDOR_ID: + return 0x14E4; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 1; + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + + default: + fprintf(stderr, "unknown param %d\n", param); + return 0; + } +} + +static float +vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 32; + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 512.0f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 0.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + default: + fprintf(stderr, "unknown paramf %d\n", param); + return 0; + } +} + +static int +vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return UINT_MAX; + + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VC5_MAX_FS_INPUTS / 4; + else + return 16; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 4 : 8; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return 0; + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return VC5_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; + default: + fprintf(stderr, "unknown shader param %d\n", param); + return 0; + } + return 0; +} + +static boolean +vc5_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + return FALSE; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + !util_format_is_supported(format, usage)) { + return FALSE; + } + + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8_SSCALED: + retval |= PIPE_BIND_VERTEX_BUFFER; + break; + default: + break; + } + } + + if ((usage & PIPE_BIND_RENDER_TARGET) && + vc5_rt_format_supported(format)) { + retval |= PIPE_BIND_RENDER_TARGET; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + vc5_tex_format_supported(format)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_Z16_UNORM || + format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (format == PIPE_FORMAT_I8_UINT || + format == PIPE_FORMAT_I16_UINT || + format == PIPE_FORMAT_I32_UINT)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + +#if 0 + if (retval != usage) { + fprintf(stderr, + "not supported: format=%s, target=%d, sample_count=%d, " + "usage=0x%x, retval=0x%x\n", util_format_name(format), + target, sample_count, usage, retval); + } +#endif + + return retval == usage; +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static bool +vc5_get_device_info(struct vc5_screen *screen) +{ + struct drm_vc5_get_param ident0 = { + .param = DRM_VC5_PARAM_V3D_CORE0_IDENT0, + }; + struct drm_vc5_get_param ident1 = { + .param = DRM_VC5_PARAM_V3D_CORE0_IDENT1, + }; + int ret; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident0); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n", + strerror(errno)); + return false; + } + ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM, &ident1); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n", + strerror(errno)); + return false; + } + + uint32_t major = (ident0.value >> 24) & 0xff; + uint32_t minor = (ident1.value >> 0) & 0xf; + screen->devinfo.ver = major * 10 + minor; + + if (screen->devinfo.ver != 33) { + fprintf(stderr, + "V3D %d.%d not supported by this version of Mesa.\n", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + return false; + } + + return true; +} + +static const void * +vc5_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, unsigned shader) +{ + return &v3d_nir_options; +} + +struct pipe_screen * +vc5_screen_create(int fd) +{ + struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen); + struct pipe_screen *pscreen; + + pscreen = &screen->base; + + pscreen->destroy = vc5_screen_destroy; + pscreen->get_param = vc5_screen_get_param; + pscreen->get_paramf = vc5_screen_get_paramf; + pscreen->get_shader_param = vc5_screen_get_shader_param; + pscreen->context_create = vc5_context_create; + pscreen->is_format_supported = vc5_screen_is_format_supported; + + screen->fd = fd; + list_inithead(&screen->bo_cache.time_list); + (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); + +#if defined(USE_VC5_SIMULATOR) + vc5_simulator_init(screen); +#endif + + if (!vc5_get_device_info(screen)) + goto fail; + + slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16); + + vc5_fence_init(screen); + + v3d_process_debug_variable(); + + vc5_resource_screen_init(pscreen); + + screen->compiler = v3d_compiler_init(&screen->devinfo); + + pscreen->get_name = vc5_screen_get_name; + pscreen->get_vendor = vc5_screen_get_vendor; + pscreen->get_device_vendor = vc5_screen_get_vendor; + pscreen->get_compiler_options = vc5_screen_get_compiler_options; + + return pscreen; + +fail: + close(fd); + ralloc_free(pscreen); + return NULL; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h new file mode 100644 index 000000000..28925d791 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h @@ -0,0 +1,100 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_SCREEN_H +#define VC5_SCREEN_H + +#include "pipe/p_screen.h" +#include "os/os_thread.h" +#include "state_tracker/drm_driver.h" +#include "util/list.h" +#include "util/slab.h" +#include "broadcom/common/v3d_debug.h" +#include "broadcom/common/v3d_device_info.h" + +struct vc5_bo; + +#define VC5_MAX_MIP_LEVELS 12 +#define VC5_MAX_TEXTURE_SAMPLERS 32 +#define VC5_MAX_SAMPLES 4 +#define VC5_MAX_DRAW_BUFFERS 4 +#define VC5_MAX_ATTRIBUTES 16 + +struct vc5_simulator_file; + +struct vc5_screen { + struct pipe_screen base; + int fd; + + struct v3d_device_info devinfo; + + const char *name; + + /** The last seqno we've completed a wait for. + * + * This lets us slightly optimize our waits by skipping wait syscalls + * if we know the job's already done. + */ + uint64_t finished_seqno; + + struct slab_parent_pool transfer_pool; + + struct vc5_bo_cache { + /** List of struct vc5_bo freed, by age. */ + struct list_head time_list; + /** List of struct vc5_bo freed, per size, by age. */ + struct list_head *size_list; + uint32_t size_list_size; + + mtx_t lock; + + uint32_t bo_size; + uint32_t bo_count; + } bo_cache; + + const struct v3d_compiler *compiler; + + struct util_hash_table *bo_handles; + mtx_t bo_handles_mutex; + + uint32_t bo_size; + uint32_t bo_count; + + struct vc5_simulator_file *sim_file; +}; + +static inline struct vc5_screen * +vc5_screen(struct pipe_screen *screen) +{ + return (struct vc5_screen *)screen; +} + +struct pipe_screen *vc5_screen_create(int fd); + +void +vc5_fence_init(struct vc5_screen *screen); + +struct vc5_fence * +vc5_fence_create(struct vc5_screen *screen, uint64_t seqno); + +#endif /* VC5_SCREEN_H */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c b/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c new file mode 100644 index 000000000..fc6a38d37 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c @@ -0,0 +1,736 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_simulator.c + * + * Implements VC5 simulation on top of a non-VC5 GEM fd. + * + * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on + * top of the simpenrose software simulator. Generally, VC5 driver BOs have a + * GEM-side copy of their contents and a simulator-side memory area that the + * GEM contents get copied into during simulation. Once simulation is done, + * the simulator's data is copied back out to the GEM BOs, so that rendering + * appears on the screen as if actual hardware rendering had been done. + * + * One of the limitations of this code is that we shouldn't really need a + * GEM-side BO for non-window-system BOs. However, do we need unique BO + * handles for each of our GEM bos so that this file can look up its state + * from the handle passed in at submit ioctl time (also, a couple of places + * outside of this file still call ioctls directly on the fd). + * + * Another limitation is that BO import doesn't work unless the underlying + * window system's BO size matches what VC5 is going to use, which of course + * doesn't work out in practice. This means that for now, only DRI3 (VC5 + * makes the winsys BOs) is supported, not DRI2 (window system makes the winys + * BOs). + */ + +#ifdef USE_VC5_SIMULATOR + +#include <sys/mman.h> +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "util/u_memory.h" +#include "util/u_mm.h" + +#define HW_REGISTER_RO(x) (x) +#define HW_REGISTER_RW(x) (x) +#include "libs/core/v3d/registers/3.3.0.0/v3d.h" + +#include "vc5_screen.h" +#include "vc5_context.h" +#define V3D_TECH_VERSION 3 +#define V3D_REVISION 3 +#define V3D_SUB_REV 0 +#define V3D_HIDDEN_REV 0 +#undef unreachable +#include "v3d_hw_auto.h" + +/** Global (across GEM fds) state for the simulator */ +static struct vc5_simulator_state { + mtx_t mutex; + + struct v3d_hw *v3d; + + /* Base virtual address of the heap. */ + void *mem; + /* Base hardware address of the heap. */ + uint32_t mem_base; + /* Size of the heap. */ + size_t mem_size; + + struct mem_block *heap; + struct mem_block *overflow; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *fd_map; + + int refcount; +} sim_state = { + .mutex = _MTX_INITIALIZER_NP, +}; + +/** Per-GEM-fd state for the simulator. */ +struct vc5_simulator_file { + int fd; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *bo_map; + + struct mem_block *gmp; + void *gmp_vaddr; +}; + +/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */ +struct vc5_simulator_bo { + struct vc5_simulator_file *file; + + /** Area for this BO within sim_state->mem */ + struct mem_block *block; + uint32_t size; + void *vaddr; + + void *winsys_map; + uint32_t winsys_stride; + + int handle; +}; + +static void * +int_to_key(int key) +{ + return (void *)(uintptr_t)key; +} + +static struct vc5_simulator_file * +vc5_get_simulator_file_for_fd(int fd) +{ + struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map, + int_to_key(fd + 1)); + return entry ? entry->data : NULL; +} + +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. + */ +#define BO_SENTINEL 0xfedcba98 + +/* 128kb */ +#define GMP_ALIGN2 17 + +/** + * Sets the range of GPU virtual address space to have the given GMP + * permissions (bit 0 = read, bit 1 = write, write-only forbidden). + */ +static void +set_gmp_flags(struct vc5_simulator_file *file, + uint32_t offset, uint32_t size, uint32_t flag) +{ + assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0); + int gmp_offset = offset >> GMP_ALIGN2; + int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2; + uint32_t *gmp = file->gmp_vaddr; + + assert(flag <= 0x3); + + for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) { + int32_t bitshift = (i % 16) * 2; + gmp[i / 16] &= ~(0x3 << bitshift); + gmp[i / 16] |= flag << bitshift; + } +} + +/** + * Allocates space in simulator memory and returns a tracking struct for it + * that also contains the drm_gem_cma_object struct. + */ +static struct vc5_simulator_bo * +vc5_create_simulator_bo(int fd, int handle, unsigned size) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = rzalloc(file, + struct vc5_simulator_bo); + size = align(size, 4096); + + sim_bo->file = file; + sim_bo->handle = handle; + + mtx_lock(&sim_state.mutex); + sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0); + mtx_unlock(&sim_state.mutex); + assert(sim_bo->block); + + set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); + + sim_bo->size = size; + sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + + /* A handle of 0 is used for vc5_gem.c internal allocations that + * don't need to go in the lookup table. + */ + if (handle != 0) { + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(file->bo_map, int_to_key(handle), + sim_bo); + mtx_unlock(&sim_state.mutex); + } + + return sim_bo; +} + +static void +vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo) +{ + struct vc5_simulator_file *sim_file = sim_bo->file; + + if (sim_bo->winsys_map) + munmap(sim_bo->winsys_map, sim_bo->size); + + set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + + mtx_lock(&sim_state.mutex); + u_mmFreeMem(sim_bo->block); + if (sim_bo->handle) { + struct hash_entry *entry = + _mesa_hash_table_search(sim_file->bo_map, + int_to_key(sim_bo->handle)); + _mesa_hash_table_remove(sim_file->bo_map, entry); + } + mtx_unlock(&sim_state.mutex); + ralloc_free(sim_bo); +} + +static struct vc5_simulator_bo * +vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle) +{ + mtx_lock(&sim_state.mutex); + struct hash_entry *entry = + _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle)); + mtx_unlock(&sim_state.mutex); + + return entry ? entry->data : NULL; +} + +static int +vc5_simulator_pin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + vc5_bo_map(bo); + memcpy(sim_bo->vaddr, bo->map, bo->size); + } + + return 0; +} + +static int +vc5_simulator_unpin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + assert(*(uint32_t *)(sim_bo->vaddr + + sim_bo->size) == BO_SENTINEL); + + vc5_bo_map(bo); + memcpy(bo->map, sim_bo->vaddr, bo->size); + } + + return 0; +} + +#if 0 +static void +vc5_dump_to_file(struct vc5_exec_info *exec) +{ + static int dumpno = 0; + struct drm_vc5_get_hang_state *state; + struct drm_vc5_get_hang_state_bo *bo_state; + unsigned int dump_version = 0; + + if (!(vc5_debug & VC5_DEBUG_DUMP)) + return; + + state = calloc(1, sizeof(*state)); + + int unref_count = 0; + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + unref_count++; + } + + /* Add one more for the overflow area that isn't wrapped in a BO. */ + state->bo_count = exec->bo_count + unref_count + 1; + bo_state = calloc(state->bo_count, sizeof(*bo_state)); + + char *filename = NULL; + asprintf(&filename, "vc5-dri-%d.dump", dumpno++); + FILE *f = fopen(filename, "w+"); + if (!f) { + fprintf(stderr, "Couldn't open %s: %s", filename, + strerror(errno)); + return; + } + + fwrite(&dump_version, sizeof(dump_version), 1, f); + + state->ct0ca = exec->ct0ca; + state->ct0ea = exec->ct0ea; + state->ct1ca = exec->ct1ca; + state->ct1ea = exec->ct1ea; + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + fwrite(state, sizeof(*state), 1, f); + + int i; + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + bo_state[i].handle = i; /* Not used by the parser. */ + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + bo_state[i].handle = 0; + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + i++; + } + + /* Add the static overflow memory area. */ + bo_state[i].handle = exec->bo_count; + bo_state[i].paddr = sim_state.overflow->ofs; + bo_state[i].size = sim_state.overflow->size; + i++; + + fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + + for (int i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + void *overflow = calloc(1, sim_state.overflow->size); + fwrite(overflow, 1, sim_state.overflow->size, f); + free(overflow); + + free(state); + free(bo_state); + fclose(f); +} +#endif + +#define V3D_WRITE(reg, val) v3d_hw_write_reg(sim_state.v3d, reg, val) +#define V3D_READ(reg) v3d_hw_read_reg(sim_state.v3d, reg) + +static void +vc5_flush_l3(void) +{ + if (!v3d_hw_has_gca(sim_state.v3d)) + return; + + uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); + + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); +} + +/* Invalidates the L2 cache. This is a read-only cache. */ +static void +vc5_flush_l2(void) +{ + V3D_WRITE(V3D_CTL_0_L2CACTL, + V3D_CTL_0_L2CACTL_L2CCLR_SET | + V3D_CTL_0_L2CACTL_L2CENA_SET); +} + +/* Invalidates texture L2 cachelines */ +static void +vc5_flush_l2t(void) +{ + V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); + V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); + V3D_WRITE(V3D_CTL_0_L2TCACTL, + V3D_CTL_0_L2TCACTL_L2TFLS_SET | + (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); +} + +/* Invalidates the slice caches. These are read-only caches. */ +static void +vc5_flush_slices(void) +{ + V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); +} + +static void +vc5_flush_caches(void) +{ + vc5_flush_l3(); + vc5_flush_l2(); + vc5_flush_l2t(); + vc5_flush_slices(); +} + +int +vc5_simulator_flush(struct vc5_context *vc5, + struct drm_vc5_submit_cl *submit, struct vc5_job *job) +{ + struct vc5_screen *screen = vc5->screen; + int fd = screen->fd; + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]); + struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL; + struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL; + uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; + uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0; + uint32_t row_len = MIN2(sim_stride, winsys_stride); + int ret; + + if (ctex && csim_bo->winsys_map) { +#if 0 + fprintf(stderr, "%dx%d %d %d %d\n", + ctex->base.b.width0, ctex->base.b.height0, + winsys_stride, + sim_stride, + ctex->bo->size); +#endif + + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(ctex->bo->map + y * sim_stride, + csim_bo->winsys_map + y * winsys_stride, + row_len); + } + } + + ret = vc5_simulator_pin_bos(fd, job); + if (ret) + return ret; + + //vc5_dump_to_file(&exec); + + /* Completely reset the GMP. */ + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CFG, + V3D_GMP_0_CFG_PROTENABLE_SET); + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_TABLE_ADDR, file->gmp->ofs); + v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CLEAR_LOAD, ~0); + while (v3d_hw_read_reg(sim_state.v3d, V3D_GMP_0_STATUS) & + V3D_GMP_0_STATUS_CFG_BUSY_SET) { + ; + } + + vc5_flush_caches(); + + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QBA, submit->bcl_start); + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QEA, submit->bcl_end); + + /* Wait for bin to complete before firing render, as it seems the + * simulator doesn't implement the semaphores. + */ + while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0EA)) { + v3d_hw_tick(sim_state.v3d); + } + + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QBA, submit->rcl_start); + v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QEA, submit->rcl_end); + + while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1EA) || + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1CA) != + v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1EA)) { + v3d_hw_tick(sim_state.v3d); + } + + ret = vc5_simulator_unpin_bos(fd, job); + if (ret) + return ret; + + if (ctex && csim_bo->winsys_map) { + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(csim_bo->winsys_map + y * winsys_stride, + ctex->bo->map + y * sim_stride, + row_len); + } + } + + return 0; +} + +/** + * Map the underlying GEM object from the real hardware GEM handle. + */ +static void * +vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo) +{ + int ret; + void *map; + + struct drm_mode_map_dumb map_dumb = { + .handle = sim_bo->handle, + }; + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map_dumb.offset); + if (map == MAP_FAILED) { + fprintf(stderr, + "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + sim_bo->handle, (long long)map_dumb.offset, + (int)sim_bo->size); + abort(); + } + + return map; +} + +/** + * Do fixups after a BO has been opened from a handle. + * + * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE + * time, but we're still using drmPrimeFDToHandle() so we have this helper to + * be called afterward instead. + */ +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size) +{ + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, handle, size); + + sim_bo->winsys_stride = winsys_stride; + sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo); +} + +/** + * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. + * + * Making a VC5 BO is just a matter of making a corresponding BO on the host. + */ +static int +vc5_simulator_create_bo_ioctl(int fd, struct drm_vc5_create_bo *args) +{ + int ret; + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + assert(create.size >= args->size); + + args->handle = create.handle; + + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, create.handle, args->size); + + args->offset = sim_bo->block->ofs; + + return ret; +} + +/** + * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. + * + * We just pass this straight through to dumb mmap. + */ +static int +vc5_simulator_mmap_bo_ioctl(int fd, struct drm_vc5_mmap_bo *args) +{ + int ret; + struct drm_mode_map_dumb map = { + .handle = args->handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + args->offset = map.offset; + + return ret; +} + +static int +vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) +{ + /* Free the simulator's internal tracking. */ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + vc5_free_simulator_bo(sim_bo); + + /* Pass the call on down. */ + return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args); +} + +static int +vc5_simulator_get_param_ioctl(int fd, struct drm_vc5_get_param *args) +{ + static const uint32_t reg_map[] = { + [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, + [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, + [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, + [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, + [DRM_VC5_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0, + [DRM_VC5_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1, + [DRM_VC5_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, + }; + + if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { + args->value = v3d_hw_read_reg(sim_state.v3d, + reg_map[args->param]); + return 0; + } + + fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n", + (long long)args->value); + abort(); +} + +int +vc5_simulator_ioctl(int fd, unsigned long request, void *args) +{ + switch (request) { + case DRM_IOCTL_VC5_CREATE_BO: + return vc5_simulator_create_bo_ioctl(fd, args); + case DRM_IOCTL_VC5_MMAP_BO: + return vc5_simulator_mmap_bo_ioctl(fd, args); + + case DRM_IOCTL_VC5_WAIT_BO: + case DRM_IOCTL_VC5_WAIT_SEQNO: + /* We do all of the vc5 rendering synchronously, so we just + * return immediately on the wait ioctls. This ignores any + * native rendering to the host BO, so it does mean we race on + * front buffer rendering. + */ + return 0; + + case DRM_IOCTL_VC5_GET_PARAM: + return vc5_simulator_get_param_ioctl(fd, args); + + case DRM_IOCTL_GEM_CLOSE: + return vc5_simulator_gem_close_ioctl(fd, args); + + case DRM_IOCTL_GEM_OPEN: + case DRM_IOCTL_GEM_FLINK: + return drmIoctl(fd, request, args); + default: + fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request); + abort(); + } +} + +static void +vc5_simulator_init_global(void) +{ + mtx_lock(&sim_state.mutex); + if (sim_state.refcount++) { + mtx_unlock(&sim_state.mutex); + return; + } + + sim_state.v3d = v3d_hw_auto_new(NULL); + v3d_hw_alloc_mem(sim_state.v3d, 256 * 1024 * 1024); + sim_state.mem_base = + v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size, + &sim_state.mem); + + sim_state.heap = u_mmInit(0, sim_state.mem_size); + + /* Make a block of 0xd0 at address 0 to make sure we don't screw up + * and land there. + */ + struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0); + memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096); + + mtx_unlock(&sim_state.mutex); + + sim_state.fd_map = + _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +void +vc5_simulator_init(struct vc5_screen *screen) +{ + vc5_simulator_init_global(); + + screen->sim_file = rzalloc(screen, struct vc5_simulator_file); + struct vc5_simulator_file *sim_file = screen->sim_file; + + screen->sim_file->bo_map = + _mesa_hash_table_create(screen->sim_file, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1), + screen->sim_file); + mtx_unlock(&sim_state.mutex); + + sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0); + sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs - + sim_state.mem_base); +} + +void +vc5_simulator_destroy(struct vc5_screen *screen) +{ + mtx_lock(&sim_state.mutex); + if (!--sim_state.refcount) { + _mesa_hash_table_destroy(sim_state.fd_map, NULL); + u_mmDestroy(sim_state.heap); + /* No memsetting the struct, because it contains the mutex. */ + sim_state.mem = NULL; + } + mtx_unlock(&sim_state.mutex); +} + +#endif /* USE_VC5_SIMULATOR */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_state.c b/lib/mesa/src/gallium/drivers/vc5/vc5_state.c new file mode 100644 index 000000000..eebf94b4b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_state.c @@ -0,0 +1,749 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_half.h" +#include "util/u_helpers.h" + +#include "vc5_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void * +vc5_generic_cso_state_create(const void *src, uint32_t size) +{ + void *dst = calloc(1, size); + if (!dst) + return NULL; + memcpy(dst, src, size); + return dst; +} + +static void +vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso) +{ + free(hwcso); +} + +static void +vc5_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) { + vc5->blend_color.hf[i] = + util_float_to_half(blend_color->color[i]); + } + vc5->dirty |= VC5_DIRTY_BLEND_COLOR; +} + +static void +vc5_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stencil_ref = *stencil_ref; + vc5->dirty |= VC5_DIRTY_STENCIL_REF; +} + +static void +vc5_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *clip) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->clip = *clip; + vc5->dirty |= VC5_DIRTY_CLIP; +} + +static void +vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + vc5->dirty |= VC5_DIRTY_SAMPLE_MASK; +} + +static uint16_t +float_to_187_half(float f) +{ + return fui(f) >> 16; +} + +static void * +vc5_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct vc5_rasterizer_state *so; + + so = CALLOC_STRUCT(vc5_rasterizer_state); + if (!so) + return NULL; + + so->base = *cso; + + /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, + * BCM21553). + */ + so->point_size = MAX2(cso->point_size, .125f); + + if (cso->offset_tri) { + so->offset_units = float_to_187_half(cso->offset_units); + so->offset_factor = float_to_187_half(cso->offset_scale); + } + + return so; +} + +/* Blend state is baked into shaders. */ +static void * +vc5_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + return vc5_generic_cso_state_create(cso, sizeof(*cso)); +} + +static void * +vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct vc5_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->depth.enabled) { + /* We only handle early Z in the < direction because otherwise + * we'd have to runtime guess which direction to set in the + * render config. + */ + so->early_z_enable = + ((cso->depth.func == PIPE_FUNC_LESS || + cso->depth.func == PIPE_FUNC_LEQUAL) && + (!cso->stencil[0].enabled || + (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP && + (!cso->stencil[1].enabled || + cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP)))); + } + + return so; +} + +static void +vc5_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stipple = *stipple; + vc5->dirty |= VC5_DIRTY_STIPPLE; +} + +static void +vc5_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->scissor = *scissor; + vc5->dirty |= VC5_DIRTY_SCISSOR; +} + +static void +vc5_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewport) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->viewport = *viewport; + vc5->dirty |= VC5_DIRTY_VIEWPORT; +} + +static void +vc5_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf; + + util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, + start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + vc5->dirty |= VC5_DIRTY_VTXBUF; +} + +static void +vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend = hwcso; + vc5->dirty |= VC5_DIRTY_BLEND; +} + +static void +vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_rasterizer_state *rast = hwcso; + + if (vc5->rasterizer && rast && + vc5->rasterizer->base.flatshade != rast->base.flatshade) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + vc5->rasterizer = hwcso; + vc5->dirty |= VC5_DIRTY_RASTERIZER; +} + +static void +vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->zsa = hwcso; + vc5->dirty |= VC5_DIRTY_ZSA; +} + +static void * +vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj); + + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + for (int i = 0; i < so->num_elements; i++) { + const struct pipe_vertex_element *elem = &elements[i]; + const struct util_format_description *desc = + util_format_description(elem->src_format); + uint32_t r_size = desc->channel[0].size; + + struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = { + /* vec_size == 0 means 4 */ + .vec_size = desc->nr_channels & 3, + .signed_int_type = (desc->channel[0].type == + UTIL_FORMAT_TYPE_SIGNED), + + .normalized_int_type = desc->channel[0].normalized, + .read_as_int_uint = desc->channel[0].pure_integer, + .instance_divisor = elem->instance_divisor, + }; + + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_FLOAT: + if (r_size == 32) { + attr_unpacked.type = ATTRIBUTE_FLOAT; + } else { + assert(r_size == 16); + attr_unpacked.type = ATTRIBUTE_HALF_FLOAT; + } + break; + + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_UNSIGNED: + switch (r_size) { + case 32: + attr_unpacked.type = ATTRIBUTE_INT; + break; + case 16: + attr_unpacked.type = ATTRIBUTE_SHORT; + break; + case 10: + attr_unpacked.type = ATTRIBUTE_INT2_10_10_10; + break; + case 8: + attr_unpacked.type = ATTRIBUTE_BYTE; + break; + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + attr_unpacked.type = ATTRIBUTE_BYTE; + abort(); + } + break; + + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + abort(); + } + + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(NULL, + (uint8_t *)&so->attrs[i * size], + &attr_unpacked); + } + + /* Set up the default attribute values in case any of the vertex + * elements use them. + */ + so->default_attribute_values = vc5_bo_alloc(vc5->screen, + VC5_MAX_ATTRIBUTES * + 4 * sizeof(float), + "default attributes"); + uint32_t *attrs = vc5_bo_map(so->default_attribute_values); + for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { + attrs[i * 4 + 0] = 0; + attrs[i * 4 + 1] = 0; + attrs[i * 4 + 2] = 0; + if (i < so->num_elements && + util_format_is_pure_integer(so->pipe[i].src_format)) { + attrs[i * 4 + 3] = 1; + } else { + attrs[i * 4 + 3] = fui(1.0); + } + } + + return so; +} + +static void +vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->vtx = hwcso; + vc5->dirty |= VC5_DIRTY_VTXSTATE; +} + +static void +vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, + const struct pipe_constant_buffer *cb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader]; + + util_copy_constant_buffer(&so->cb[index], cb); + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. + */ + if (unlikely(!cb)) { + so->enabled_mask &= ~(1 << index); + so->dirty_mask &= ~(1 << index); + return; + } + + so->enabled_mask |= 1 << index; + so->dirty_mask |= 1 << index; + vc5->dirty |= VC5_DIRTY_CONSTBUF; +} + +static void +vc5_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct pipe_framebuffer_state *cso = &vc5->framebuffer; + unsigned i; + + vc5->job = NULL; + + for (i = 0; i < framebuffer->nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); + for (; i < vc5->framebuffer.nr_cbufs; i++) + pipe_surface_reference(&cso->cbufs[i], NULL); + + cso->nr_cbufs = framebuffer->nr_cbufs; + + pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf); + + cso->width = framebuffer->width; + cso->height = framebuffer->height; + + vc5->dirty |= VC5_DIRTY_FRAMEBUFFER; +} + +static struct vc5_texture_stateobj * +vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader) +{ + switch (shader) { + case PIPE_SHADER_FRAGMENT: + vc5->dirty |= VC5_DIRTY_FRAGTEX; + return &vc5->fragtex; + break; + case PIPE_SHADER_VERTEX: + vc5->dirty |= VC5_DIRTY_VERTTEX; + return &vc5->verttex; + break; + default: + fprintf(stderr, "Unknown shader target %d\n", shader); + abort(); + } +} + +static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +{ + switch (pipe_wrap) { + case PIPE_TEX_WRAP_REPEAT: + return 0; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return 1; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return 2; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return 3; + case PIPE_TEX_WRAP_CLAMP: + return (using_nearest ? 1 : 3); + default: + unreachable("Unknown wrap mode"); + } +} + + +static void * +vc5_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = { + .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest), + .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest), + .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest), + }; + V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, + (uint8_t *)&so->p0, + &p0_unpacked); + + struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { + cl_packet_header(TEXTURE_SHADER_STATE), + + .min_level_of_detail = MAX2(cso->min_lod, 0.0), + .depth_compare_function = cso->compare_func, + .fixed_bias = cso->lod_bias, + }; + STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == + cl_packet_length(TEXTURE_SHADER_STATE)); + cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, + &state_unpacked); + + return so; +} + +static void +vc5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + + assert(start == 0); + unsigned i; + unsigned new_nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) + new_nr = i + 1; + stage_tex->samplers[i] = hwcso[i]; + } + + for (; i < stage_tex->num_samplers; i++) { + stage_tex->samplers[i] = NULL; + } + + stage_tex->num_samplers = new_nr; +} + +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +static struct pipe_sampler_view * +vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view); + struct vc5_resource *rsc = vc5_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + + pipe_reference(NULL, &prsc->reference); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + }; + + unpacked.return_word_0_of_texture_data = true; + if (vc5_get_tex_return_size(cso->format) == 16) { + unpacked.return_word_1_of_texture_data = true; + } else { + int chans = vc5_get_tex_return_channels(cso->format); + + if (chans > 1) + unpacked.return_word_1_of_texture_data = true; + if (chans > 2) + unpacked.return_word_2_of_texture_data = true; + if (chans > 3) + unpacked.return_word_3_of_texture_data = true; + } + + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, + (uint8_t *)&so->p1, + &unpacked); + + /* Compute the sampler view's swizzle up front. This will be plugged + * into either the sampler (for 16-bit returns) or the shader's + * texture key (for 32) + */ + uint8_t view_swizzle[4] = { + cso->swizzle_r, + cso->swizzle_g, + cso->swizzle_b, + cso->swizzle_a + }; + const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format); + util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle); + + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + struct V3D33_TEXTURE_SHADER_STATE state_unpacked = { + cl_packet_header(TEXTURE_SHADER_STATE), + + .image_width = prsc->width0, + .image_height = prsc->height0, + .image_depth = prsc->depth0, + + .texture_type = rsc->tex_format, + .srgb = util_format_is_srgb(cso->format), + + .base_level = cso->u.tex.first_level, + .array_stride_64_byte_aligned = rsc->cube_map_stride / 64, + }; + + /* Note: Contrary to the docs, the swizzle still applies even + * if the return size is 32. It's just that you probably want + * to swizzle in the shader, because you need the Y/Z/W + * channels to be defined. + */ + if (vc5_get_tex_return_size(cso->format) != 32) { + state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]); + state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]); + state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]); + state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]); + } else { + state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } + + /* XXX: While we need to use this flag to enable tiled + * resource sharing (even a small shared buffer should be UIF, + * not UBLINEAR or raster), this is also at the moment + * patching up the fact that our resource layout's decisions + * about XOR don't quite match the HW's. + */ + switch (rsc->slices[0].tiling) { + case VC5_TILING_UIF_NO_XOR: + case VC5_TILING_UIF_XOR: + state_unpacked.level_0_is_strictly_uif = true; + state_unpacked.level_0_xor_enable = false; + break; + default: + break; + } + + STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) == + cl_packet_length(TEXTURE_SHADER_STATE)); + cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state, + &state_unpacked); + + return &so->base; +} + +static void +vc5_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + free(view); +} + +static void +vc5_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + unsigned i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (views[i]) + new_nr = i + 1; + pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); + } + + for (; i < stage_tex->num_textures; i++) { + pipe_sampler_view_reference(&stage_tex->textures[i], NULL); + } + + stage_tex->num_textures = new_nr; +} + +static struct pipe_stream_output_target * +vc5_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +vc5_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static void +vc5_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct vc5_context *ctx = vc5_context(pctx); + struct vc5_streamout_stateobj *so = &ctx->streamout; + unsigned i; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (i = 0; i < num_targets; i++) + pipe_so_target_reference(&so->targets[i], targets[i]); + + for (; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; + + ctx->dirty |= VC5_DIRTY_STREAMOUT; +} + +void +vc5_state_init(struct pipe_context *pctx) +{ + pctx->set_blend_color = vc5_set_blend_color; + pctx->set_stencil_ref = vc5_set_stencil_ref; + pctx->set_clip_state = vc5_set_clip_state; + pctx->set_sample_mask = vc5_set_sample_mask; + pctx->set_constant_buffer = vc5_set_constant_buffer; + pctx->set_framebuffer_state = vc5_set_framebuffer_state; + pctx->set_polygon_stipple = vc5_set_polygon_stipple; + pctx->set_scissor_states = vc5_set_scissor_states; + pctx->set_viewport_states = vc5_set_viewport_states; + + pctx->set_vertex_buffers = vc5_set_vertex_buffers; + + pctx->create_blend_state = vc5_create_blend_state; + pctx->bind_blend_state = vc5_blend_state_bind; + pctx->delete_blend_state = vc5_generic_cso_state_delete; + + pctx->create_rasterizer_state = vc5_create_rasterizer_state; + pctx->bind_rasterizer_state = vc5_rasterizer_state_bind; + pctx->delete_rasterizer_state = vc5_generic_cso_state_delete; + + pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state; + pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind; + pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete; + + pctx->create_vertex_elements_state = vc5_vertex_state_create; + pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete; + pctx->bind_vertex_elements_state = vc5_vertex_state_bind; + + pctx->create_sampler_state = vc5_create_sampler_state; + pctx->delete_sampler_state = vc5_generic_cso_state_delete; + pctx->bind_sampler_states = vc5_sampler_states_bind; + + pctx->create_sampler_view = vc5_create_sampler_view; + pctx->sampler_view_destroy = vc5_sampler_view_destroy; + pctx->set_sampler_views = vc5_set_sampler_views; + + pctx->create_stream_output_target = vc5_create_stream_output_target; + pctx->stream_output_target_destroy = vc5_stream_output_target_destroy; + pctx->set_stream_output_targets = vc5_set_stream_output_targets; +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c new file mode 100644 index 000000000..279774e55 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c @@ -0,0 +1,402 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_tiling.c + * + * Handles information about the VC5 tiling formats, and loading and storing + * from them. + */ + +#include <stdint.h> +#include "vc5_screen.h" +#include "vc5_context.h" +#include "vc5_tiling.h" + +struct mb_layout { + /** Height, in pixels, of a macroblock (2x2 utiles, a UIF block). */ + uint32_t height; + /** Width, in pixels, of a macroblock (2x2 utiles, a UIF block). */ + uint32_t width; + uint32_t tile_row_stride; +}; + +enum { + MB_LAYOUT_8BPP, + MB_LAYOUT_16BPP, + MB_LAYOUT_32BPP, + MB_LAYOUT_64BPP, + MB_LAYOUT_128BPP, +}; + +static const struct mb_layout mb_layouts[] = { + [MB_LAYOUT_8BPP] = { .height = 16, .width = 16, .tile_row_stride = 8 }, + [MB_LAYOUT_16BPP] = { .height = 8, .width = 16, .tile_row_stride = 8 }, + [MB_LAYOUT_32BPP] = { .height = 8, .width = 8, .tile_row_stride = 4 }, + [MB_LAYOUT_64BPP] = { .height = 4, .width = 8, .tile_row_stride = 4 }, + [MB_LAYOUT_128BPP] = { .height = 4, .width = 4, .tile_row_stride = 2 }, +}; + +static const struct mb_layout * +get_mb_layout(int cpp) +{ + const struct mb_layout *layout = &mb_layouts[ffs(cpp) - 1]; + + /* Sanity check the table. XXX: We should de-duplicate. */ + assert(layout->width == vc5_utile_width(cpp) * 2); + assert(layout->tile_row_stride == vc5_utile_width(cpp)); + + return layout; +} + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. + */ +static inline uint32_t +vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + + assert(x < utile_w && y < utile_h); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction. + */ +static inline uint32_t +vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 128 : 0) + + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. + */ +static inline uint32_t +vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + const struct mb_layout *layout = get_mb_layout(cpp); + uint32_t mb_width = layout->width; + uint32_t mb_height = layout->height; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < mb_height / 2; + bool left = mb_pixel_x < mb_width / 2; + + /* Docs have this in pixels, we do bytes here. */ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t mb_tile_y = mb_pixel_y & ~(mb_height / 2); + uint32_t mb_tile_x = mb_pixel_x & ~(mb_width / 2); + uint32_t mb_tile_pixel_id = (mb_tile_y * + layout->tile_row_stride + + mb_tile_x); + + uint32_t mb_tile_addr = mb_tile_pixel_id * cpp; + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + mb_tile_addr); + + return mb_pixel_address; +} + +static inline void +vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +static inline void +vc5_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, box, + get_pixel_offset, + is_load); + break; + case 16: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum vc5_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool is_load) +{ + switch (tiling_format) { + case VC5_TILING_UIF_NO_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_2_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_2_column_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_1_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_1_column_pixel_offset, + is_load); + break; + case VC5_TILING_LINEARTILE: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. + */ +void +vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(dst, dst_stride, + src, src_stride, + tiling_format, + cpp, + image_h, + box, + false); +} diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h new file mode 100644 index 000000000..d3cf48c45 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_TILING_H +#define VC5_TILING_H + +uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST; +uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST; +bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; +void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); +void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); +void vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); +void vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); + +#endif /* VC5_TILING_H */ diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c b/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c new file mode 100644 index 000000000..0c8bee517 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c @@ -0,0 +1,416 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_pack_color.h" +#include "util/format_srgb.h" + +#include "vc5_context.h" +#include "compiler/v3d_compiler.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#if 0 + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +static void +write_texture_border_color(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_state *sampler = texstate->samplers[unit]; + struct pipe_sampler_view *texture = texstate->textures[unit]; + struct vc5_resource *rsc = vc5_resource(texture->texture); + union util_color uc; + + const struct util_format_description *tex_format_desc = + util_format_description(texture->format); + + float border_color[4]; + for (int i = 0; i < 4; i++) + border_color[i] = sampler->border_color.f[i]; + if (util_format_is_srgb(texture->format)) { + for (int i = 0; i < 3; i++) + border_color[i] = + util_format_linear_to_srgb_float(border_color[i]); + } + + /* Turn the border color into the layout of channels that it would + * have when stored as texture contents. + */ + float storage_color[4]; + util_format_unswizzle_4f(storage_color, + border_color, + tex_format_desc->swizzle); + + /* Now, pack so that when the vc5_format-sampled texture contents are + * replaced with our border color, the vc5_get_format_swizzle() + * swizzling will get the right channels. + */ + if (util_format_is_depth_or_stencil(texture->format)) { + uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, + sampler->border_color.f[0]) << 8; + } else { + switch (rsc->vc5_format) { + default: + case VC5_TEXTURE_TYPE_RGBA8888: + util_pack_color(storage_color, + PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGBA4444: + util_pack_color(storage_color, + PIPE_FORMAT_A8B8G8R8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGB565: + util_pack_color(storage_color, + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_ALPHA: + uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; + break; + case VC5_TEXTURE_TYPE_LUMALPHA: + uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | + (float_to_ubyte(storage_color[0]) << 0)); + break; + } + } + + cl_aligned_u32(uniforms, uc.ui[0]); +} +#endif + +static uint32_t +get_texrect_scale(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + uint32_t dim; + + if (contents == QUNIFORM_TEXRECT_SCALE_X) + dim = texture->texture->width0; + else + dim = texture->texture->height0; + + return fui(1.0f / dim); +} + +static uint32_t +get_texture_size(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + + switch (contents) { + case QUNIFORM_TEXTURE_WIDTH: + return u_minify(texture->texture->width0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_HEIGHT: + return u_minify(texture->texture->height0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_DEPTH: + return u_minify(texture->texture->depth0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_ARRAY_SIZE: + return texture->texture->array_size; + case QUNIFORM_TEXTURE_LEVELS: + return (texture->u.tex.last_level - + texture->u.tex.first_level) + 1; + default: + unreachable("Bad texture size field"); + } +} + +static struct vc5_bo * +vc5_upload_ubo(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + const uint32_t *gallium_uniforms) +{ + if (!shader->prog_data.base->ubo_size) + return NULL; + + struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen, + shader->prog_data.base->ubo_size, + "ubo"); + void *data = vc5_bo_map(ubo); + for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) { + memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset, + ((const void *)gallium_uniforms + + shader->prog_data.base->ubo_ranges[i].src_offset), + shader->prog_data.base->ubo_ranges[i].size); + } + + return ubo; +} + +/** + * Writes the P0 (CFG_MODE=1) texture parameter. + * + * Some bits of this field are dependent on the type of sample being done by + * the shader, while other bits are dependent on the sampler state. We OR the + * two together here. + */ +static void +write_texture_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit, + uint32_t shader_data) +{ + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_u32(uniforms, shader_data | sampler->p0); +} + +static void +write_texture_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + .texture_state_record_base_address = texstate->texture_state[unit], + }; + + uint32_t packed; + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, + (uint8_t *)&packed, + &unpacked); + + cl_aligned_u32(uniforms, packed | sview->p1); +} + +struct vc5_cl_reloc +vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate) +{ + struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; + struct vc5_job *job = vc5->job; + const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; + struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms); + + /* We always need to return some space for uniforms, because the HW + * will be prefetching, even if we don't read any in the program. + */ + vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4); + + struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect); + vc5_bo_reference(uniform_stream.bo); + + struct vc5_cl_out *uniforms = + cl_start(&job->indirect); + + for (int i = 0; i < uinfo->count; i++) { + + switch (uinfo->contents[i]) { + case QUNIFORM_CONSTANT: + cl_aligned_u32(&uniforms, uinfo->data[i]); + break; + case QUNIFORM_UNIFORM: + cl_aligned_u32(&uniforms, + gallium_uniforms[uinfo->data[i]]); + break; + case QUNIFORM_VIEWPORT_X_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f); + break; + case QUNIFORM_VIEWPORT_Y_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f); + break; + + case QUNIFORM_VIEWPORT_Z_OFFSET: + cl_aligned_f(&uniforms, vc5->viewport.translate[2]); + break; + case QUNIFORM_VIEWPORT_Z_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[2]); + break; + + case QUNIFORM_USER_CLIP_PLANE: + cl_aligned_f(&uniforms, + vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + break; + + case QUNIFORM_TEXTURE_CONFIG_P1: + write_texture_p1(job, &uniforms, texstate, + uinfo->data[i]); + break; + +#if 0 + case QUNIFORM_TEXTURE_FIRST_LEVEL: + write_texture_first_level(job, &uniforms, texstate, + uinfo->data[i]); + break; +#endif + + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + cl_aligned_u32(&uniforms, + get_texrect_scale(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + cl_aligned_u32(&uniforms, + get_texture_size(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_STENCIL: + cl_aligned_u32(&uniforms, + vc5->zsa->stencil_uniforms[uinfo->data[i]] | + (uinfo->data[i] <= 1 ? + (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) : + 0)); + break; + + case QUNIFORM_ALPHA_REF: + cl_aligned_f(&uniforms, + vc5->zsa->base.alpha.ref_value); + break; + + case QUNIFORM_SAMPLE_MASK: + cl_aligned_u32(&uniforms, vc5->sample_mask); + break; + + case QUNIFORM_UBO_ADDR: + if (uinfo->data[i] == 0) { + cl_aligned_reloc(&job->indirect, &uniforms, + ubo, 0); + } else { + int ubo_index = uinfo->data[i]; + struct vc5_resource *rsc = + vc5_resource(cb->cb[ubo_index].buffer); + + cl_aligned_reloc(&job->indirect, &uniforms, + rsc->bo, + cb->cb[ubo_index].buffer_offset); + } + break; + + case QUNIFORM_TEXTURE_FIRST_LEVEL: + case QUNIFORM_TEXTURE_MSAA_ADDR: + case QUNIFORM_TEXTURE_BORDER_COLOR: + /* XXX */ + break; + + default: + assert(quniform_contents_is_texture_p0(uinfo->contents[i])); + + write_texture_p0(job, &uniforms, texstate, + uinfo->contents[i] - + QUNIFORM_TEXTURE_CONFIG_P0_0, + uinfo->data[i]); + break; + + } +#if 0 + uint32_t written_val = *((uint32_t *)uniforms - 1); + fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", + shader, i, __gen_address_offset(&uniform_stream) + i * 4, + written_val, uif(written_val)); +#endif + } + + cl_end(&job->indirect, uniforms); + + vc5_bo_unreference(&ubo); + + return uniform_stream; +} + +void +vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader) +{ + uint32_t dirty = 0; + + for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) { + switch (shader->prog_data.base->uniforms.contents[i]) { + case QUNIFORM_CONSTANT: + break; + case QUNIFORM_UNIFORM: + case QUNIFORM_UBO_ADDR: + dirty |= VC5_DIRTY_CONSTBUF; + break; + + case QUNIFORM_VIEWPORT_X_SCALE: + case QUNIFORM_VIEWPORT_Y_SCALE: + case QUNIFORM_VIEWPORT_Z_OFFSET: + case QUNIFORM_VIEWPORT_Z_SCALE: + dirty |= VC5_DIRTY_VIEWPORT; + break; + + case QUNIFORM_USER_CLIP_PLANE: + dirty |= VC5_DIRTY_CLIP; + break; + + case QUNIFORM_TEXTURE_CONFIG_P1: + case QUNIFORM_TEXTURE_BORDER_COLOR: + case QUNIFORM_TEXTURE_FIRST_LEVEL: + case QUNIFORM_TEXTURE_MSAA_ADDR: + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + /* We could flag this on just the stage we're + * compiling for, but it's not passed in. + */ + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + + case QUNIFORM_STENCIL: + case QUNIFORM_ALPHA_REF: + dirty |= VC5_DIRTY_ZSA; + break; + + case QUNIFORM_SAMPLE_MASK: + dirty |= VC5_DIRTY_SAMPLE_MASK; + break; + + default: + assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + } + } + + shader->uniform_dirty_bits = dirty; +} diff --git a/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp b/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp index 5d1a0e52c..3f89644d0 100644 --- a/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp +++ b/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp @@ -126,7 +126,8 @@ clSetEventCallback(cl_event d_ev, cl_int type, void *user_data) try { auto &ev = obj(d_ev); - if (!pfn_notify || type != CL_COMPLETE) + if (!pfn_notify || + (type != CL_COMPLETE && type != CL_SUBMITTED && type != CL_RUNNING)) throw error(CL_INVALID_VALUE); // Create a temporary soft event that depends on ev, with diff --git a/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp b/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp index f7046253b..34559042a 100644 --- a/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp +++ b/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp @@ -295,6 +295,9 @@ clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, &mem, obj_origin, obj_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -325,6 +328,9 @@ clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, ptr, {}, obj_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -362,6 +368,9 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, &mem, obj_origin, obj_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -399,6 +408,9 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, ptr, host_origin, host_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -504,6 +516,9 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, &img, src_origin, src_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -538,6 +553,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, ptr, {}, src_pitch, region)); + if (blocking) + hev().wait_signalled(); + ret_object(rd_ev, hev); return CL_SUCCESS; @@ -667,7 +685,11 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region); - ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps)); + auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps); + if (blocking) + hev().wait_signalled(); + + ret_object(rd_ev, hev); ret_error(r_errcode, CL_SUCCESS); return map; @@ -695,7 +717,11 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, void *map = img.resource(q).add_map(q, flags, blocking, origin, region); - ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps)); + auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps); + if (blocking) + hev().wait_signalled(); + + ret_object(rd_ev, hev); ret_error(r_errcode, CL_SUCCESS); return map; diff --git a/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp b/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp index 8275e16a4..cd5d78660 100644 --- a/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp +++ b/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp @@ -27,7 +27,7 @@ using namespace clover; event::event(clover::context &ctx, const ref_vector<event> &deps, action action_ok, action action_fail) : - context(ctx), wait_count(1), _status(0), + context(ctx), _wait_count(1), _status(0), action_ok(action_ok), action_fail(action_fail) { for (auto &ev : deps) ev.chain(*this); @@ -41,22 +41,19 @@ event::trigger_self() { std::lock_guard<std::mutex> lock(mutex); std::vector<intrusive_ref<event>> evs; - if (!--wait_count) + if (!--_wait_count) std::swap(_chain, evs); + cv.notify_all(); return evs; } void event::trigger() { - auto evs = trigger_self(); - - if (signalled()) { + if (wait_count() == 1) action_ok(*this); - cv.notify_all(); - } - for (event &ev : evs) + for (event &ev : trigger_self()) ev.trigger(); } @@ -73,18 +70,21 @@ event::abort_self(cl_int status) { void event::abort(cl_int status) { - auto evs = abort_self(status); - action_fail(*this); - for (event &ev : evs) + for (event &ev : abort_self(status)) ev.abort(status); } +unsigned +event::wait_count() const { + std::lock_guard<std::mutex> lock(mutex); + return _wait_count; +} + bool event::signalled() const { - std::lock_guard<std::mutex> lock(mutex); - return !wait_count; + return !wait_count(); } cl_int @@ -99,20 +99,25 @@ event::chain(event &ev) { std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock); std::lock(lock, lock_ev); - if (wait_count) { - ev.wait_count++; + if (_wait_count) { + ev._wait_count++; _chain.push_back(ev); } ev.deps.push_back(*this); } void +event::wait_signalled() const { + std::unique_lock<std::mutex> lock(mutex); + cv.wait(lock, [=]{ return !_wait_count; }); +} + +void event::wait() const { for (event &ev : deps) ev.wait(); - std::unique_lock<std::mutex> lock(mutex); - cv.wait(lock, [=]{ return !wait_count; }); + wait_signalled(); } hard_event::hard_event(command_queue &q, cl_command_type command, diff --git a/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp b/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp index 6469e483c..03c97bcf4 100644 --- a/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp +++ b/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp @@ -69,6 +69,7 @@ namespace clover { virtual cl_int status() const; virtual command_queue *queue() const = 0; virtual cl_command_type command() const = 0; + void wait_signalled() const; virtual void wait() const; virtual struct pipe_fence_handle *fence() const { @@ -85,8 +86,9 @@ namespace clover { private: std::vector<intrusive_ref<event>> trigger_self(); std::vector<intrusive_ref<event>> abort_self(cl_int status); + unsigned wait_count() const; - unsigned wait_count; + unsigned _wait_count; cl_int _status; action action_ok; action action_fail; diff --git a/lib/mesa/src/gallium/state_trackers/dri/Makefile.am b/lib/mesa/src/gallium/state_trackers/dri/Makefile.am index 61a1cabeb..8b427f98a 100644 --- a/lib/mesa/src/gallium/state_trackers/dri/Makefile.am +++ b/lib/mesa/src/gallium/state_trackers/dri/Makefile.am @@ -28,7 +28,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ - -I$(top_builddir)/src/mesa/drivers/dri/common \ + -I$(top_builddir)/src/util \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ $(GALLIUM_CFLAGS) \ $(LIBDRM_CFLAGS) \ diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_context.h b/lib/mesa/src/gallium/state_trackers/dri/dri_context.h index 96f06442f..afa9c49ff 100644 --- a/lib/mesa/src/gallium/state_trackers/dri/dri_context.h +++ b/lib/mesa/src/gallium/state_trackers/dri/dri_context.h @@ -90,6 +90,7 @@ dri_create_context(gl_api api, unsigned minor_version, uint32_t flags, bool notify_reset, + unsigned priority, unsigned *error, void *sharedContextPrivate); diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h b/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h index 1f9842ea5..7c45004ba 100644 --- a/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h +++ b/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h @@ -85,6 +85,8 @@ struct dri_drawable void (*update_tex_buffer)(struct dri_drawable *drawable, struct dri_context *ctx, struct pipe_resource *res); + void (*flush_swapbuffers)(struct dri_context *ctx, + struct dri_drawable *drawable); }; static inline struct dri_drawable * diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c new file mode 100644 index 000000000..06309d8f0 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c @@ -0,0 +1,388 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <dlfcn.h> +#include "util/u_memory.h" +#include "pipe/p_screen.h" +#include "state_tracker/st_texture.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" +#include "main/texobj.h" + +#include "dri_helpers.h" + +static bool +dri2_is_opencl_interop_loaded_locked(struct dri_screen *screen) +{ + return screen->opencl_dri_event_add_ref && + screen->opencl_dri_event_release && + screen->opencl_dri_event_wait && + screen->opencl_dri_event_get_fence; +} + +static bool +dri2_load_opencl_interop(struct dri_screen *screen) +{ +#if defined(RTLD_DEFAULT) + bool success; + + mtx_lock(&screen->opencl_func_mutex); + + if (dri2_is_opencl_interop_loaded_locked(screen)) { + mtx_unlock(&screen->opencl_func_mutex); + return true; + } + + screen->opencl_dri_event_add_ref = + dlsym(RTLD_DEFAULT, "opencl_dri_event_add_ref"); + screen->opencl_dri_event_release = + dlsym(RTLD_DEFAULT, "opencl_dri_event_release"); + screen->opencl_dri_event_wait = + dlsym(RTLD_DEFAULT, "opencl_dri_event_wait"); + screen->opencl_dri_event_get_fence = + dlsym(RTLD_DEFAULT, "opencl_dri_event_get_fence"); + + success = dri2_is_opencl_interop_loaded_locked(screen); + mtx_unlock(&screen->opencl_func_mutex); + return success; +#else + return false; +#endif +} + +struct dri2_fence { + struct dri_screen *driscreen; + struct pipe_fence_handle *pipe_fence; + void *cl_event; +}; + +static unsigned dri2_fence_get_caps(__DRIscreen *_screen) +{ + struct dri_screen *driscreen = dri_screen(_screen); + struct pipe_screen *screen = driscreen->base.screen; + unsigned caps = 0; + + if (screen->get_param(screen, PIPE_CAP_NATIVE_FENCE_FD)) + caps |= __DRI_FENCE_CAP_NATIVE_FD; + + return caps; +} + +static void * +dri2_create_fence(__DRIcontext *_ctx) +{ + struct pipe_context *ctx = dri_context(_ctx)->st->pipe; + struct dri2_fence *fence = CALLOC_STRUCT(dri2_fence); + + if (!fence) + return NULL; + + ctx->flush(ctx, &fence->pipe_fence, 0); + + if (!fence->pipe_fence) { + FREE(fence); + return NULL; + } + + fence->driscreen = dri_screen(_ctx->driScreenPriv); + return fence; +} + +static void * +dri2_create_fence_fd(__DRIcontext *_ctx, int fd) +{ + struct pipe_context *ctx = dri_context(_ctx)->st->pipe; + struct dri2_fence *fence = CALLOC_STRUCT(dri2_fence); + + if (fd == -1) { + /* exporting driver created fence, flush: */ + ctx->flush(ctx, &fence->pipe_fence, + PIPE_FLUSH_DEFERRED | PIPE_FLUSH_FENCE_FD); + } else { + /* importing a foreign fence fd: */ + ctx->create_fence_fd(ctx, &fence->pipe_fence, fd); + } + if (!fence->pipe_fence) { + FREE(fence); + return NULL; + } + + fence->driscreen = dri_screen(_ctx->driScreenPriv); + return fence; +} + +static int +dri2_get_fence_fd(__DRIscreen *_screen, void *_fence) +{ + struct dri_screen *driscreen = dri_screen(_screen); + struct pipe_screen *screen = driscreen->base.screen; + struct dri2_fence *fence = (struct dri2_fence*)_fence; + + return screen->fence_get_fd(screen, fence->pipe_fence); +} + +static void * +dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event) +{ + struct dri_screen *driscreen = dri_screen(_screen); + struct dri2_fence *fence; + + if (!dri2_load_opencl_interop(driscreen)) + return NULL; + + fence = CALLOC_STRUCT(dri2_fence); + if (!fence) + return NULL; + + fence->cl_event = (void*)cl_event; + + if (!driscreen->opencl_dri_event_add_ref(fence->cl_event)) { + free(fence); + return NULL; + } + + fence->driscreen = driscreen; + return fence; +} + +static void +dri2_destroy_fence(__DRIscreen *_screen, void *_fence) +{ + struct dri_screen *driscreen = dri_screen(_screen); + struct pipe_screen *screen = driscreen->base.screen; + struct dri2_fence *fence = (struct dri2_fence*)_fence; + + if (fence->pipe_fence) + screen->fence_reference(screen, &fence->pipe_fence, NULL); + else if (fence->cl_event) + driscreen->opencl_dri_event_release(fence->cl_event); + else + assert(0); + + FREE(fence); +} + +static GLboolean +dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags, + uint64_t timeout) +{ + struct dri2_fence *fence = (struct dri2_fence*)_fence; + struct dri_screen *driscreen = fence->driscreen; + struct pipe_screen *screen = driscreen->base.screen; + + /* No need to flush. The context was flushed when the fence was created. */ + + if (fence->pipe_fence) + return screen->fence_finish(screen, NULL, fence->pipe_fence, timeout); + else if (fence->cl_event) { + struct pipe_fence_handle *pipe_fence = + driscreen->opencl_dri_event_get_fence(fence->cl_event); + + if (pipe_fence) + return screen->fence_finish(screen, NULL, pipe_fence, timeout); + else + return driscreen->opencl_dri_event_wait(fence->cl_event, timeout); + } + else { + assert(0); + return false; + } +} + +static void +dri2_server_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags) +{ + struct pipe_context *ctx = dri_context(_ctx)->st->pipe; + struct dri2_fence *fence = (struct dri2_fence*)_fence; + + if (ctx->fence_server_sync) + ctx->fence_server_sync(ctx, fence->pipe_fence); +} + +const __DRI2fenceExtension dri2FenceExtension = { + .base = { __DRI2_FENCE, 2 }, + + .create_fence = dri2_create_fence, + .get_fence_from_cl_event = dri2_get_fence_from_cl_event, + .destroy_fence = dri2_destroy_fence, + .client_wait_sync = dri2_client_wait_sync, + .server_wait_sync = dri2_server_wait_sync, + .get_capabilities = dri2_fence_get_caps, + .create_fence_fd = dri2_create_fence_fd, + .get_fence_fd = dri2_get_fence_fd, +}; + +__DRIimage * +dri2_lookup_egl_image(struct dri_screen *screen, void *handle) +{ + const __DRIimageLookupExtension *loader = screen->sPriv->dri2.image; + __DRIimage *img; + + if (!loader->lookupEGLImage) + return NULL; + + img = loader->lookupEGLImage(screen->sPriv, + handle, screen->sPriv->loaderPrivate); + + return img; +} + +__DRIimage * +dri2_create_image_from_renderbuffer2(__DRIcontext *context, + int renderbuffer, void *loaderPrivate, + unsigned *error) +{ + struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; + struct gl_renderbuffer *rb; + struct pipe_resource *tex; + __DRIimage *img; + + /* Section 3.9 (EGLImage Specification and Management) of the EGL 1.5 + * specification says: + * + * "If target is EGL_GL_RENDERBUFFER and buffer is not the name of a + * renderbuffer object, or if buffer is the name of a multisampled + * renderbuffer object, the error EGL_BAD_PARAMETER is generated." + * + * "If target is EGL_GL_TEXTURE_2D , EGL_GL_TEXTURE_CUBE_MAP_*, + * EGL_GL_RENDERBUFFER or EGL_GL_TEXTURE_3D and buffer refers to the + * default GL texture object (0) for the corresponding GL target, the + * error EGL_BAD_PARAMETER is generated." + * (rely on _mesa_lookup_renderbuffer returning NULL in this case) + */ + rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); + if (!rb || rb->NumSamples > 0) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + tex = st_get_renderbuffer_resource(rb); + if (!tex) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + img = CALLOC_STRUCT(__DRIimageRec); + if (!img) { + *error = __DRI_IMAGE_ERROR_BAD_ALLOC; + return NULL; + } + + img->dri_format = driGLFormatToImageFormat(rb->Format); + img->loader_private = loaderPrivate; + + if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + free(img); + return NULL; + } + + pipe_resource_reference(&img->texture, tex); + + *error = __DRI_IMAGE_ERROR_SUCCESS; + return img; +} + +__DRIimage * +dri2_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate) +{ + unsigned error; + return dri2_create_image_from_renderbuffer2(context, renderbuffer, + loaderPrivate, &error); +} + +void +dri2_destroy_image(__DRIimage *img) +{ + pipe_resource_reference(&img->texture, NULL); + FREE(img); +} + + +__DRIimage * +dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, + int depth, int level, unsigned *error, + void *loaderPrivate) +{ + __DRIimage *img; + struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; + struct gl_texture_object *obj; + struct pipe_resource *tex; + GLuint face = 0; + + obj = _mesa_lookup_texture(ctx, texture); + if (!obj || obj->Target != target) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + tex = st_get_texobj_resource(obj); + if (!tex) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (target == GL_TEXTURE_CUBE_MAP) + face = depth; + + _mesa_test_texobj_completeness(ctx, obj); + if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (level < obj->BaseLevel || level > obj->_MaxLevel) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + + if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < depth) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + + img = CALLOC_STRUCT(__DRIimageRec); + if (!img) { + *error = __DRI_IMAGE_ERROR_BAD_ALLOC; + return NULL; + } + + img->level = level; + img->layer = depth; + img->dri_format = driGLFormatToImageFormat(obj->Image[face][level]->TexFormat); + + img->loader_private = loaderPrivate; + + if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + free(img); + return NULL; + } + + pipe_resource_reference(&img->texture, tex); + + *error = __DRI_IMAGE_ERROR_SUCCESS; + return img; +} + +/* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h new file mode 100644 index 000000000..76f024fd6 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DRI_HELPERS_H +#define DRI_HELPERS_H + +#include "dri_context.h" +#include "dri_screen.h" + +extern const __DRI2fenceExtension dri2FenceExtension; + +__DRIimage * +dri2_lookup_egl_image(struct dri_screen *screen, void *handle); + +__DRIimage * +dri2_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate); + +__DRIimage * +dri2_create_image_from_renderbuffer2(__DRIcontext *context, + int renderbuffer, void *loaderPrivate, + unsigned *error); + +void +dri2_destroy_image(__DRIimage *img); + +__DRIimage * +dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, + int depth, int level, unsigned *error, + void *loaderPrivate); +#endif + +/* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am new file mode 100644 index 000000000..fcd8b2c31 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am @@ -0,0 +1,35 @@ +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + $(GALLIUM_CFLAGS) \ + $(VISIBILITY_CFLAGS) \ + $(VL_CFLAGS) \ + $(XCB_DRI3_CFLAGS) \ + $(OMX_BELLAGIO_CFLAGS) + +noinst_LTLIBRARIES = libomxtracker.la + +libomxtracker_la_SOURCES = $(C_SOURCES) diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in new file mode 100644 index 000000000..b4c346766 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in @@ -0,0 +1,899 @@ +# Makefile.in generated by automake 1.15.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2017 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@HAVE_LIBDRM_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) + +@HAVE_DRISW_TRUE@am__append_2 = \ +@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la + +@HAVE_DRISW_KMS_TRUE@am__append_3 = \ +@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ +@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) + +subdir = src/gallium/state_trackers/omx_bellagio +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_gnu_make.m4 \ + $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ + $(top_srcdir)/m4/ax_gcc_builtin.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_flex.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/VERSION $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libomxtracker_la_LIBADD = +am__objects_1 = entrypoint.lo vid_dec.lo vid_dec_mpeg12.lo \ + vid_dec_h264.lo vid_dec_h265.lo vid_enc.lo +am_libomxtracker_la_OBJECTS = $(am__objects_1) +libomxtracker_la_OBJECTS = $(am_libomxtracker_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libomxtracker_la_SOURCES) +DIST_SOURCES = $(libomxtracker_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ + $(top_srcdir)/bin/depcomp \ + $(top_srcdir)/src/gallium/Automake.inc +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ +AMDGPU_LIBS = @AMDGPU_LIBS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +ANDROID_CFLAGS = @ANDROID_CFLAGS@ +ANDROID_LIBS = @ANDROID_LIBS@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BSYMBOLIC = @BSYMBOLIC@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ +CLOCK_LIB = @CLOCK_LIB@ +CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ +DEFINES = @DEFINES@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLOPEN_LIBS = @DLOPEN_LIBS@ +DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ +DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRIGL_CFLAGS = @DRIGL_CFLAGS@ +DRIGL_LIBS = @DRIGL_LIBS@ +DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ +DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ +DRI_LIB_DEPS = @DRI_LIB_DEPS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGL_CFLAGS = @EGL_CFLAGS@ +EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGREP = @EGREP@ +ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ +ETNAVIV_LIBS = @ETNAVIV_LIBS@ +EXEEXT = @EXEEXT@ +EXPAT_CFLAGS = @EXPAT_CFLAGS@ +EXPAT_LIBS = @EXPAT_LIBS@ +FGREP = @FGREP@ +FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ +FREEDRENO_LIBS = @FREEDRENO_LIBS@ +GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ +GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ +GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ +GC_SECTIONS = @GC_SECTIONS@ +GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ +GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ +GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ +GLPROTO_LIBS = @GLPROTO_LIBS@ +GLVND_CFLAGS = @GLVND_CFLAGS@ +GLVND_LIBS = @GLVND_LIBS@ +GLX_TLS = @GLX_TLS@ +GL_LIB = @GL_LIB@ +GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GREP = @GREP@ +HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +I915_CFLAGS = @I915_CFLAGS@ +I915_LIBS = @I915_LIBS@ +INDENT = @INDENT@ +INDENT_FLAGS = @INDENT_FLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ +LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ +LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ +LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ +LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBELF_CFLAGS = @LIBELF_CFLAGS@ +LIBELF_LIBS = @LIBELF_LIBS@ +LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIB_DIR = @LIB_DIR@ +LIB_EXT = @LIB_EXT@ +LIPO = @LIPO@ +LLVM_CFLAGS = @LLVM_CFLAGS@ +LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ +LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ +LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBS = @LLVM_LIBS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ +MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ +NINE_MAJOR = @NINE_MAJOR@ +NINE_MINOR = @NINE_MINOR@ +NINE_TINY = @NINE_TINY@ +NINE_VERSION = @NINE_VERSION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ +NOUVEAU_LIBS = @NOUVEAU_LIBS@ +NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ +NVVIEUX_LIBS = @NVVIEUX_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@ +OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@ +OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@ +OPENCL_LIBNAME = @OPENCL_LIBNAME@ +OPENCL_VERSION = @OPENCL_VERSION@ +OSMESA_LIB = @OSMESA_LIB@ +OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ +OSMESA_PC_REQ = @OSMESA_PC_REQ@ +OSMESA_VERSION = @OSMESA_VERSION@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSIX_SHELL = @POSIX_SHELL@ +PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ +PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +PWR8_CFLAGS = @PWR8_CFLAGS@ +PYTHON2 = @PYTHON2@ +RADEON_CFLAGS = @RADEON_CFLAGS@ +RADEON_LIBS = @RADEON_LIBS@ +RANLIB = @RANLIB@ +RM = @RM@ +SED = @SED@ +SELINUX_CFLAGS = @SELINUX_CFLAGS@ +SELINUX_LIBS = @SELINUX_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ +SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ +SSE41_CFLAGS = @SSE41_CFLAGS@ +STRIP = @STRIP@ +SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ +SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ +SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ +SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ +SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VA_CFLAGS = @VA_CFLAGS@ +VA_LIBS = @VA_LIBS@ +VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ +VA_MAJOR = @VA_MAJOR@ +VA_MINOR = @VA_MINOR@ +VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@ +VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@ +VDPAU_CFLAGS = @VDPAU_CFLAGS@ +VDPAU_LIBS = @VDPAU_LIBS@ +VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ +VDPAU_MAJOR = @VDPAU_MAJOR@ +VDPAU_MINOR = @VDPAU_MINOR@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +VL_CFLAGS = @VL_CFLAGS@ +VL_LIBS = @VL_LIBS@ +VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ +WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@ +WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@ +WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ +WAYLAND_SCANNER = @WAYLAND_SCANNER@ +WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ +WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ +WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@ +WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@ +WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@ +X11_INCLUDES = @X11_INCLUDES@ +XA_MAJOR = @XA_MAJOR@ +XA_MINOR = @XA_MINOR@ +XA_TINY = @XA_TINY@ +XA_VERSION = @XA_VERSION@ +XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ +XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@ +XCB_DRI3_LIBS = @XCB_DRI3_LIBS@ +XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ +XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ +XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ +XLIBGL_LIBS = @XLIBGL_LIBS@ +XVMC_CFLAGS = @XVMC_CFLAGS@ +XVMC_LIBS = @XVMC_LIBS@ +XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ +XVMC_MAJOR = @XVMC_MAJOR@ +XVMC_MINOR = @XVMC_MINOR@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acv_mako_found = @acv_mako_found@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +ifGNUmake = @ifGNUmake@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +C_SOURCES := \ + entrypoint.c \ + entrypoint.h \ + vid_dec.c \ + vid_dec.h \ + vid_dec_mpeg12.c \ + vid_dec_h264.c \ + vid_dec_h265.c \ + vid_enc.c \ + vid_enc.h + +GALLIUM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + + +# src/gallium/auxiliary must appear before src/gallium/drivers +# because there are stupidly two rbug_context.h files in +# different directories, and which one is included by the +# preprocessor is determined by the ordering of the -I flags. +GALLIUM_DRIVER_CFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_DRIVER_CXXFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CXXFLAGS) + +GALLIUM_TARGET_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/loader \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(top_builddir)/src/util/ \ + -I$(top_builddir)/src/gallium/drivers/ \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) \ + $(LIBDRM_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ + $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) +GALLIUM_WINSYS_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_2) $(am__append_3) +AM_CFLAGS = \ + $(GALLIUM_CFLAGS) \ + $(VISIBILITY_CFLAGS) \ + $(VL_CFLAGS) \ + $(XCB_DRI3_CFLAGS) \ + $(OMX_BELLAGIO_CFLAGS) + +noinst_LTLIBRARIES = libomxtracker.la +libomxtracker_la_SOURCES = $(C_SOURCES) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/state_trackers/omx_bellagio/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/gallium/state_trackers/omx_bellagio/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libomxtracker.la: $(libomxtracker_la_OBJECTS) $(libomxtracker_la_DEPENDENCIES) $(EXTRA_libomxtracker_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libomxtracker_la_OBJECTS) $(libomxtracker_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entrypoint.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_h264.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_h265.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_mpeg12.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_enc.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources new file mode 100644 index 000000000..ab60ce803 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources @@ -0,0 +1,10 @@ +C_SOURCES := \ + entrypoint.c \ + entrypoint.h \ + vid_dec.c \ + vid_dec.h \ + vid_dec_mpeg12.c \ + vid_dec_h264.c \ + vid_dec_h265.c \ + vid_enc.c \ + vid_enc.h diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c new file mode 100644 index 000000000..251cc7d65 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include <assert.h> +#include <string.h> +#include <stdbool.h> + +#if defined(HAVE_X11_PLATFORM) +#include <X11/Xlib.h> +#else +#define XOpenDisplay(x) NULL +#define XCloseDisplay(x) +#define Display void +#endif + +#include "os/os_thread.h" +#include "util/u_memory.h" +#include "loader/loader.h" + +#include "entrypoint.h" +#include "vid_dec.h" +#include "vid_enc.h" + +static mtx_t omx_lock = _MTX_INITIALIZER_NP; +static Display *omx_display = NULL; +static struct vl_screen *omx_screen = NULL; +static unsigned omx_usecount = 0; +static const char *omx_render_node = NULL; +static int drm_fd; + +int omx_component_library_Setup(stLoaderComponentType **stComponents) +{ + OMX_ERRORTYPE r; + + if (stComponents == NULL) + return 2; + + /* component 0 - video decoder */ + r = vid_dec_LoaderComponent(stComponents[0]); + if (r != OMX_ErrorNone) + return OMX_ErrorInsufficientResources; + + /* component 1 - video encoder */ + r = vid_enc_LoaderComponent(stComponents[1]); + if (r != OMX_ErrorNone) + return OMX_ErrorInsufficientResources; + + return 2; +} + +struct vl_screen *omx_get_screen(void) +{ + static bool first_time = true; + mtx_lock(&omx_lock); + + if (!omx_screen) { + if (first_time) { + omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL); + first_time = false; + } + if (omx_render_node) { + drm_fd = loader_open_device(omx_render_node); + if (drm_fd < 0) + goto error; + + omx_screen = vl_drm_screen_create(drm_fd); + if (!omx_screen) { + close(drm_fd); + goto error; + } + } else { + omx_display = XOpenDisplay(NULL); + if (!omx_display) + goto error; + + omx_screen = vl_dri3_screen_create(omx_display, 0); + if (!omx_screen) + omx_screen = vl_dri2_screen_create(omx_display, 0); + if (!omx_screen) { + XCloseDisplay(omx_display); + goto error; + } + } + } + + ++omx_usecount; + + mtx_unlock(&omx_lock); + return omx_screen; + +error: + mtx_unlock(&omx_lock); + return NULL; +} + +void omx_put_screen(void) +{ + mtx_lock(&omx_lock); + if ((--omx_usecount) == 0) { + omx_screen->destroy(omx_screen); + omx_screen = NULL; + + if (omx_render_node) + close(drm_fd); + else + XCloseDisplay(omx_display); + } + mtx_unlock(&omx_lock); +} + +OMX_ERRORTYPE omx_workaround_Destructor(OMX_COMPONENTTYPE *comp) +{ + omx_base_component_PrivateType* priv = (omx_base_component_PrivateType*)comp->pComponentPrivate; + + priv->state = OMX_StateInvalid; + tsem_up(priv->messageSem); + + /* wait for thread to exit */ + pthread_join(priv->messageHandlerThread, NULL); + + return omx_base_component_Destructor(comp); +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h new file mode 100644 index 000000000..7625d7a5b --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef OMX_ENTRYPOINT_H +#define OMX_ENTRYPOINT_H + +#include <bellagio/st_static_component_loader.h> + +#include "vl/vl_winsys.h" + +PUBLIC extern int omx_component_library_Setup(stLoaderComponentType **stComponents); + +struct vl_screen *omx_get_screen(void); +void omx_put_screen(void); + +OMX_ERRORTYPE omx_workaround_Destructor(OMX_COMPONENTTYPE *comp); + +#endif diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c new file mode 100644 index 000000000..f9fe19f63 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c @@ -0,0 +1,644 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + + +#include <assert.h> + +#include <OMX_Video.h> + +/* bellagio defines a DEBUG macro that we don't want */ +#ifndef DEBUG +#include <bellagio/omxcore.h> +#undef DEBUG +#else +#include <bellagio/omxcore.h> +#endif + +#include "pipe/p_screen.h" +#include "pipe/p_video_codec.h" +#include "util/u_memory.h" +#include "util/u_surface.h" +#include "vl/vl_video_buffer.h" +#include "vl/vl_vlc.h" + +#include "entrypoint.h" +#include "vid_dec.h" + +static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name); +static OMX_ERRORTYPE vid_dec_Destructor(OMX_COMPONENTTYPE *comp); +static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param); +static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param); +static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE *comp, internalRequestMessageType *msg); +static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf); +static OMX_ERRORTYPE vid_dec_FreeDecBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf); +static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output); + +OMX_ERRORTYPE vid_dec_LoaderComponent(stLoaderComponentType *comp) +{ + comp->componentVersion.s.nVersionMajor = 0; + comp->componentVersion.s.nVersionMinor = 0; + comp->componentVersion.s.nRevision = 0; + comp->componentVersion.s.nStep = 1; + comp->name_specific_length = 3; + + comp->name = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->name == NULL) + goto error; + + comp->name_specific = CALLOC(comp->name_specific_length, sizeof(char *)); + if (comp->name_specific == NULL) + goto error; + + comp->role_specific = CALLOC(comp->name_specific_length, sizeof(char *)); + if (comp->role_specific == NULL) + goto error; + + comp->name_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->name_specific[0] == NULL) + goto error_specific; + + comp->name_specific[1] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->name_specific[1] == NULL) + goto error_specific; + + comp->name_specific[2] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->name_specific[2] == NULL) + goto error_specific; + + comp->role_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->role_specific[0] == NULL) + goto error_specific; + + comp->role_specific[1] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->role_specific[1] == NULL) + goto error_specific; + + comp->role_specific[2] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->role_specific[2] == NULL) + goto error_specific; + + strcpy(comp->name, OMX_VID_DEC_BASE_NAME); + strcpy(comp->name_specific[0], OMX_VID_DEC_MPEG2_NAME); + strcpy(comp->name_specific[1], OMX_VID_DEC_AVC_NAME); + strcpy(comp->name_specific[2], OMX_VID_DEC_HEVC_NAME); + + strcpy(comp->role_specific[0], OMX_VID_DEC_MPEG2_ROLE); + strcpy(comp->role_specific[1], OMX_VID_DEC_AVC_ROLE); + strcpy(comp->role_specific[2], OMX_VID_DEC_HEVC_ROLE); + + comp->constructor = vid_dec_Constructor; + + return OMX_ErrorNone; + +error_specific: + FREE(comp->role_specific[2]); + FREE(comp->role_specific[1]); + FREE(comp->role_specific[0]); + FREE(comp->name_specific[2]); + FREE(comp->name_specific[1]); + FREE(comp->name_specific[0]); + +error: + FREE(comp->role_specific); + FREE(comp->name_specific); + + FREE(comp->name); + + return OMX_ErrorInsufficientResources; +} + +static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name) +{ + vid_dec_PrivateType *priv; + omx_base_video_PortType *port; + struct pipe_screen *screen; + OMX_ERRORTYPE r; + int i; + + assert(!comp->pComponentPrivate); + + priv = comp->pComponentPrivate = CALLOC(1, sizeof(vid_dec_PrivateType)); + if (!priv) + return OMX_ErrorInsufficientResources; + + r = omx_base_filter_Constructor(comp, name); + if (r) + return r; + + priv->profile = PIPE_VIDEO_PROFILE_UNKNOWN; + + if (!strcmp(name, OMX_VID_DEC_MPEG2_NAME)) + priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN; + + if (!strcmp(name, OMX_VID_DEC_AVC_NAME)) + priv->profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + + if (!strcmp(name, OMX_VID_DEC_HEVC_NAME)) + priv->profile = PIPE_VIDEO_PROFILE_HEVC_MAIN; + + priv->BufferMgmtCallback = vid_dec_FrameDecoded; + priv->messageHandler = vid_dec_MessageHandler; + priv->destructor = vid_dec_Destructor; + + comp->SetParameter = vid_dec_SetParameter; + comp->GetParameter = vid_dec_GetParameter; + + priv->screen = omx_get_screen(); + if (!priv->screen) + return OMX_ErrorInsufficientResources; + + screen = priv->screen->pscreen; + priv->pipe = screen->context_create(screen, NULL, 0); + if (!priv->pipe) + return OMX_ErrorInsufficientResources; + + if (!vl_compositor_init(&priv->compositor, priv->pipe)) { + priv->pipe->destroy(priv->pipe); + priv->pipe = NULL; + return OMX_ErrorInsufficientResources; + } + + if (!vl_compositor_init_state(&priv->cstate, priv->pipe)) { + vl_compositor_cleanup(&priv->compositor); + priv->pipe->destroy(priv->pipe); + priv->pipe = NULL; + return OMX_ErrorInsufficientResources; + } + + priv->sPortTypesParam[OMX_PortDomainVideo].nStartPortNumber = 0; + priv->sPortTypesParam[OMX_PortDomainVideo].nPorts = 2; + priv->ports = CALLOC(2, sizeof(omx_base_PortType *)); + if (!priv->ports) + return OMX_ErrorInsufficientResources; + + for (i = 0; i < 2; ++i) { + priv->ports[i] = CALLOC(1, sizeof(omx_base_video_PortType)); + if (!priv->ports[i]) + return OMX_ErrorInsufficientResources; + + base_video_port_Constructor(comp, &priv->ports[i], i, i == 0); + } + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + strcpy(port->sPortParam.format.video.cMIMEType,"video/MPEG2"); + port->sPortParam.nBufferCountMin = 8; + port->sPortParam.nBufferCountActual = 8; + port->sPortParam.nBufferSize = DEFAULT_OUT_BUFFER_SIZE; + port->sPortParam.format.video.nFrameWidth = 176; + port->sPortParam.format.video.nFrameHeight = 144; + port->sPortParam.format.video.eCompressionFormat = OMX_VIDEO_CodingMPEG2; + port->sVideoParam.eCompressionFormat = OMX_VIDEO_CodingMPEG2; + port->Port_SendBufferFunction = vid_dec_DecodeBuffer; + port->Port_FreeBuffer = vid_dec_FreeDecBuffer; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]; + port->sPortParam.nBufferCountActual = 8; + port->sPortParam.nBufferCountMin = 4; + port->sPortParam.format.video.nFrameWidth = 176; + port->sPortParam.format.video.nFrameHeight = 144; + port->sPortParam.format.video.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar; + port->sVideoParam.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar; + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_dec_Destructor(OMX_COMPONENTTYPE *comp) +{ + vid_dec_PrivateType* priv = comp->pComponentPrivate; + int i; + + if (priv->ports) { + for (i = 0; i < priv->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++i) { + if(priv->ports[i]) + priv->ports[i]->PortDestructor(priv->ports[i]); + } + FREE(priv->ports); + priv->ports=NULL; + } + + if (priv->pipe) { + vl_compositor_cleanup_state(&priv->cstate); + vl_compositor_cleanup(&priv->compositor); + priv->pipe->destroy(priv->pipe); + } + + if (priv->screen) + omx_put_screen(); + + return omx_workaround_Destructor(comp); +} + +static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_dec_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + + if (!param) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexParamPortDefinition: { + OMX_PARAM_PORTDEFINITIONTYPE *def = param; + + r = omx_base_component_SetParameter(handle, idx, param); + if (r) + return r; + + if (def->nPortIndex == OMX_BASE_FILTER_INPUTPORT_INDEX) { + omx_base_video_PortType *port; + unsigned framesize = def->format.video.nFrameWidth * def->format.video.nFrameHeight; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + port->sPortParam.nBufferSize = framesize * 512 / (16*16); + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]; + port->sPortParam.format.video.nFrameWidth = def->format.video.nFrameWidth; + port->sPortParam.format.video.nFrameHeight = def->format.video.nFrameHeight; + port->sPortParam.format.video.nStride = def->format.video.nFrameWidth; + port->sPortParam.format.video.nSliceHeight = def->format.video.nFrameHeight; + port->sPortParam.nBufferSize = framesize*3/2; + + priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged, + OMX_BASE_FILTER_OUTPUTPORT_INDEX, 0, NULL); + } + break; + } + case OMX_IndexParamStandardComponentRole: { + OMX_PARAM_COMPONENTROLETYPE *role = param; + + r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE)); + if (r) + return r; + + if (!strcmp((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE)) { + priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN; + } else if (!strcmp((char *)role->cRole, OMX_VID_DEC_AVC_ROLE)) { + priv->profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + } else if (!strcmp((char *)role->cRole, OMX_VID_DEC_HEVC_ROLE)) { + priv->profile = PIPE_VIDEO_PROFILE_HEVC_MAIN; + } else { + return OMX_ErrorBadParameter; + } + + break; + } + case OMX_IndexParamVideoPortFormat: { + OMX_VIDEO_PARAM_PORTFORMATTYPE *format = param; + omx_base_video_PortType *port; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + if (r) + return r; + + if (format->nPortIndex > 1) + return OMX_ErrorBadPortIndex; + + port = (omx_base_video_PortType *)priv->ports[format->nPortIndex]; + memcpy(&port->sVideoParam, format, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + break; + } + default: + return omx_base_component_SetParameter(handle, idx, param); + } + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_dec_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + + if (!param) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexParamStandardComponentRole: { + OMX_PARAM_COMPONENTROLETYPE *role = param; + + r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE)); + if (r) + return r; + + if (priv->profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN) + strcpy((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE); + else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH) + strcpy((char *)role->cRole, OMX_VID_DEC_AVC_ROLE); + else if (priv->profile == PIPE_VIDEO_PROFILE_HEVC_MAIN) + strcpy((char *)role->cRole, OMX_VID_DEC_HEVC_ROLE); + + break; + } + + case OMX_IndexParamVideoInit: + r = checkHeader(param, sizeof(OMX_PORT_PARAM_TYPE)); + if (r) + return r; + + memcpy(param, &priv->sPortTypesParam[OMX_PortDomainVideo], sizeof(OMX_PORT_PARAM_TYPE)); + break; + + case OMX_IndexParamVideoPortFormat: { + OMX_VIDEO_PARAM_PORTFORMATTYPE *format = param; + omx_base_video_PortType *port; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + if (r) + return r; + + if (format->nPortIndex > 1) + return OMX_ErrorBadPortIndex; + + port = (omx_base_video_PortType *)priv->ports[format->nPortIndex]; + memcpy(format, &port->sVideoParam, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + break; + } + + default: + return omx_base_component_GetParameter(handle, idx, param); + + } + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE* comp, internalRequestMessageType *msg) +{ + vid_dec_PrivateType* priv = comp->pComponentPrivate; + + if (msg->messageType == OMX_CommandStateSet) { + if ((msg->messageParam == OMX_StateIdle ) && (priv->state == OMX_StateLoaded)) { + if (priv->profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN) + vid_dec_mpeg12_Init(priv); + else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH) + vid_dec_h264_Init(priv); + else if (priv->profile == PIPE_VIDEO_PROFILE_HEVC_MAIN) + vid_dec_h265_Init(priv); + + } else if ((msg->messageParam == OMX_StateLoaded) && (priv->state == OMX_StateIdle)) { + if (priv->shadow) { + priv->shadow->destroy(priv->shadow); + priv->shadow = NULL; + } + if (priv->codec) { + priv->codec->destroy(priv->codec); + priv->codec = NULL; + } + } + } + + return omx_base_component_MessageHandler(comp, msg); +} + +void vid_dec_NeedTarget(vid_dec_PrivateType *priv) +{ + struct pipe_video_buffer templat = {}; + struct vl_screen *omx_screen; + struct pipe_screen *pscreen; + + omx_screen = priv->screen; + assert(omx_screen); + + pscreen = omx_screen->pscreen; + assert(pscreen); + + if (!priv->target) { + memset(&templat, 0, sizeof(templat)); + + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = priv->codec->width; + templat.height = priv->codec->height; + templat.buffer_format = pscreen->get_video_param( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERED_FORMAT + ); + templat.interlaced = pscreen->get_video_param( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERS_INTERLACED + ); + priv->target = priv->pipe->create_video_buffer(priv->pipe, &templat); + } +} + +static void vid_dec_FreeInputPortPrivate(OMX_BUFFERHEADERTYPE *buf) +{ + struct pipe_video_buffer *vbuf = buf->pInputPortPrivate; + if (!vbuf) + return; + + vbuf->destroy(vbuf); + buf->pInputPortPrivate = NULL; +} + +static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_dec_PrivateType *priv = comp->pComponentPrivate; + unsigned i = priv->num_in_buffers++; + OMX_ERRORTYPE r; + + priv->in_buffers[i] = buf; + priv->sizes[i] = buf->nFilledLen; + priv->inputs[i] = buf->pBuffer; + priv->timestamps[i] = buf->nTimeStamp; + + while (priv->num_in_buffers > (!!(buf->nFlags & OMX_BUFFERFLAG_EOS) ? 0 : 1)) { + bool eos = !!(priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS); + unsigned min_bits_left = eos ? 32 : MAX2(buf->nFilledLen * 8, 32); + struct vl_vlc vlc; + + vl_vlc_init(&vlc, priv->num_in_buffers, priv->inputs, priv->sizes); + + if (priv->slice) + priv->bytes_left = vl_vlc_bits_left(&vlc) / 8; + + while (vl_vlc_bits_left(&vlc) > min_bits_left) { + priv->Decode(priv, &vlc, min_bits_left); + vl_vlc_fillbits(&vlc); + } + + if (priv->slice) { + unsigned bytes = priv->bytes_left - vl_vlc_bits_left(&vlc) / 8; + + priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base, + 1, &priv->slice, &bytes); + + if (priv->num_in_buffers) + priv->slice = priv->inputs[1]; + else + priv->slice = NULL; + } + + if (eos && priv->frame_started) + priv->EndFrame(priv); + + if (priv->frame_finished) { + priv->frame_finished = false; + priv->in_buffers[0]->nFilledLen = priv->in_buffers[0]->nAllocLen; + r = base_port_SendBufferFunction(port, priv->in_buffers[0]); + } else if (eos) { + vid_dec_FreeInputPortPrivate(priv->in_buffers[0]); + priv->in_buffers[0]->nFilledLen = priv->in_buffers[0]->nAllocLen; + r = base_port_SendBufferFunction(port, priv->in_buffers[0]); + } else { + priv->in_buffers[0]->nFilledLen = 0; + r = port->ReturnBufferFunction(port, priv->in_buffers[0]); + } + + if (--priv->num_in_buffers) { + unsigned delta = MIN2((min_bits_left - vl_vlc_bits_left(&vlc)) / 8, priv->sizes[1]); + + priv->in_buffers[0] = priv->in_buffers[1]; + priv->sizes[0] = priv->sizes[1] - delta; + priv->inputs[0] = priv->inputs[1] + delta; + priv->timestamps[0] = priv->timestamps[1]; + } + + if (r) + return r; + } + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_dec_FreeDecBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf) +{ + vid_dec_FreeInputPortPrivate(buf); + return base_port_FreeBuffer(port, idx, buf); +} + +static void vid_dec_FillOutput(vid_dec_PrivateType *priv, struct pipe_video_buffer *buf, + OMX_BUFFERHEADERTYPE* output) +{ + omx_base_PortType *port = priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]; + OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video; + + struct pipe_sampler_view **views; + unsigned i, j; + unsigned width, height; + + views = buf->get_sampler_view_planes(buf); + + for (i = 0; i < 2 /* NV12 */; i++) { + if (!views[i]) continue; + width = def->nFrameWidth; + height = def->nFrameHeight; + vl_video_buffer_adjust_size(&width, &height, i, buf->chroma_format, buf->interlaced); + for (j = 0; j < views[i]->texture->array_size; ++j) { + struct pipe_box box = {0, 0, j, width, height, 1}; + struct pipe_transfer *transfer; + uint8_t *map, *dst; + map = priv->pipe->transfer_map(priv->pipe, views[i]->texture, 0, + PIPE_TRANSFER_READ, &box, &transfer); + if (!map) + return; + + dst = ((uint8_t*)output->pBuffer + output->nOffset) + j * def->nStride + + i * def->nFrameWidth * def->nFrameHeight; + util_copy_rect(dst, + views[i]->texture->format, + def->nStride * views[i]->texture->array_size, 0, 0, + box.width, box.height, map, transfer->stride, 0, 0); + + pipe_transfer_unmap(priv->pipe, transfer); + } + } +} + +static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, + OMX_BUFFERHEADERTYPE* output) +{ + vid_dec_PrivateType *priv = comp->pComponentPrivate; + bool eos = !!(input->nFlags & OMX_BUFFERFLAG_EOS); + OMX_TICKS timestamp; + + if (!input->pInputPortPrivate) { + input->pInputPortPrivate = priv->Flush(priv, ×tamp); + if (timestamp != OMX_VID_DEC_TIMESTAMP_INVALID) + input->nTimeStamp = timestamp; + } + + if (input->pInputPortPrivate) { + if (output->pInputPortPrivate && !priv->disable_tunnel) { + struct pipe_video_buffer *tmp, *vbuf, *new_vbuf; + + tmp = output->pOutputPortPrivate; + vbuf = input->pInputPortPrivate; + if (vbuf->interlaced) { + /* re-allocate the progressive buffer */ + omx_base_video_PortType *port; + struct pipe_video_buffer templat = {}; + struct u_rect src_rect, dst_rect; + + port = (omx_base_video_PortType *) + priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + memset(&templat, 0, sizeof(templat)); + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = port->sPortParam.format.video.nFrameWidth; + templat.height = port->sPortParam.format.video.nFrameHeight; + templat.buffer_format = PIPE_FORMAT_NV12; + templat.interlaced = false; + new_vbuf = priv->pipe->create_video_buffer(priv->pipe, &templat); + + /* convert the interlaced to the progressive */ + src_rect.x0 = dst_rect.x0 = 0; + src_rect.x1 = dst_rect.x1 = templat.width; + src_rect.y0 = dst_rect.y0 = 0; + src_rect.y1 = dst_rect.y1 = templat.height; + + vl_compositor_yuv_deint_full(&priv->cstate, &priv->compositor, + input->pInputPortPrivate, new_vbuf, + &src_rect, &dst_rect, VL_COMPOSITOR_WEAVE); + + /* set the progrssive buffer for next round */ + vbuf->destroy(vbuf); + input->pInputPortPrivate = new_vbuf; + } + output->pOutputPortPrivate = input->pInputPortPrivate; + input->pInputPortPrivate = tmp; + } else { + vid_dec_FillOutput(priv, input->pInputPortPrivate, output); + } + output->nFilledLen = output->nAllocLen; + output->nTimeStamp = input->nTimeStamp; + } + + if (eos && input->pInputPortPrivate) + vid_dec_FreeInputPortPrivate(input); + else + input->nFilledLen = 0; +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h new file mode 100644 index 000000000..7a10e750d --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef OMX_VID_DEC_H +#define OMX_VID_DEC_H + +#include <string.h> + +#include <OMX_Types.h> +#include <OMX_Component.h> +#include <OMX_Core.h> + +#include <bellagio/st_static_component_loader.h> +#include <bellagio/omx_base_filter.h> +#include <bellagio/omx_base_video_port.h> + +#include "pipe/p_video_state.h" +#include "os/os_thread.h" +#include "util/list.h" + +#include "vl/vl_compositor.h" + +#define OMX_VID_DEC_BASE_NAME "OMX.mesa.video_decoder" + +#define OMX_VID_DEC_MPEG2_NAME "OMX.mesa.video_decoder.mpeg2" +#define OMX_VID_DEC_MPEG2_ROLE "video_decoder.mpeg2" + +#define OMX_VID_DEC_AVC_NAME "OMX.mesa.video_decoder.avc" +#define OMX_VID_DEC_AVC_ROLE "video_decoder.avc" + +#define OMX_VID_DEC_HEVC_NAME "OMX.mesa.video_decoder.hevc" +#define OMX_VID_DEC_HEVC_ROLE "video_decoder.hevc" + +#define OMX_VID_DEC_TIMESTAMP_INVALID ((OMX_TICKS) -1) + +struct vl_vlc; + +DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType) +#define vid_dec_PrivateType_FIELDS omx_base_filter_PrivateType_FIELDS \ + enum pipe_video_profile profile; \ + struct vl_screen *screen; \ + struct pipe_context *pipe; \ + struct pipe_video_codec *codec; \ + void (*Decode)(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); \ + void (*EndFrame)(vid_dec_PrivateType *priv); \ + struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); \ + struct pipe_video_buffer *target, *shadow; \ + union { \ + struct { \ + uint8_t intra_matrix[64]; \ + uint8_t non_intra_matrix[64]; \ + } mpeg12; \ + struct { \ + unsigned nal_ref_idc; \ + bool IdrPicFlag; \ + unsigned idr_pic_id; \ + unsigned pic_order_cnt_lsb; \ + unsigned pic_order_cnt_msb; \ + unsigned delta_pic_order_cnt_bottom; \ + unsigned delta_pic_order_cnt[2]; \ + unsigned prevFrameNumOffset; \ + struct pipe_h264_sps sps[32]; \ + struct pipe_h264_pps pps[256]; \ + struct list_head dpb_list; \ + unsigned dpb_num; \ + } h264; \ + struct { \ + unsigned temporal_id; \ + unsigned level_idc; \ + unsigned pic_width_in_luma_samples; \ + unsigned pic_height_in_luma_samples; \ + bool IdrPicFlag; \ + int slice_prev_poc; \ + void *ref_pic_set_list; \ + void *rps; \ + struct pipe_h265_sps sps[16]; \ + struct pipe_h265_pps pps[64]; \ + struct list_head dpb_list; \ + unsigned dpb_num; \ + } h265; \ + } codec_data; \ + union { \ + struct pipe_picture_desc base; \ + struct pipe_mpeg12_picture_desc mpeg12; \ + struct pipe_h264_picture_desc h264; \ + struct pipe_h265_picture_desc h265; \ + } picture; \ + unsigned num_in_buffers; \ + OMX_BUFFERHEADERTYPE *in_buffers[2]; \ + const void *inputs[2]; \ + unsigned sizes[2]; \ + OMX_TICKS timestamps[2]; \ + OMX_TICKS timestamp; \ + bool first_buf_in_frame; \ + bool frame_finished; \ + bool frame_started; \ + unsigned bytes_left; \ + const void *slice; \ + bool disable_tunnel; \ + struct vl_compositor compositor; \ + struct vl_compositor_state cstate; +ENDCLASS(vid_dec_PrivateType) + +OMX_ERRORTYPE vid_dec_LoaderComponent(stLoaderComponentType *comp); + +/* used by MPEG12 and H264 implementation */ +void vid_dec_NeedTarget(vid_dec_PrivateType *priv); + +/* vid_dec_mpeg12.c */ +void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv); + +/* vid_dec_h264.c */ +void vid_dec_h264_Init(vid_dec_PrivateType *priv); + +/* vid_dec_h265.c */ +void vid_dec_h265_Init(vid_dec_PrivateType *priv); + +#endif diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c new file mode 100644 index 000000000..7ea71c104 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c @@ -0,0 +1,1032 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include "pipe/p_video_codec.h" +#include "util/u_memory.h" +#include "util/u_video.h" +#include "vl/vl_rbsp.h" +#include "vl/vl_zscan.h" + +#include "entrypoint.h" +#include "vid_dec.h" + +#define DPB_MAX_SIZE 5 + +struct dpb_list { + struct list_head list; + struct pipe_video_buffer *buffer; + OMX_TICKS timestamp; + int poc; +}; + +static const uint8_t Default_4x4_Intra[16] = { + 6, 13, 20, 28, 13, 20, 28, 32, + 20, 28, 32, 37, 28, 32, 37, 42 +}; + +static const uint8_t Default_4x4_Inter[16] = { + 10, 14, 20, 24, 14, 20, 24, 27, + 20, 24, 27, 30, 24, 27, 30, 34 +}; + +static const uint8_t Default_8x8_Intra[64] = { + 6, 10, 13, 16, 18, 23, 25, 27, + 10, 11, 16, 18, 23, 25, 27, 29, + 13, 16, 18, 23, 25, 27, 29, 31, + 16, 18, 23, 25, 27, 29, 31, 33, + 18, 23, 25, 27, 29, 31, 33, 36, + 23, 25, 27, 29, 31, 33, 36, 38, + 25, 27, 29, 31, 33, 36, 38, 40, + 27, 29, 31, 33, 36, 38, 40, 42 +}; + +static const uint8_t Default_8x8_Inter[64] = { + 9, 13, 15, 17, 19, 21, 22, 24, + 13, 13, 17, 19, 21, 22, 24, 25, + 15, 17, 19, 21, 22, 24, 25, 27, + 17, 19, 21, 22, 24, 25, 27, 28, + 19, 21, 22, 24, 25, 27, 28, 30, + 21, 22, 24, 25, 27, 28, 30, 32, + 22, 24, 25, 27, 28, 30, 32, 33, + 24, 25, 27, 28, 30, 32, 33, 35 +}; + +static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); +static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv); +static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); + +void vid_dec_h264_Init(vid_dec_PrivateType *priv) +{ + priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + + priv->Decode = vid_dec_h264_Decode; + priv->EndFrame = vid_dec_h264_EndFrame; + priv->Flush = vid_dec_h264_Flush; + + LIST_INITHEAD(&priv->codec_data.h264.dpb_list); + priv->picture.h264.field_order_cnt[0] = priv->picture.h264.field_order_cnt[1] = INT_MAX; + priv->first_buf_in_frame = true; +} + +static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv) +{ + //TODO: sane buffer handling + + if (priv->frame_started) + return; + + if (!priv->codec) { + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + templat.profile = priv->profile; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.max_references = priv->picture.h264.num_ref_frames; + templat.expect_chunked_decode = true; + templat.width = port->sPortParam.format.video.nFrameWidth; + templat.height = port->sPortParam.format.video.nFrameHeight; + templat.level = priv->picture.h264.pps->sps->level_idc; + + priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); + } + + vid_dec_NeedTarget(priv); + + if (priv->first_buf_in_frame) + priv->timestamp = priv->timestamps[0]; + priv->first_buf_in_frame = false; + + priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames; + + priv->picture.h264.slice_count = 0; + priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = true; +} + +static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv, + OMX_TICKS *timestamp) +{ + struct dpb_list *entry, *result = NULL; + struct pipe_video_buffer *buf; + + /* search for the lowest poc and break on zeros */ + LIST_FOR_EACH_ENTRY(entry, &priv->codec_data.h264.dpb_list, list) { + + if (result && entry->poc == 0) + break; + + if (!result || entry->poc < result->poc) + result = entry; + } + + if (!result) + return NULL; + + buf = result->buffer; + if (timestamp) + *timestamp = result->timestamp; + + --priv->codec_data.h264.dpb_num; + LIST_DEL(&result->list); + FREE(result); + + return buf; +} + +static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv) +{ + struct dpb_list *entry; + struct pipe_video_buffer *tmp; + bool top_field_first; + OMX_TICKS timestamp; + + if (!priv->frame_started) + return; + + priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = false; + + // TODO: implement frame number handling + priv->picture.h264.frame_num_list[0] = priv->picture.h264.frame_num; + priv->picture.h264.field_order_cnt_list[0][0] = priv->picture.h264.frame_num; + priv->picture.h264.field_order_cnt_list[0][1] = priv->picture.h264.frame_num; + + top_field_first = priv->picture.h264.field_order_cnt[0] < priv->picture.h264.field_order_cnt[1]; + + if (priv->picture.h264.field_pic_flag && priv->picture.h264.bottom_field_flag != top_field_first) + return; + + /* add the decoded picture to the dpb list */ + entry = CALLOC_STRUCT(dpb_list); + if (!entry) + return; + + priv->first_buf_in_frame = true; + entry->buffer = priv->target; + entry->timestamp = priv->timestamp; + entry->poc = MIN2(priv->picture.h264.field_order_cnt[0], priv->picture.h264.field_order_cnt[1]); + LIST_ADDTAIL(&entry->list, &priv->codec_data.h264.dpb_list); + ++priv->codec_data.h264.dpb_num; + priv->target = NULL; + priv->picture.h264.field_order_cnt[0] = priv->picture.h264.field_order_cnt[1] = INT_MAX; + + if (priv->codec_data.h264.dpb_num <= DPB_MAX_SIZE) + return; + + tmp = priv->in_buffers[0]->pInputPortPrivate; + priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv, ×tamp); + priv->in_buffers[0]->nTimeStamp = timestamp; + priv->target = tmp; + priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL; +} + +static void vui_parameters(struct vl_rbsp *rbsp) +{ + // TODO +} + +static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned sizeOfScalingList, + const uint8_t *defaultList, const uint8_t *fallbackList) +{ + unsigned lastScale = 8, nextScale = 8; + const int *list; + unsigned i; + + /* (pic|seq)_scaling_list_present_flag[i] */ + if (!vl_rbsp_u(rbsp, 1)) { + if (fallbackList) + memcpy(scalingList, fallbackList, sizeOfScalingList); + return; + } + + list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal; + for (i = 0; i < sizeOfScalingList; ++i ) { + + if (nextScale != 0) { + signed delta_scale = vl_rbsp_se(rbsp); + nextScale = (lastScale + delta_scale + 256) % 256; + if (i == 0 && nextScale == 0) { + memcpy(scalingList, defaultList, sizeOfScalingList); + return; + } + } + scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale; + lastScale = scalingList[list[i]]; + } +} + +static struct pipe_h264_sps *seq_parameter_set_id(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + unsigned id = vl_rbsp_ue(rbsp); + if (id >= ARRAY_SIZE(priv->codec_data.h264.sps)) + return NULL; /* invalid seq_parameter_set_id */ + + return &priv->codec_data.h264.sps[id]; +} + +static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + struct pipe_h264_sps *sps; + unsigned profile_idc, level_idc; + unsigned i; + + /* Sequence parameter set */ + profile_idc = vl_rbsp_u(rbsp, 8); + + /* constraint_set0_flag */ + vl_rbsp_u(rbsp, 1); + + /* constraint_set1_flag */ + vl_rbsp_u(rbsp, 1); + + /* constraint_set2_flag */ + vl_rbsp_u(rbsp, 1); + + /* constraint_set3_flag */ + vl_rbsp_u(rbsp, 1); + + /* constraint_set4_flag */ + vl_rbsp_u(rbsp, 1); + + /* constraint_set5_flag */ + vl_rbsp_u(rbsp, 1); + + /* reserved_zero_2bits */ + vl_rbsp_u(rbsp, 2); + + /* level_idc */ + level_idc = vl_rbsp_u(rbsp, 8); + + sps = seq_parameter_set_id(priv, rbsp); + if (!sps) + return; + + memset(sps, 0, sizeof(*sps)); + memset(sps->ScalingList4x4, 16, sizeof(sps->ScalingList4x4)); + memset(sps->ScalingList8x8, 16, sizeof(sps->ScalingList8x8)); + + sps->level_idc = level_idc; + + if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || + profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || + profile_idc == 128 || profile_idc == 138) { + + sps->chroma_format_idc = vl_rbsp_ue(rbsp); + + if (sps->chroma_format_idc == 3) + sps->separate_colour_plane_flag = vl_rbsp_u(rbsp, 1); + + sps->bit_depth_luma_minus8 = vl_rbsp_ue(rbsp); + + sps->bit_depth_chroma_minus8 = vl_rbsp_ue(rbsp); + + /* qpprime_y_zero_transform_bypass_flag */ + vl_rbsp_u(rbsp, 1); + + sps->seq_scaling_matrix_present_flag = vl_rbsp_u(rbsp, 1); + if (sps->seq_scaling_matrix_present_flag) { + + scaling_list(rbsp, sps->ScalingList4x4[0], 16, Default_4x4_Intra, Default_4x4_Intra); + scaling_list(rbsp, sps->ScalingList4x4[1], 16, Default_4x4_Intra, sps->ScalingList4x4[0]); + scaling_list(rbsp, sps->ScalingList4x4[2], 16, Default_4x4_Intra, sps->ScalingList4x4[1]); + scaling_list(rbsp, sps->ScalingList4x4[3], 16, Default_4x4_Inter, Default_4x4_Inter); + scaling_list(rbsp, sps->ScalingList4x4[4], 16, Default_4x4_Inter, sps->ScalingList4x4[3]); + scaling_list(rbsp, sps->ScalingList4x4[5], 16, Default_4x4_Inter, sps->ScalingList4x4[4]); + + scaling_list(rbsp, sps->ScalingList8x8[0], 64, Default_8x8_Intra, Default_8x8_Intra); + scaling_list(rbsp, sps->ScalingList8x8[1], 64, Default_8x8_Inter, Default_8x8_Inter); + if (sps->chroma_format_idc == 3) { + scaling_list(rbsp, sps->ScalingList8x8[2], 64, Default_8x8_Intra, sps->ScalingList8x8[0]); + scaling_list(rbsp, sps->ScalingList8x8[3], 64, Default_8x8_Inter, sps->ScalingList8x8[1]); + scaling_list(rbsp, sps->ScalingList8x8[4], 64, Default_8x8_Intra, sps->ScalingList8x8[2]); + scaling_list(rbsp, sps->ScalingList8x8[5], 64, Default_8x8_Inter, sps->ScalingList8x8[3]); + } + } + } else if (profile_idc == 183) + sps->chroma_format_idc = 0; + else + sps->chroma_format_idc = 1; + + sps->log2_max_frame_num_minus4 = vl_rbsp_ue(rbsp); + + sps->pic_order_cnt_type = vl_rbsp_ue(rbsp); + + if (sps->pic_order_cnt_type == 0) + sps->log2_max_pic_order_cnt_lsb_minus4 = vl_rbsp_ue(rbsp); + else if (sps->pic_order_cnt_type == 1) { + sps->delta_pic_order_always_zero_flag = vl_rbsp_u(rbsp, 1); + + sps->offset_for_non_ref_pic = vl_rbsp_se(rbsp); + + sps->offset_for_top_to_bottom_field = vl_rbsp_se(rbsp); + + sps->num_ref_frames_in_pic_order_cnt_cycle = vl_rbsp_ue(rbsp); + + for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; ++i) + sps->offset_for_ref_frame[i] = vl_rbsp_se(rbsp); + } + + sps->max_num_ref_frames = vl_rbsp_ue(rbsp); + + /* gaps_in_frame_num_value_allowed_flag */ + vl_rbsp_u(rbsp, 1); + + /* pic_width_in_mbs_minus1 */ + vl_rbsp_ue(rbsp); + + /* pic_height_in_map_units_minus1 */ + vl_rbsp_ue(rbsp); + + sps->frame_mbs_only_flag = vl_rbsp_u(rbsp, 1); + if (!sps->frame_mbs_only_flag) + sps->mb_adaptive_frame_field_flag = vl_rbsp_u(rbsp, 1); + + sps->direct_8x8_inference_flag = vl_rbsp_u(rbsp, 1); + + /* frame_cropping_flag */ + if (vl_rbsp_u(rbsp, 1)) { + /* frame_crop_left_offset */ + vl_rbsp_ue(rbsp); + + /* frame_crop_right_offset */ + vl_rbsp_ue(rbsp); + + /* frame_crop_top_offset */ + vl_rbsp_ue(rbsp); + + /* frame_crop_bottom_offset */ + vl_rbsp_ue(rbsp); + } + + /* vui_parameters_present_flag */ + if (vl_rbsp_u(rbsp, 1)) + vui_parameters(rbsp); +} + +static struct pipe_h264_pps *pic_parameter_set_id(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + unsigned id = vl_rbsp_ue(rbsp); + if (id >= ARRAY_SIZE(priv->codec_data.h264.pps)) + return NULL; /* invalid pic_parameter_set_id */ + + return &priv->codec_data.h264.pps[id]; +} + +static void picture_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + struct pipe_h264_sps *sps; + struct pipe_h264_pps *pps; + unsigned i; + + pps = pic_parameter_set_id(priv, rbsp); + if (!pps) + return; + + memset(pps, 0, sizeof(*pps)); + + sps = pps->sps = seq_parameter_set_id(priv, rbsp); + if (!sps) + return; + + memcpy(pps->ScalingList4x4, sps->ScalingList4x4, sizeof(pps->ScalingList4x4)); + memcpy(pps->ScalingList8x8, sps->ScalingList8x8, sizeof(pps->ScalingList8x8)); + + pps->entropy_coding_mode_flag = vl_rbsp_u(rbsp, 1); + + pps->bottom_field_pic_order_in_frame_present_flag = vl_rbsp_u(rbsp, 1); + + pps->num_slice_groups_minus1 = vl_rbsp_ue(rbsp); + if (pps->num_slice_groups_minus1 > 0) { + pps->slice_group_map_type = vl_rbsp_ue(rbsp); + + if (pps->slice_group_map_type == 0) { + + for (i = 0; i <= pps->num_slice_groups_minus1; ++i) + /* run_length_minus1[i] */ + vl_rbsp_ue(rbsp); + + } else if (pps->slice_group_map_type == 2) { + + for (i = 0; i <= pps->num_slice_groups_minus1; ++i) { + /* top_left[i] */ + vl_rbsp_ue(rbsp); + + /* bottom_right[i] */ + vl_rbsp_ue(rbsp); + } + + } else if (pps->slice_group_map_type >= 3 && pps->slice_group_map_type <= 5) { + + /* slice_group_change_direction_flag */ + vl_rbsp_u(rbsp, 1); + + pps->slice_group_change_rate_minus1 = vl_rbsp_ue(rbsp); + + } else if (pps->slice_group_map_type == 6) { + + unsigned pic_size_in_map_units_minus1; + + pic_size_in_map_units_minus1 = vl_rbsp_ue(rbsp); + + for (i = 0; i <= pic_size_in_map_units_minus1; ++i) + /* slice_group_id[i] */ + vl_rbsp_u(rbsp, log2(pps->num_slice_groups_minus1 + 1)); + } + } + + pps->num_ref_idx_l0_default_active_minus1 = vl_rbsp_ue(rbsp); + + pps->num_ref_idx_l1_default_active_minus1 = vl_rbsp_ue(rbsp); + + pps->weighted_pred_flag = vl_rbsp_u(rbsp, 1); + + pps->weighted_bipred_idc = vl_rbsp_u(rbsp, 2); + + pps->pic_init_qp_minus26 = vl_rbsp_se(rbsp); + + /* pic_init_qs_minus26 */ + vl_rbsp_se(rbsp); + + pps->chroma_qp_index_offset = vl_rbsp_se(rbsp); + + pps->deblocking_filter_control_present_flag = vl_rbsp_u(rbsp, 1); + + pps->constrained_intra_pred_flag = vl_rbsp_u(rbsp, 1); + + pps->redundant_pic_cnt_present_flag = vl_rbsp_u(rbsp, 1); + + if (vl_rbsp_more_data(rbsp)) { + pps->transform_8x8_mode_flag = vl_rbsp_u(rbsp, 1); + + /* pic_scaling_matrix_present_flag */ + if (vl_rbsp_u(rbsp, 1)) { + + scaling_list(rbsp, pps->ScalingList4x4[0], 16, Default_4x4_Intra, + sps->seq_scaling_matrix_present_flag ? NULL : Default_4x4_Intra); + scaling_list(rbsp, pps->ScalingList4x4[1], 16, Default_4x4_Intra, pps->ScalingList4x4[0]); + scaling_list(rbsp, pps->ScalingList4x4[2], 16, Default_4x4_Intra, pps->ScalingList4x4[1]); + scaling_list(rbsp, pps->ScalingList4x4[3], 16, Default_4x4_Inter, + sps->seq_scaling_matrix_present_flag ? NULL : Default_4x4_Inter); + scaling_list(rbsp, pps->ScalingList4x4[4], 16, Default_4x4_Inter, pps->ScalingList4x4[3]); + scaling_list(rbsp, pps->ScalingList4x4[5], 16, Default_4x4_Inter, pps->ScalingList4x4[4]); + + if (pps->transform_8x8_mode_flag) { + scaling_list(rbsp, pps->ScalingList8x8[0], 64, Default_8x8_Intra, + sps->seq_scaling_matrix_present_flag ? NULL : Default_8x8_Intra); + scaling_list(rbsp, pps->ScalingList8x8[1], 64, Default_8x8_Inter, + sps->seq_scaling_matrix_present_flag ? NULL : Default_8x8_Inter); + if (sps->chroma_format_idc == 3) { + scaling_list(rbsp, pps->ScalingList8x8[2], 64, Default_8x8_Intra, pps->ScalingList8x8[0]); + scaling_list(rbsp, pps->ScalingList8x8[3], 64, Default_8x8_Inter, pps->ScalingList8x8[1]); + scaling_list(rbsp, pps->ScalingList8x8[4], 64, Default_8x8_Intra, pps->ScalingList8x8[2]); + scaling_list(rbsp, pps->ScalingList8x8[5], 64, Default_8x8_Inter, pps->ScalingList8x8[3]); + } + } + } + + pps->second_chroma_qp_index_offset = vl_rbsp_se(rbsp); + } +} + +static void ref_pic_list_mvc_modification(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + // TODO + assert(0); +} + +static void ref_pic_list_modification(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + enum pipe_h264_slice_type slice_type) +{ + unsigned modification_of_pic_nums_idc; + + if (slice_type != 2 && slice_type != 4) { + /* ref_pic_list_modification_flag_l0 */ + if (vl_rbsp_u(rbsp, 1)) { + do { + modification_of_pic_nums_idc = vl_rbsp_ue(rbsp); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) + /* abs_diff_pic_num_minus1 */ + vl_rbsp_ue(rbsp); + else if (modification_of_pic_nums_idc == 2) + /* long_term_pic_num */ + vl_rbsp_ue(rbsp); + } while (modification_of_pic_nums_idc != 3); + } + } + + if (slice_type == 1) { + /* ref_pic_list_modification_flag_l1 */ + if (vl_rbsp_u(rbsp, 1)) { + do { + modification_of_pic_nums_idc = vl_rbsp_ue(rbsp); + if (modification_of_pic_nums_idc == 0 || + modification_of_pic_nums_idc == 1) + /* abs_diff_pic_num_minus1 */ + vl_rbsp_ue(rbsp); + else if (modification_of_pic_nums_idc == 2) + /* long_term_pic_num */ + vl_rbsp_ue(rbsp); + } while (modification_of_pic_nums_idc != 3); + } + } +} + +static void pred_weight_table(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + struct pipe_h264_sps *sps, enum pipe_h264_slice_type slice_type) +{ + unsigned ChromaArrayType = sps->separate_colour_plane_flag ? 0 : sps->chroma_format_idc; + unsigned i, j; + + /* luma_log2_weight_denom */ + vl_rbsp_ue(rbsp); + + if (ChromaArrayType != 0) + /* chroma_log2_weight_denom */ + vl_rbsp_ue(rbsp); + + for (i = 0; i <= priv->picture.h264.num_ref_idx_l0_active_minus1; ++i) { + /* luma_weight_l0_flag */ + if (vl_rbsp_u(rbsp, 1)) { + /* luma_weight_l0[i] */ + vl_rbsp_se(rbsp); + /* luma_offset_l0[i] */ + vl_rbsp_se(rbsp); + } + if (ChromaArrayType != 0) { + /* chroma_weight_l0_flag */ + if (vl_rbsp_u(rbsp, 1)) { + for (j = 0; j < 2; ++j) { + /* chroma_weight_l0[i][j] */ + vl_rbsp_se(rbsp); + /* chroma_offset_l0[i][j] */ + vl_rbsp_se(rbsp); + } + } + } + } + + if (slice_type == 1) { + for (i = 0; i <= priv->picture.h264.num_ref_idx_l1_active_minus1; ++i) { + /* luma_weight_l1_flag */ + if (vl_rbsp_u(rbsp, 1)) { + /* luma_weight_l1[i] */ + vl_rbsp_se(rbsp); + /* luma_offset_l1[i] */ + vl_rbsp_se(rbsp); + } + if (ChromaArrayType != 0) { + /* chroma_weight_l1_flag */ + if (vl_rbsp_u(rbsp, 1)) { + for (j = 0; j < 2; ++j) { + /* chroma_weight_l1[i][j] */ + vl_rbsp_se(rbsp); + /* chroma_offset_l1[i][j] */ + vl_rbsp_se(rbsp); + } + } + } + } + } +} + +static void dec_ref_pic_marking(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + bool IdrPicFlag) +{ + unsigned memory_management_control_operation; + + if (IdrPicFlag) { + /* no_output_of_prior_pics_flag */ + vl_rbsp_u(rbsp, 1); + /* long_term_reference_flag */ + vl_rbsp_u(rbsp, 1); + } else { + /* adaptive_ref_pic_marking_mode_flag */ + if (vl_rbsp_u(rbsp, 1)) { + do { + memory_management_control_operation = vl_rbsp_ue(rbsp); + + if (memory_management_control_operation == 1 || + memory_management_control_operation == 3) + /* difference_of_pic_nums_minus1 */ + vl_rbsp_ue(rbsp); + + if (memory_management_control_operation == 2) + /* long_term_pic_num */ + vl_rbsp_ue(rbsp); + + if (memory_management_control_operation == 3 || + memory_management_control_operation == 6) + /* long_term_frame_idx */ + vl_rbsp_ue(rbsp); + + if (memory_management_control_operation == 4) + /* max_long_term_frame_idx_plus1 */ + vl_rbsp_ue(rbsp); + } while (memory_management_control_operation != 0); + } + } +} + +static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + unsigned nal_ref_idc, unsigned nal_unit_type) +{ + enum pipe_h264_slice_type slice_type; + struct pipe_h264_pps *pps; + struct pipe_h264_sps *sps; + unsigned frame_num, prevFrameNum; + bool IdrPicFlag = nal_unit_type == 5; + + if (IdrPicFlag != priv->codec_data.h264.IdrPicFlag) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.IdrPicFlag = IdrPicFlag; + + /* first_mb_in_slice */ + vl_rbsp_ue(rbsp); + + slice_type = vl_rbsp_ue(rbsp) % 5; + + pps = pic_parameter_set_id(priv, rbsp); + if (!pps) + return; + + sps = pps->sps; + if (!sps) + return; + + if (pps != priv->picture.h264.pps) + vid_dec_h264_EndFrame(priv); + + priv->picture.h264.pps = pps; + + if (sps->separate_colour_plane_flag == 1 ) + /* colour_plane_id */ + vl_rbsp_u(rbsp, 2); + + frame_num = vl_rbsp_u(rbsp, sps->log2_max_frame_num_minus4 + 4); + + if (frame_num != priv->picture.h264.frame_num) + vid_dec_h264_EndFrame(priv); + + prevFrameNum = priv->picture.h264.frame_num; + priv->picture.h264.frame_num = frame_num; + + priv->picture.h264.field_pic_flag = 0; + priv->picture.h264.bottom_field_flag = 0; + + if (!sps->frame_mbs_only_flag) { + unsigned field_pic_flag = vl_rbsp_u(rbsp, 1); + + if (!field_pic_flag && field_pic_flag != priv->picture.h264.field_pic_flag) + vid_dec_h264_EndFrame(priv); + + priv->picture.h264.field_pic_flag = field_pic_flag; + + if (priv->picture.h264.field_pic_flag) { + unsigned bottom_field_flag = vl_rbsp_u(rbsp, 1); + + if (bottom_field_flag != priv->picture.h264.bottom_field_flag) + vid_dec_h264_EndFrame(priv); + + priv->picture.h264.bottom_field_flag = bottom_field_flag; + } + } + + if (IdrPicFlag) { + unsigned idr_pic_id = vl_rbsp_ue(rbsp); + + if (idr_pic_id != priv->codec_data.h264.idr_pic_id) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.idr_pic_id = idr_pic_id; + } + + if (sps->pic_order_cnt_type == 0) { + unsigned log2_max_pic_order_cnt_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + unsigned max_pic_order_cnt_lsb = 1 << log2_max_pic_order_cnt_lsb; + int pic_order_cnt_lsb = vl_rbsp_u(rbsp, log2_max_pic_order_cnt_lsb); + int pic_order_cnt_msb; + + if (pic_order_cnt_lsb != priv->codec_data.h264.pic_order_cnt_lsb) + vid_dec_h264_EndFrame(priv); + + if (IdrPicFlag) { + priv->codec_data.h264.pic_order_cnt_msb = 0; + priv->codec_data.h264.pic_order_cnt_lsb = 0; + } + + if ((pic_order_cnt_lsb < priv->codec_data.h264.pic_order_cnt_lsb) && + (priv->codec_data.h264.pic_order_cnt_lsb - pic_order_cnt_lsb) >= (max_pic_order_cnt_lsb / 2)) + pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb + max_pic_order_cnt_lsb; + + else if ((pic_order_cnt_lsb > priv->codec_data.h264.pic_order_cnt_lsb) && + (pic_order_cnt_lsb - priv->codec_data.h264.pic_order_cnt_lsb) > (max_pic_order_cnt_lsb / 2)) + pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb - max_pic_order_cnt_lsb; + + else + pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb; + + priv->codec_data.h264.pic_order_cnt_msb = pic_order_cnt_msb; + priv->codec_data.h264.pic_order_cnt_lsb = pic_order_cnt_lsb; + + if (pps->bottom_field_pic_order_in_frame_present_flag && !priv->picture.h264.field_pic_flag) { + unsigned delta_pic_order_cnt_bottom = vl_rbsp_se(rbsp); + + if (delta_pic_order_cnt_bottom != priv->codec_data.h264.delta_pic_order_cnt_bottom) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom; + } + + if (!priv->picture.h264.field_pic_flag) { + priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb; + priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] + + priv->codec_data.h264.delta_pic_order_cnt_bottom; + } else if (!priv->picture.h264.bottom_field_flag) + priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb; + else + priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb; + + } else if (sps->pic_order_cnt_type == 1) { + unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4); + unsigned FrameNumOffset, absFrameNum, expectedPicOrderCnt; + + if (!sps->delta_pic_order_always_zero_flag) { + unsigned delta_pic_order_cnt[2]; + + delta_pic_order_cnt[0] = vl_rbsp_se(rbsp); + + if (delta_pic_order_cnt[0] != priv->codec_data.h264.delta_pic_order_cnt[0]) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.delta_pic_order_cnt[0] = delta_pic_order_cnt[0]; + + if (pps->bottom_field_pic_order_in_frame_present_flag && !priv->picture.h264.field_pic_flag) { + delta_pic_order_cnt[1] = vl_rbsp_se(rbsp); + + if (delta_pic_order_cnt[1] != priv->codec_data.h264.delta_pic_order_cnt[1]) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.delta_pic_order_cnt[1] = delta_pic_order_cnt[1]; + } + } + + if (IdrPicFlag) + FrameNumOffset = 0; + else if (prevFrameNum > frame_num) + FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset + MaxFrameNum; + else + FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset; + + priv->codec_data.h264.prevFrameNumOffset = FrameNumOffset; + + if (sps->num_ref_frames_in_pic_order_cnt_cycle != 0) + absFrameNum = FrameNumOffset + frame_num; + else + absFrameNum = 0; + + if (nal_ref_idc == 0 && absFrameNum > 0) + absFrameNum = absFrameNum - 1; + + if (absFrameNum > 0) { + unsigned picOrderCntCycleCnt = (absFrameNum - 1) / sps->num_ref_frames_in_pic_order_cnt_cycle; + unsigned frameNumInPicOrderCntCycle = (absFrameNum - 1) % sps->num_ref_frames_in_pic_order_cnt_cycle; + signed ExpectedDeltaPerPicOrderCntCycle = 0; + unsigned i; + + for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; ++i) + ExpectedDeltaPerPicOrderCntCycle += sps->offset_for_ref_frame[i]; + + expectedPicOrderCnt = picOrderCntCycleCnt * ExpectedDeltaPerPicOrderCntCycle; + for (i = 0; i <= frameNumInPicOrderCntCycle; ++i) + expectedPicOrderCnt += sps->offset_for_ref_frame[i]; + + } else + expectedPicOrderCnt = 0; + + if (nal_ref_idc == 0) + expectedPicOrderCnt += sps->offset_for_non_ref_pic; + + if (!priv->picture.h264.field_pic_flag) { + priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0]; + priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] + + sps->offset_for_top_to_bottom_field + priv->codec_data.h264.delta_pic_order_cnt[1]; + + } else if (!priv->picture.h264.bottom_field_flag) + priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0]; + else + priv->picture.h264.field_order_cnt[1] = expectedPicOrderCnt + sps->offset_for_top_to_bottom_field + + priv->codec_data.h264.delta_pic_order_cnt[0]; + + } else if (sps->pic_order_cnt_type == 2) { + unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4); + unsigned FrameNumOffset, tempPicOrderCnt; + + if (IdrPicFlag) + FrameNumOffset = 0; + else if (prevFrameNum > frame_num) + FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset + MaxFrameNum; + else + FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset; + + priv->codec_data.h264.prevFrameNumOffset = FrameNumOffset; + + if (IdrPicFlag) + tempPicOrderCnt = 0; + else if (nal_ref_idc == 0) + tempPicOrderCnt = 2 * (FrameNumOffset + frame_num) - 1; + else + tempPicOrderCnt = 2 * (FrameNumOffset + frame_num); + + if (!priv->picture.h264.field_pic_flag) { + priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt; + priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt; + + } else if (!priv->picture.h264.bottom_field_flag) + priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt; + else + priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt; + } + + if (pps->redundant_pic_cnt_present_flag) + /* redundant_pic_cnt */ + vl_rbsp_ue(rbsp); + + if (slice_type == PIPE_H264_SLICE_TYPE_B) + /* direct_spatial_mv_pred_flag */ + vl_rbsp_u(rbsp, 1); + + priv->picture.h264.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1; + priv->picture.h264.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1; + + if (slice_type == PIPE_H264_SLICE_TYPE_P || + slice_type == PIPE_H264_SLICE_TYPE_SP || + slice_type == PIPE_H264_SLICE_TYPE_B) { + + /* num_ref_idx_active_override_flag */ + if (vl_rbsp_u(rbsp, 1)) { + priv->picture.h264.num_ref_idx_l0_active_minus1 = vl_rbsp_ue(rbsp); + + if (slice_type == PIPE_H264_SLICE_TYPE_B) + priv->picture.h264.num_ref_idx_l1_active_minus1 = vl_rbsp_ue(rbsp); + } + } + + if (nal_unit_type == 20 || nal_unit_type == 21) + ref_pic_list_mvc_modification(priv, rbsp); + else + ref_pic_list_modification(priv, rbsp, slice_type); + + if ((pps->weighted_pred_flag && (slice_type == PIPE_H264_SLICE_TYPE_P || slice_type == PIPE_H264_SLICE_TYPE_SP)) || + (pps->weighted_bipred_idc == 1 && slice_type == PIPE_H264_SLICE_TYPE_B)) + pred_weight_table(priv, rbsp, sps, slice_type); + + if (nal_ref_idc != 0) + dec_ref_pic_marking(priv, rbsp, IdrPicFlag); + + if (pps->entropy_coding_mode_flag && slice_type != PIPE_H264_SLICE_TYPE_I && slice_type != PIPE_H264_SLICE_TYPE_SI) + /* cabac_init_idc */ + vl_rbsp_ue(rbsp); + + /* slice_qp_delta */ + vl_rbsp_se(rbsp); + + if (slice_type == PIPE_H264_SLICE_TYPE_SP || slice_type == PIPE_H264_SLICE_TYPE_SI) { + if (slice_type == PIPE_H264_SLICE_TYPE_SP) + /* sp_for_switch_flag */ + vl_rbsp_u(rbsp, 1); + + /*slice_qs_delta */ + vl_rbsp_se(rbsp); + } + + if (pps->deblocking_filter_control_present_flag) { + unsigned disable_deblocking_filter_idc = vl_rbsp_ue(rbsp); + + if (disable_deblocking_filter_idc != 1) { + /* slice_alpha_c0_offset_div2 */ + vl_rbsp_se(rbsp); + + /* slice_beta_offset_div2 */ + vl_rbsp_se(rbsp); + } + } + + if (pps->num_slice_groups_minus1 > 0 && pps->slice_group_map_type >= 3 && pps->slice_group_map_type <= 5) + /* slice_group_change_cycle */ + vl_rbsp_u(rbsp, 2); +} + +static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left) +{ + unsigned nal_ref_idc, nal_unit_type; + + if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00)) + return; + + if (vl_vlc_peekbits(vlc, 24) != 0x000001) { + vl_vlc_eatbits(vlc, 8); + return; + } + + if (priv->slice) { + unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8); + ++priv->picture.h264.slice_count; + priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base, + 1, &priv->slice, &bytes); + priv->slice = NULL; + } + + vl_vlc_eatbits(vlc, 24); + + /* forbidden_zero_bit */ + vl_vlc_eatbits(vlc, 1); + + nal_ref_idc = vl_vlc_get_uimsbf(vlc, 2); + + if (nal_ref_idc != priv->codec_data.h264.nal_ref_idc && + (nal_ref_idc * priv->codec_data.h264.nal_ref_idc) == 0) + vid_dec_h264_EndFrame(priv); + + priv->codec_data.h264.nal_ref_idc = nal_ref_idc; + + nal_unit_type = vl_vlc_get_uimsbf(vlc, 5); + + if (nal_unit_type != 1 && nal_unit_type != 5) + vid_dec_h264_EndFrame(priv); + + if (nal_unit_type == 7) { + struct vl_rbsp rbsp; + vl_rbsp_init(&rbsp, vlc, ~0); + seq_parameter_set(priv, &rbsp); + + } else if (nal_unit_type == 8) { + struct vl_rbsp rbsp; + vl_rbsp_init(&rbsp, vlc, ~0); + picture_parameter_set(priv, &rbsp); + + } else if (nal_unit_type == 1 || nal_unit_type == 5) { + /* Coded slice of a non-IDR or IDR picture */ + unsigned bits = vl_vlc_valid_bits(vlc); + unsigned bytes = bits / 8 + 4; + struct vl_rbsp rbsp; + uint8_t buf[8]; + const void *ptr = buf; + unsigned i; + + buf[0] = 0x0; + buf[1] = 0x0; + buf[2] = 0x1; + buf[3] = (nal_ref_idc << 5) | nal_unit_type; + for (i = 4; i < bytes; ++i) + buf[i] = vl_vlc_peekbits(vlc, bits) >> ((bytes - i - 1) * 8); + + priv->bytes_left = (vl_vlc_bits_left(vlc) - bits) / 8; + priv->slice = vlc->data; + + vl_rbsp_init(&rbsp, vlc, 128); + slice_header(priv, &rbsp, nal_ref_idc, nal_unit_type); + + vid_dec_h264_BeginFrame(priv); + + ++priv->picture.h264.slice_count; + priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base, + 1, &ptr, &bytes); + } + + /* resync to byte boundary */ + vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8); +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c new file mode 100644 index 000000000..3242dbe11 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c @@ -0,0 +1,1013 @@ +/************************************************************************** + * + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_video_codec.h" +#include "util/u_memory.h" +#include "util/u_video.h" +#include "vl/vl_rbsp.h" + +#include "entrypoint.h" +#include "vid_dec.h" + +#define DPB_MAX_SIZE 32 +#define MAX_NUM_REF_PICS 16 + +enum { + NAL_UNIT_TYPE_TRAIL_N = 0, + NAL_UNIT_TYPE_TRAIL_R = 1, + NAL_UNIT_TYPE_TSA_N = 2, + NAL_UNIT_TYPE_TSA_R = 3, + NAL_UNIT_TYPE_STSA_N = 4, + NAL_UNIT_TYPE_STSA_R = 5, + NAL_UNIT_TYPE_RADL_N = 6, + NAL_UNIT_TYPE_RADL_R = 7, + NAL_UNIT_TYPE_RASL_N = 8, + NAL_UNIT_TYPE_RASL_R = 9, + NAL_UNIT_TYPE_BLA_W_LP = 16, + NAL_UNIT_TYPE_BLA_W_RADL = 17, + NAL_UNIT_TYPE_BLA_N_LP = 18, + NAL_UNIT_TYPE_IDR_W_RADL = 19, + NAL_UNIT_TYPE_IDR_N_LP = 20, + NAL_UNIT_TYPE_CRA = 21, + NAL_UNIT_TYPE_SPS = 33, + NAL_UNIT_TYPE_PPS = 34, +}; + +static const uint8_t Default_8x8_Intra[64] = { + 16, 16, 16, 16, 17, 18, 21, 24, + 16, 16, 16, 16, 17, 19, 22, 25, + 16, 16, 17, 18, 20, 22, 25, 29, + 16, 16, 18, 21, 24, 27, 31, 36, + 17, 17, 20, 24, 30, 35, 41, 47, + 18, 19, 22, 27, 35, 44, 54, 65, + 21, 22, 25, 31, 41, 54, 70, 88, + 24, 25, 29, 36, 47, 65, 88, 115 +}; + +static const uint8_t Default_8x8_Inter[64] = { + 16, 16, 16, 16, 17, 18, 20, 24, + 16, 16, 16, 17, 18, 20, 24, 25, + 16, 16, 17, 18, 20, 24, 25, 28, + 16, 17, 18, 20, 24, 25, 28, 33, + 17, 18, 20, 24, 25, 28, 33, 41, + 18, 20, 24, 25, 28, 33, 41, 54, + 20, 24, 25, 28, 33, 41, 54, 71, + 24, 25, 28, 33, 41, 54, 71, 91 +}; + +struct dpb_list { + struct list_head list; + struct pipe_video_buffer *buffer; + OMX_TICKS timestamp; + unsigned poc; +}; + +struct ref_pic_set { + unsigned num_pics; + unsigned num_neg_pics; + unsigned num_pos_pics; + unsigned num_delta_poc; + int delta_poc[MAX_NUM_REF_PICS]; + bool used[MAX_NUM_REF_PICS]; +}; + +static bool is_idr_picture(unsigned nal_unit_type) +{ + return (nal_unit_type == NAL_UNIT_TYPE_IDR_W_RADL || + nal_unit_type == NAL_UNIT_TYPE_IDR_N_LP); +} + +/* broken link access picture */ +static bool is_bla_picture(unsigned nal_unit_type) +{ + return (nal_unit_type == NAL_UNIT_TYPE_BLA_W_LP || + nal_unit_type == NAL_UNIT_TYPE_BLA_W_RADL || + nal_unit_type == NAL_UNIT_TYPE_BLA_N_LP); +} + +/* random access point picture */ +static bool is_rap_picture(unsigned nal_unit_type) +{ + return (nal_unit_type >= NAL_UNIT_TYPE_BLA_W_LP && + nal_unit_type <= NAL_UNIT_TYPE_CRA); +} + +static bool is_slice_picture(unsigned nal_unit_type) +{ + return (nal_unit_type <= NAL_UNIT_TYPE_RASL_R || + is_rap_picture(nal_unit_type)); +} + +static void set_poc(vid_dec_PrivateType *priv, + unsigned nal_unit_type, int i) +{ + priv->picture.h265.CurrPicOrderCntVal = i; + + if (priv->codec_data.h265.temporal_id == 0 && + (nal_unit_type == NAL_UNIT_TYPE_TRAIL_R || + nal_unit_type == NAL_UNIT_TYPE_TSA_R || + nal_unit_type == NAL_UNIT_TYPE_STSA_R || + is_rap_picture(nal_unit_type))) + priv->codec_data.h265.slice_prev_poc = i; +} + +static unsigned get_poc(vid_dec_PrivateType *priv) +{ + return priv->picture.h265.CurrPicOrderCntVal; +} + +static void profile_tier(struct vl_rbsp *rbsp) +{ + int i; + + /* general_profile_space */ + vl_rbsp_u(rbsp, 2); + + /* general_tier_flag */ + vl_rbsp_u(rbsp, 1); + + /* general_profile_idc */ + vl_rbsp_u(rbsp, 5); + + /* general_profile_compatibility_flag */ + for(i = 0; i < 32; ++i) + vl_rbsp_u(rbsp, 1); + + /* general_progressive_source_flag */ + vl_rbsp_u(rbsp, 1); + + /* general_interlaced_source_flag */ + vl_rbsp_u(rbsp, 1); + + /* general_non_packed_constraint_flag */ + vl_rbsp_u(rbsp, 1); + + /* general_frame_only_constraint_flag */ + vl_rbsp_u(rbsp, 1); + + /* general_reserved_zero_44bits */ + vl_rbsp_u(rbsp, 16); + vl_rbsp_u(rbsp, 16); + vl_rbsp_u(rbsp, 12); +} + +static unsigned profile_tier_level(struct vl_rbsp *rbsp, + int max_sublayers_minus1) +{ + bool sub_layer_profile_present_flag[6]; + bool sub_layer_level_present_flag[6]; + unsigned level_idc; + int i; + + profile_tier(rbsp); + + /* general_level_idc */ + level_idc = vl_rbsp_u(rbsp, 8); + + for (i = 0; i < max_sublayers_minus1; ++i) { + sub_layer_profile_present_flag[i] = vl_rbsp_u(rbsp, 1); + sub_layer_level_present_flag[i] = vl_rbsp_u(rbsp, 1); + } + + if (max_sublayers_minus1 > 0) + for (i = max_sublayers_minus1; i < 8; ++i) + /* reserved_zero_2bits */ + vl_rbsp_u(rbsp, 2); + + for (i = 0; i < max_sublayers_minus1; ++i) { + if (sub_layer_profile_present_flag[i]) + profile_tier(rbsp); + + if (sub_layer_level_present_flag[i]) + /* sub_layer_level_idc */ + vl_rbsp_u(rbsp, 8); + } + + return level_idc; +} + +static void scaling_list_data(vid_dec_PrivateType *priv, + struct vl_rbsp *rbsp, struct pipe_h265_sps *sps) +{ + unsigned size_id, matrix_id; + unsigned scaling_list_len[4] = { 16, 64, 64, 64 }; + uint8_t scaling_list4x4[6][64] = { }; + int i; + + uint8_t (*scaling_list_data[4])[6][64] = { + (uint8_t (*)[6][64])scaling_list4x4, + (uint8_t (*)[6][64])sps->ScalingList8x8, + (uint8_t (*)[6][64])sps->ScalingList16x16, + (uint8_t (*)[6][64])sps->ScalingList32x32 + }; + uint8_t (*scaling_list_dc_coeff[2])[6] = { + (uint8_t (*)[6])sps->ScalingListDCCoeff16x16, + (uint8_t (*)[6])sps->ScalingListDCCoeff32x32 + }; + + for (size_id = 0; size_id < 4; ++size_id) { + + for (matrix_id = 0; matrix_id < ((size_id == 3) ? 2 : 6); ++matrix_id) { + bool scaling_list_pred_mode_flag = vl_rbsp_u(rbsp, 1); + + if (!scaling_list_pred_mode_flag) { + /* scaling_list_pred_matrix_id_delta */; + unsigned matrix_id_with_delta = matrix_id - vl_rbsp_ue(rbsp); + + if (matrix_id != matrix_id_with_delta) { + memcpy((*scaling_list_data[size_id])[matrix_id], + (*scaling_list_data[size_id])[matrix_id_with_delta], + scaling_list_len[size_id]); + if (size_id > 1) + (*scaling_list_dc_coeff[size_id - 2])[matrix_id] = + (*scaling_list_dc_coeff[size_id - 2])[matrix_id_with_delta]; + } else { + const uint8_t *d; + + if (size_id == 0) + memset((*scaling_list_data[0])[matrix_id], 16, 16); + else { + if (size_id < 3) + d = (matrix_id < 3) ? Default_8x8_Intra : Default_8x8_Inter; + else + d = (matrix_id < 1) ? Default_8x8_Intra : Default_8x8_Inter; + memcpy((*scaling_list_data[size_id])[matrix_id], d, + scaling_list_len[size_id]); + } + if (size_id > 1) + (*scaling_list_dc_coeff[size_id - 2])[matrix_id] = 16; + } + } else { + int next_coef = 8; + int coef_num = MIN2(64, (1 << (4 + (size_id << 1)))); + + if (size_id > 1) { + /* scaling_list_dc_coef_minus8 */ + next_coef = vl_rbsp_se(rbsp) + 8; + (*scaling_list_dc_coeff[size_id - 2])[matrix_id] = next_coef; + } + + for (i = 0; i < coef_num; ++i) { + /* scaling_list_delta_coef */ + next_coef = (next_coef + vl_rbsp_se(rbsp) + 256) % 256; + (*scaling_list_data[size_id])[matrix_id][i] = next_coef; + } + } + } + } + + for (i = 0; i < 6; ++i) + memcpy(sps->ScalingList4x4[i], scaling_list4x4[i], 16); + + return; +} + +static void st_ref_pic_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + struct ref_pic_set *rps, struct pipe_h265_sps *sps, + unsigned idx) +{ + bool inter_rps_pred_flag; + unsigned delta_idx_minus1; + int delta_poc; + int i; + + inter_rps_pred_flag = (idx != 0) ? (vl_rbsp_u(rbsp, 1)) : false; + + if (inter_rps_pred_flag) { + struct ref_pic_set *ref_rps; + unsigned sign, abs; + int delta_rps; + bool used; + int j; + + if (idx == sps->num_short_term_ref_pic_sets) + delta_idx_minus1 = vl_rbsp_ue(rbsp); + else + delta_idx_minus1 = 0; + + ref_rps = (struct ref_pic_set *) + priv->codec_data.h265.ref_pic_set_list + idx - (delta_idx_minus1 + 1); + + /* delta_rps_sign */ + sign = vl_rbsp_u(rbsp, 1); + /* abs_delta_rps_minus1 */ + abs = vl_rbsp_ue(rbsp); + delta_rps = (1 - 2 * sign) * (abs + 1); + + rps->num_neg_pics = 0; + rps->num_pos_pics = 0; + rps->num_pics = 0; + + for(i = 0 ; i <= ref_rps->num_pics; ++i) { + /* used_by_curr_pic_flag */ + if (!vl_rbsp_u(rbsp, 1)) + /* use_delta_flag */ + vl_rbsp_u(rbsp, 1); + else { + delta_poc = delta_rps + + ((i < ref_rps->num_pics)? ref_rps->delta_poc[i] : 0); + rps->delta_poc[rps->num_pics] = delta_poc; + rps->used[rps->num_pics] = true; + if (delta_poc < 0) + rps->num_neg_pics++; + else + rps->num_pos_pics++; + rps->num_pics++; + } + } + + rps->num_delta_poc = ref_rps->num_pics; + + /* sort delta poc */ + for (i = 1; i < rps->num_pics; ++i) { + delta_poc = rps->delta_poc[i]; + used = rps->used[i]; + for (j = i - 1; j >= 0; j--) { + if (delta_poc < rps->delta_poc[j]) { + rps->delta_poc[j + 1] = rps->delta_poc[j]; + rps->used[j + 1] = rps->used[j]; + rps->delta_poc[j] = delta_poc; + rps->used[j] = used; + } + } + } + + for (i = 0 , j = rps->num_neg_pics - 1; + i < rps->num_neg_pics >> 1; i++, j--) { + delta_poc = rps->delta_poc[i]; + used = rps->used[i]; + rps->delta_poc[i] = rps->delta_poc[j]; + rps->used[i] = rps->used[j]; + rps->delta_poc[j] = delta_poc; + rps->used[j] = used; + } + } else { + /* num_negative_pics */ + rps->num_neg_pics = vl_rbsp_ue(rbsp); + /* num_positive_pics */ + rps->num_pos_pics = vl_rbsp_ue(rbsp); + rps->num_pics = rps->num_neg_pics + rps->num_pos_pics; + + delta_poc = 0; + for(i = 0 ; i < rps->num_neg_pics; ++i) { + /* delta_poc_s0_minus1 */ + delta_poc -= (vl_rbsp_ue(rbsp) + 1); + rps->delta_poc[i] = delta_poc; + /* used_by_curr_pic_s0_flag */ + rps->used[i] = vl_rbsp_u(rbsp, 1); + } + + delta_poc = 0; + for(i = rps->num_neg_pics; i < rps->num_pics; ++i) { + /* delta_poc_s1_minus1 */ + delta_poc += (vl_rbsp_ue(rbsp) + 1); + rps->delta_poc[i] = delta_poc; + /* used_by_curr_pic_s1_flag */ + rps->used[i] = vl_rbsp_u(rbsp, 1); + } + } +} + +static struct pipe_h265_sps *seq_parameter_set_id(vid_dec_PrivateType *priv, + struct vl_rbsp *rbsp) +{ + unsigned id = vl_rbsp_ue(rbsp); + + if (id >= ARRAY_SIZE(priv->codec_data.h265.sps)) + return NULL; + + return &priv->codec_data.h265.sps[id]; +} + +static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) +{ + struct pipe_h265_sps *sps; + int sps_max_sub_layers_minus1; + unsigned i; + + /* sps_video_parameter_set_id */ + vl_rbsp_u(rbsp, 4); + + /* sps_max_sub_layers_minus1 */ + sps_max_sub_layers_minus1 = vl_rbsp_u(rbsp, 3); + + assert(sps_max_sub_layers_minus1 <= 6); + + /* sps_temporal_id_nesting_flag */ + vl_rbsp_u(rbsp, 1); + + priv->codec_data.h265.level_idc = + profile_tier_level(rbsp, sps_max_sub_layers_minus1); + + sps = seq_parameter_set_id(priv, rbsp); + if (!sps) + return; + + memset(sps, 0, sizeof(*sps)); + + sps->chroma_format_idc = vl_rbsp_ue(rbsp); + + if (sps->chroma_format_idc == 3) + sps->separate_colour_plane_flag = vl_rbsp_u(rbsp, 1); + + priv->codec_data.h265.pic_width_in_luma_samples = + sps->pic_width_in_luma_samples = vl_rbsp_ue(rbsp); + + priv->codec_data.h265.pic_height_in_luma_samples = + sps->pic_height_in_luma_samples = vl_rbsp_ue(rbsp); + + /* conformance_window_flag */ + if (vl_rbsp_u(rbsp, 1)) { + /* conf_win_left_offset */ + vl_rbsp_ue(rbsp); + /* conf_win_right_offset */ + vl_rbsp_ue(rbsp); + /* conf_win_top_offset */ + vl_rbsp_ue(rbsp); + /* conf_win_bottom_offset */ + vl_rbsp_ue(rbsp); + } + + sps->bit_depth_luma_minus8 = vl_rbsp_ue(rbsp); + sps->bit_depth_chroma_minus8 = vl_rbsp_ue(rbsp); + sps->log2_max_pic_order_cnt_lsb_minus4 = vl_rbsp_ue(rbsp); + + /* sps_sub_layer_ordering_info_present_flag */ + i = vl_rbsp_u(rbsp, 1) ? 0 : sps_max_sub_layers_minus1; + for (; i <= sps_max_sub_layers_minus1; ++i) { + sps->sps_max_dec_pic_buffering_minus1 = vl_rbsp_ue(rbsp); + /* sps_max_num_reorder_pics */ + vl_rbsp_ue(rbsp); + /* sps_max_latency_increase_plus */ + vl_rbsp_ue(rbsp); + } + + sps->log2_min_luma_coding_block_size_minus3 = vl_rbsp_ue(rbsp); + sps->log2_diff_max_min_luma_coding_block_size = vl_rbsp_ue(rbsp); + sps->log2_min_transform_block_size_minus2 = vl_rbsp_ue(rbsp); + sps->log2_diff_max_min_transform_block_size = vl_rbsp_ue(rbsp); + sps->max_transform_hierarchy_depth_inter = vl_rbsp_ue(rbsp); + sps->max_transform_hierarchy_depth_intra = vl_rbsp_ue(rbsp); + + sps->scaling_list_enabled_flag = vl_rbsp_u(rbsp, 1); + if (sps->scaling_list_enabled_flag) + /* sps_scaling_list_data_present_flag */ + if (vl_rbsp_u(rbsp, 1)) + scaling_list_data(priv, rbsp, sps); + + sps->amp_enabled_flag = vl_rbsp_u(rbsp, 1); + sps->sample_adaptive_offset_enabled_flag = vl_rbsp_u(rbsp, 1); + sps->pcm_enabled_flag = vl_rbsp_u(rbsp, 1); + if (sps->pcm_enabled_flag) { + sps->pcm_sample_bit_depth_luma_minus1 = vl_rbsp_u(rbsp, 4); + sps->pcm_sample_bit_depth_chroma_minus1 = vl_rbsp_u(rbsp, 4); + sps->log2_min_pcm_luma_coding_block_size_minus3 = vl_rbsp_ue(rbsp); + sps->log2_diff_max_min_pcm_luma_coding_block_size = vl_rbsp_ue(rbsp); + sps->pcm_loop_filter_disabled_flag = vl_rbsp_u(rbsp, 1); + } + + sps->num_short_term_ref_pic_sets = vl_rbsp_ue(rbsp); + + for (i = 0; i < sps->num_short_term_ref_pic_sets; ++i) { + struct ref_pic_set *rps; + + rps = (struct ref_pic_set *) + priv->codec_data.h265.ref_pic_set_list + i; + st_ref_pic_set(priv, rbsp, rps, sps, i); + } + + sps->long_term_ref_pics_present_flag = vl_rbsp_u(rbsp, 1); + if (sps->long_term_ref_pics_present_flag) { + sps->num_long_term_ref_pics_sps = vl_rbsp_ue(rbsp); + for (i = 0; i < sps->num_long_term_ref_pics_sps; ++i) { + /* lt_ref_pic_poc_lsb_sps */ + vl_rbsp_u(rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + /* used_by_curr_pic_lt_sps_flag */ + vl_rbsp_u(rbsp, 1); + } + } + + sps->sps_temporal_mvp_enabled_flag = vl_rbsp_u(rbsp, 1); + sps->strong_intra_smoothing_enabled_flag = vl_rbsp_u(rbsp, 1); +} + +static struct pipe_h265_pps *pic_parameter_set_id(vid_dec_PrivateType *priv, + struct vl_rbsp *rbsp) +{ + unsigned id = vl_rbsp_ue(rbsp); + + if (id >= ARRAY_SIZE(priv->codec_data.h265.pps)) + return NULL; + + return &priv->codec_data.h265.pps[id]; +} + +static void picture_parameter_set(vid_dec_PrivateType *priv, + struct vl_rbsp *rbsp) +{ + struct pipe_h265_sps *sps; + struct pipe_h265_pps *pps; + int i; + + pps = pic_parameter_set_id(priv, rbsp); + if (!pps) + return; + + memset(pps, 0, sizeof(*pps)); + sps = pps->sps = seq_parameter_set_id(priv, rbsp); + if (!sps) + return; + + pps->dependent_slice_segments_enabled_flag = vl_rbsp_u(rbsp, 1); + pps->output_flag_present_flag = vl_rbsp_u(rbsp, 1); + pps->num_extra_slice_header_bits = vl_rbsp_u(rbsp, 3); + pps->sign_data_hiding_enabled_flag = vl_rbsp_u(rbsp, 1); + pps->cabac_init_present_flag = vl_rbsp_u(rbsp, 1); + + pps->num_ref_idx_l0_default_active_minus1 = vl_rbsp_ue(rbsp); + pps->num_ref_idx_l1_default_active_minus1 = vl_rbsp_ue(rbsp); + pps->init_qp_minus26 = vl_rbsp_se(rbsp); + pps->constrained_intra_pred_flag = vl_rbsp_u(rbsp, 1); + pps->transform_skip_enabled_flag = vl_rbsp_u(rbsp, 1); + + pps->cu_qp_delta_enabled_flag = vl_rbsp_u(rbsp, 1); + if (pps->cu_qp_delta_enabled_flag) + pps->diff_cu_qp_delta_depth = vl_rbsp_ue(rbsp); + + pps->pps_cb_qp_offset = vl_rbsp_se(rbsp); + pps->pps_cr_qp_offset = vl_rbsp_se(rbsp); + pps->pps_slice_chroma_qp_offsets_present_flag = vl_rbsp_u(rbsp, 1); + + pps->weighted_pred_flag = vl_rbsp_u(rbsp, 1); + pps->weighted_bipred_flag = vl_rbsp_u(rbsp, 1); + + pps->transquant_bypass_enabled_flag = vl_rbsp_u(rbsp, 1); + pps->tiles_enabled_flag = vl_rbsp_u(rbsp, 1); + pps->entropy_coding_sync_enabled_flag = vl_rbsp_u(rbsp, 1); + + if (pps->tiles_enabled_flag) { + pps->num_tile_columns_minus1 = vl_rbsp_ue(rbsp); + pps->num_tile_rows_minus1 = vl_rbsp_ue(rbsp); + + pps->uniform_spacing_flag = vl_rbsp_u(rbsp, 1); + if (!pps->uniform_spacing_flag) { + for (i = 0; i < pps->num_tile_columns_minus1; ++i) + pps->column_width_minus1[i] = vl_rbsp_ue(rbsp); + + for (i = 0; i < pps->num_tile_rows_minus1; ++i) + pps->row_height_minus1[i] = vl_rbsp_ue(rbsp); + } + + if (!pps->num_tile_columns_minus1 || !pps->num_tile_rows_minus1) + pps->loop_filter_across_tiles_enabled_flag = vl_rbsp_u(rbsp, 1); + } + + pps->pps_loop_filter_across_slices_enabled_flag = vl_rbsp_u(rbsp, 1); + + pps->deblocking_filter_control_present_flag = vl_rbsp_u(rbsp, 1); + if (pps->deblocking_filter_control_present_flag) { + pps->deblocking_filter_override_enabled_flag = vl_rbsp_u(rbsp, 1); + pps->pps_deblocking_filter_disabled_flag = vl_rbsp_u(rbsp, 1); + if (!pps->pps_deblocking_filter_disabled_flag) { + pps->pps_beta_offset_div2 = vl_rbsp_se(rbsp); + pps->pps_tc_offset_div2 = vl_rbsp_se(rbsp); + } + } + + /* pps_scaling_list_data_present_flag */ + if (vl_rbsp_u(rbsp, 1)) + scaling_list_data(priv, rbsp, sps); + + pps->lists_modification_present_flag = vl_rbsp_u(rbsp, 1); + pps->log2_parallel_merge_level_minus2 = vl_rbsp_ue(rbsp); + pps->slice_segment_header_extension_present_flag = vl_rbsp_u(rbsp, 1); +} + +static void vid_dec_h265_BeginFrame(vid_dec_PrivateType *priv) +{ + if (priv->frame_started) + return; + + if (!priv->codec) { + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port = (omx_base_video_PortType *) + priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + + templat.profile = priv->profile; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.expect_chunked_decode = true; + templat.width = priv->codec_data.h265.pic_width_in_luma_samples; + templat.height = priv->codec_data.h265.pic_height_in_luma_samples; + templat.level = priv->codec_data.h265.level_idc; + priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); + + /* disable transcode tunnel if video size is different from coded size */ + if (priv->codec_data.h265.pic_width_in_luma_samples != + port->sPortParam.format.video.nFrameWidth || + priv->codec_data.h265.pic_height_in_luma_samples != + port->sPortParam.format.video.nFrameHeight) + priv->disable_tunnel = true; + } + + vid_dec_NeedTarget(priv); + + if (priv->first_buf_in_frame) + priv->timestamp = priv->timestamps[0]; + priv->first_buf_in_frame = false; + + priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = true; +} + +static struct pipe_video_buffer *vid_dec_h265_Flush(vid_dec_PrivateType *priv, + OMX_TICKS *timestamp) +{ + struct dpb_list *entry, *result = NULL; + struct pipe_video_buffer *buf; + + /* search for the lowest poc and break on zeros */ + LIST_FOR_EACH_ENTRY(entry, &priv->codec_data.h265.dpb_list, list) { + + if (result && entry->poc == 0) + break; + + if (!result || entry->poc < result->poc) + result = entry; + } + + if (!result) + return NULL; + + buf = result->buffer; + if (timestamp) + *timestamp = result->timestamp; + + --priv->codec_data.h265.dpb_num; + LIST_DEL(&result->list); + FREE(result); + + return buf; +} + +static void vid_dec_h265_EndFrame(vid_dec_PrivateType *priv) +{ + struct dpb_list *entry = NULL; + struct pipe_video_buffer *tmp; + struct ref_pic_set *rps; + int i; + OMX_TICKS timestamp; + + if (!priv->frame_started) + return; + + priv->picture.h265.NumPocStCurrBefore = 0; + priv->picture.h265.NumPocStCurrAfter = 0; + memset(priv->picture.h265.RefPicSetStCurrBefore, 0, 8); + memset(priv->picture.h265.RefPicSetStCurrAfter, 0, 8); + for (i = 0; i < MAX_NUM_REF_PICS; ++i) { + priv->picture.h265.ref[i] = NULL; + priv->picture.h265.PicOrderCntVal[i] = 0; + } + + rps = priv->codec_data.h265.rps; + + if (rps) { + unsigned bf = 0, af = 0; + + priv->picture.h265.NumDeltaPocsOfRefRpsIdx = rps->num_delta_poc; + for (i = 0; i < rps->num_pics; ++i) { + priv->picture.h265.PicOrderCntVal[i] = + rps->delta_poc[i] + get_poc(priv); + + LIST_FOR_EACH_ENTRY(entry, &priv->codec_data.h265.dpb_list, list) { + if (entry->poc == priv->picture.h265.PicOrderCntVal[i]) { + priv->picture.h265.ref[i] = entry->buffer; + break; + } + } + + if (rps->used[i]) { + if (i < rps->num_neg_pics) { + priv->picture.h265.NumPocStCurrBefore++; + priv->picture.h265.RefPicSetStCurrBefore[bf++] = i; + } else { + priv->picture.h265.NumPocStCurrAfter++; + priv->picture.h265.RefPicSetStCurrAfter[af++] = i; + } + } + } + } + + priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = false; + + /* add the decoded picture to the dpb list */ + entry = CALLOC_STRUCT(dpb_list); + if (!entry) + return; + + priv->first_buf_in_frame = true; + entry->buffer = priv->target; + entry->timestamp = priv->timestamp; + entry->poc = get_poc(priv); + + LIST_ADDTAIL(&entry->list, &priv->codec_data.h265.dpb_list); + ++priv->codec_data.h265.dpb_num; + priv->target = NULL; + + if (priv->codec_data.h265.dpb_num <= DPB_MAX_SIZE) + return; + + tmp = priv->in_buffers[0]->pInputPortPrivate; + priv->in_buffers[0]->pInputPortPrivate = vid_dec_h265_Flush(priv, ×tamp); + priv->in_buffers[0]->nTimeStamp = timestamp; + priv->target = tmp; + priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL; + if (priv->frame_finished && + (priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS)) + FREE(priv->codec_data.h265.ref_pic_set_list); +} + +static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, + unsigned nal_unit_type) +{ + struct pipe_h265_pps *pps; + struct pipe_h265_sps *sps; + bool first_slice_segment_in_pic_flag; + bool dependent_slice_segment_flag = false; + struct ref_pic_set *rps; + unsigned poc_lsb, poc_msb, slice_prev_poc; + unsigned max_poc_lsb, prev_poc_lsb, prev_poc_msb; + unsigned num_st_rps; + int i; + + if (priv->picture.h265.IDRPicFlag != is_idr_picture(nal_unit_type)) + vid_dec_h265_EndFrame(priv); + + priv->picture.h265.IDRPicFlag = is_idr_picture(nal_unit_type); + + first_slice_segment_in_pic_flag = vl_rbsp_u(rbsp, 1); + + if (is_rap_picture(nal_unit_type)) + /* no_output_of_prior_pics_flag */ + vl_rbsp_u(rbsp, 1); + + pps = pic_parameter_set_id(priv, rbsp); + if (!pps) + return; + + sps = pps->sps; + if (!sps) + return; + + if (pps != priv->picture.h265.pps) + vid_dec_h265_EndFrame(priv); + + priv->picture.h265.pps = pps; + + if (priv->picture.h265.RAPPicFlag != is_rap_picture(nal_unit_type)) + vid_dec_h265_EndFrame(priv); + priv->picture.h265.RAPPicFlag = is_rap_picture(nal_unit_type); + + num_st_rps = sps->num_short_term_ref_pic_sets; + + if (priv->picture.h265.CurrRpsIdx != num_st_rps) + vid_dec_h265_EndFrame(priv); + priv->picture.h265.CurrRpsIdx = num_st_rps; + + if (!first_slice_segment_in_pic_flag) { + int size, num; + int bits_slice_segment_address = 0; + + if (pps->dependent_slice_segments_enabled_flag) + dependent_slice_segment_flag = vl_rbsp_u(rbsp, 1); + + size = 1 << (sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size); + + num = ((sps->pic_width_in_luma_samples + size - 1) / size) * + ((sps->pic_height_in_luma_samples + size - 1) / size); + + while (num > (1 << bits_slice_segment_address)) + bits_slice_segment_address++; + + /* slice_segment_address */ + vl_rbsp_u(rbsp, bits_slice_segment_address); + } + + if (dependent_slice_segment_flag) + return; + + for (i = 0; i < pps->num_extra_slice_header_bits; ++i) + /* slice_reserved_flag */ + vl_rbsp_u(rbsp, 1); + + /* slice_type */ + vl_rbsp_ue(rbsp); + + if (pps->output_flag_present_flag) + /* pic output flag */ + vl_rbsp_u(rbsp, 1); + + if (sps->separate_colour_plane_flag) + /* colour_plane_id */ + vl_rbsp_u(rbsp, 2); + + if (is_idr_picture(nal_unit_type)) { + set_poc(priv, nal_unit_type, 0); + return; + } + + /* slice_pic_order_cnt_lsb */ + poc_lsb = + vl_rbsp_u(rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + slice_prev_poc = (int)priv->codec_data.h265.slice_prev_poc; + max_poc_lsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + prev_poc_lsb = slice_prev_poc & (max_poc_lsb - 1); + prev_poc_msb = slice_prev_poc - prev_poc_lsb; + + if ((poc_lsb < prev_poc_lsb) && + ((prev_poc_lsb - poc_lsb ) >= (max_poc_lsb / 2))) + poc_msb = prev_poc_msb + max_poc_lsb; + + else if ((poc_lsb > prev_poc_lsb ) && + ((poc_lsb - prev_poc_lsb) > (max_poc_lsb / 2))) + poc_msb = prev_poc_msb - max_poc_lsb; + + else + poc_msb = prev_poc_msb; + + if (is_bla_picture(nal_unit_type)) + poc_msb = 0; + + if (get_poc(priv) != poc_msb + poc_lsb) + vid_dec_h265_EndFrame(priv); + + set_poc(priv, nal_unit_type, (poc_msb + poc_lsb)); + + /* short_term_ref_pic_set_sps_flag */ + if (!vl_rbsp_u(rbsp, 1)) { + rps = (struct ref_pic_set *) + priv->codec_data.h265.ref_pic_set_list + num_st_rps; + st_ref_pic_set(priv, rbsp, rps, sps, num_st_rps); + + } else if (num_st_rps > 1) { + int num_bits = 0; + unsigned idx; + + while ((1 << num_bits) < num_st_rps) + num_bits++; + + if (num_bits > 0) + /* short_term_ref_pic_set_idx */ + idx = vl_rbsp_u(rbsp, num_bits); + else + idx = 0; + + rps = (struct ref_pic_set *) + priv->codec_data.h265.ref_pic_set_list + idx; + } else + rps = (struct ref_pic_set *) + priv->codec_data.h265.ref_pic_set_list; + + if (is_bla_picture(nal_unit_type)) { + rps->num_neg_pics = 0; + rps->num_pos_pics = 0; + rps->num_pics = 0; + } + + priv->codec_data.h265.rps = rps; + + return; +} + +static void vid_dec_h265_Decode(vid_dec_PrivateType *priv, + struct vl_vlc *vlc, + unsigned min_bits_left) +{ + unsigned nal_unit_type; + unsigned nuh_layer_id; + unsigned nuh_temporal_id_plus1; + + if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00)) + return; + + if (vl_vlc_peekbits(vlc, 24) != 0x000001) { + vl_vlc_eatbits(vlc, 8); + return; + } + + if (priv->slice) { + unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8); + + priv->codec->decode_bitstream(priv->codec, priv->target, + &priv->picture.base, 1, + &priv->slice, &bytes); + priv->slice = NULL; + } + + vl_vlc_eatbits(vlc, 24); + + /* forbidden_zero_bit */ + vl_vlc_eatbits(vlc, 1); + + if (vl_vlc_valid_bits(vlc) < 15) + vl_vlc_fillbits(vlc); + + nal_unit_type = vl_vlc_get_uimsbf(vlc, 6); + + /* nuh_layer_id */ + nuh_layer_id = vl_vlc_get_uimsbf(vlc, 6); + + /* nuh_temporal_id_plus1 */ + nuh_temporal_id_plus1 = vl_vlc_get_uimsbf(vlc, 3); + priv->codec_data.h265.temporal_id = nuh_temporal_id_plus1 - 1; + + if (!is_slice_picture(nal_unit_type)) + vid_dec_h265_EndFrame(priv); + + if (nal_unit_type == NAL_UNIT_TYPE_SPS) { + struct vl_rbsp rbsp; + + vl_rbsp_init(&rbsp, vlc, ~0); + seq_parameter_set(priv, &rbsp); + + } else if (nal_unit_type == NAL_UNIT_TYPE_PPS) { + struct vl_rbsp rbsp; + + vl_rbsp_init(&rbsp, vlc, ~0); + picture_parameter_set(priv, &rbsp); + + } else if (is_slice_picture(nal_unit_type)) { + unsigned bits = vl_vlc_valid_bits(vlc); + unsigned bytes = bits / 8 + 5; + struct vl_rbsp rbsp; + uint8_t buf[9]; + const void *ptr = buf; + unsigned i; + + buf[0] = 0x0; + buf[1] = 0x0; + buf[2] = 0x1; + buf[3] = nal_unit_type << 1 | nuh_layer_id >> 5; + buf[4] = nuh_layer_id << 3 | nuh_temporal_id_plus1; + for (i = 5; i < bytes; ++i) + buf[i] = vl_vlc_peekbits(vlc, bits) >> ((bytes - i - 1) * 8); + + priv->bytes_left = (vl_vlc_bits_left(vlc) - bits) / 8; + priv->slice = vlc->data; + + vl_rbsp_init(&rbsp, vlc, 128); + slice_header(priv, &rbsp, nal_unit_type); + + vid_dec_h265_BeginFrame(priv); + + priv->codec->decode_bitstream(priv->codec, priv->target, + &priv->picture.base, 1, + &ptr, &bytes); + } + + /* resync to byte boundary */ + vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8); +} + +void vid_dec_h265_Init(vid_dec_PrivateType *priv) +{ + priv->picture.base.profile = PIPE_VIDEO_PROFILE_HEVC_MAIN; + + LIST_INITHEAD(&priv->codec_data.h265.dpb_list); + priv->codec_data.h265.ref_pic_set_list = (struct ref_pic_set *) + CALLOC(MAX_NUM_REF_PICS, sizeof(struct ref_pic_set)); + + priv->Decode = vid_dec_h265_Decode; + priv->EndFrame = vid_dec_h265_EndFrame; + priv->Flush = vid_dec_h265_Flush; + priv->first_buf_in_frame = true; +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c new file mode 100644 index 000000000..f64c2b83f --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c @@ -0,0 +1,383 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#include "pipe/p_video_codec.h" +#include "vl/vl_vlc.h" +#include "vl/vl_zscan.h" + +#include "vid_dec.h" + +static uint8_t default_intra_matrix[64] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +}; + +static uint8_t default_non_intra_matrix[64] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 +}; + +static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); +static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv); +static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); + +void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv) +{ + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + templat.profile = priv->profile; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.max_references = 2; + templat.expect_chunked_decode = true; + templat.width = port->sPortParam.format.video.nFrameWidth; + templat.height = port->sPortParam.format.video.nFrameHeight; + + priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); + + priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN; + priv->picture.mpeg12.intra_matrix = default_intra_matrix; + priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix; + + priv->Decode = vid_dec_mpeg12_Decode; + priv->EndFrame = vid_dec_mpeg12_EndFrame; + priv->Flush = vid_dec_mpeg12_Flush; +} + +static void BeginFrame(vid_dec_PrivateType *priv) +{ + if (priv->picture.mpeg12.picture_coding_type != PIPE_MPEG12_PICTURE_CODING_TYPE_B) { + priv->picture.mpeg12.ref[0] = priv->picture.mpeg12.ref[1]; + priv->picture.mpeg12.ref[1] = NULL; + } + + if (priv->target == priv->picture.mpeg12.ref[0]) { + struct pipe_video_buffer *tmp = priv->target; + priv->target = priv->shadow; + priv->shadow = tmp; + } + + vid_dec_NeedTarget(priv); + + priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = true; +} + +static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv) +{ + struct pipe_video_buffer *done; + + priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base); + priv->frame_started = false; + + if (priv->picture.mpeg12.picture_coding_type != PIPE_MPEG12_PICTURE_CODING_TYPE_B) { + + priv->picture.mpeg12.ref[1] = priv->target; + done = priv->picture.mpeg12.ref[0]; + if (!done) { + priv->target = NULL; + return; + } + + } else + done = priv->target; + + priv->frame_finished = true; + priv->target = priv->in_buffers[0]->pInputPortPrivate; + priv->in_buffers[0]->pInputPortPrivate = done; +} + +static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp) +{ + struct pipe_video_buffer *result = priv->picture.mpeg12.ref[1]; + priv->picture.mpeg12.ref[1] = NULL; + if (timestamp) + *timestamp = OMX_VID_DEC_TIMESTAMP_INVALID; + return result; +} + +static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left) +{ + uint8_t code; + unsigned i; + + if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00)) + return; + + if (vl_vlc_peekbits(vlc, 24) != 0x000001) { + vl_vlc_eatbits(vlc, 8); + return; + } + + if (priv->slice) { + unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8); + priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base, + 1, &priv->slice, &bytes); + priv->slice = NULL; + } + + vl_vlc_eatbits(vlc, 24); + code = vl_vlc_get_uimsbf(vlc, 8); + + if (priv->frame_started && (code == 0x00 || code > 0xAF)) + vid_dec_mpeg12_EndFrame(priv); + + if (code == 0xB3) { + /* sequence header code */ + vl_vlc_fillbits(vlc); + + /* horizontal_size_value */ + vl_vlc_get_uimsbf(vlc, 12); + + /* vertical_size_value */ + vl_vlc_get_uimsbf(vlc, 12); + + /* aspect_ratio_information */ + vl_vlc_get_uimsbf(vlc, 4); + + /* frame_rate_code */ + vl_vlc_get_uimsbf(vlc, 4); + + vl_vlc_fillbits(vlc); + + /* bit_rate_value */ + vl_vlc_get_uimsbf(vlc, 18); + + /* marker_bit */ + vl_vlc_get_uimsbf(vlc, 1); + + /* vbv_buffer_size_value */ + vl_vlc_get_uimsbf(vlc, 10); + + /* constrained_parameters_flag */ + vl_vlc_get_uimsbf(vlc, 1); + + vl_vlc_fillbits(vlc); + + /* load_intra_quantiser_matrix */ + if (vl_vlc_get_uimsbf(vlc, 1)) { + /* intra_quantiser_matrix */ + priv->picture.mpeg12.intra_matrix = priv->codec_data.mpeg12.intra_matrix; + for (i = 0; i < 64; ++i) { + priv->codec_data.mpeg12.intra_matrix[vl_zscan_normal[i]] = vl_vlc_get_uimsbf(vlc, 8); + vl_vlc_fillbits(vlc); + } + } else + priv->picture.mpeg12.intra_matrix = default_intra_matrix; + + /* load_non_intra_quantiser_matrix */ + if (vl_vlc_get_uimsbf(vlc, 1)) { + /* non_intra_quantiser_matrix */ + priv->picture.mpeg12.non_intra_matrix = priv->codec_data.mpeg12.non_intra_matrix; + for (i = 0; i < 64; ++i) { + priv->codec_data.mpeg12.non_intra_matrix[i] = vl_vlc_get_uimsbf(vlc, 8); + vl_vlc_fillbits(vlc); + } + } else + priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix; + + } else if (code == 0x00) { + /* picture start code */ + vl_vlc_fillbits(vlc); + + /* temporal_reference */ + vl_vlc_get_uimsbf(vlc, 10); + + priv->picture.mpeg12.picture_coding_type = vl_vlc_get_uimsbf(vlc, 3); + + /* vbv_delay */ + vl_vlc_get_uimsbf(vlc, 16); + + vl_vlc_fillbits(vlc); + if (priv->picture.mpeg12.picture_coding_type == 2 || + priv->picture.mpeg12.picture_coding_type == 3) { + priv->picture.mpeg12.full_pel_forward_vector = vl_vlc_get_uimsbf(vlc, 1); + /* forward_f_code */ + priv->picture.mpeg12.f_code[0][0] = vl_vlc_get_uimsbf(vlc, 3) - 1; + priv->picture.mpeg12.f_code[0][1] = priv->picture.mpeg12.f_code[0][0]; + } else { + priv->picture.mpeg12.full_pel_forward_vector = 0; + priv->picture.mpeg12.f_code[0][1] = priv->picture.mpeg12.f_code[0][0] = 14; + } + + if (priv->picture.mpeg12.picture_coding_type == 3) { + priv->picture.mpeg12.full_pel_backward_vector = vl_vlc_get_uimsbf(vlc, 1); + /* backward_f_code */ + priv->picture.mpeg12.f_code[1][0] = vl_vlc_get_uimsbf(vlc, 3) - 1; + priv->picture.mpeg12.f_code[1][1] = priv->picture.mpeg12.f_code[1][0]; + } else { + priv->picture.mpeg12.full_pel_backward_vector = 0; + priv->picture.mpeg12.f_code[0][1] = priv->picture.mpeg12.f_code[0][0] = 14; + } + + /* extra_bit_picture */ + while (vl_vlc_get_uimsbf(vlc, 1)) { + /* extra_information_picture */ + vl_vlc_get_uimsbf(vlc, 8); + vl_vlc_fillbits(vlc); + } + + } else if (code == 0xB5) { + /* extension start code */ + vl_vlc_fillbits(vlc); + + /* extension_start_code_identifier */ + switch (vl_vlc_get_uimsbf(vlc, 4)) { + case 0x3: /* quant matrix extension */ + + /* load_intra_quantiser_matrix */ + if (vl_vlc_get_uimsbf(vlc, 1)) { + /* intra_quantiser_matrix */ + priv->picture.mpeg12.intra_matrix = priv->codec_data.mpeg12.intra_matrix; + for (i = 0; i < 64; ++i) { + priv->codec_data.mpeg12.intra_matrix[vl_zscan_normal[i]] = vl_vlc_get_uimsbf(vlc, 8); + vl_vlc_fillbits(vlc); + } + } else + priv->picture.mpeg12.intra_matrix = default_intra_matrix; + + /* load_non_intra_quantiser_matrix */ + if (vl_vlc_get_uimsbf(vlc, 1)) { + /* non_intra_quantiser_matrix */ + priv->picture.mpeg12.non_intra_matrix = priv->codec_data.mpeg12.non_intra_matrix; + for (i = 0; i < 64; ++i) { + priv->codec_data.mpeg12.non_intra_matrix[i] = vl_vlc_get_uimsbf(vlc, 8); + vl_vlc_fillbits(vlc); + } + } else + priv->picture.mpeg12.intra_matrix = default_non_intra_matrix; + + break; + + case 0x8: /* picture coding extension */ + + priv->picture.mpeg12.f_code[0][0] = vl_vlc_get_uimsbf(vlc, 4) - 1; + priv->picture.mpeg12.f_code[0][1] = vl_vlc_get_uimsbf(vlc, 4) - 1; + priv->picture.mpeg12.f_code[1][0] = vl_vlc_get_uimsbf(vlc, 4) - 1; + priv->picture.mpeg12.f_code[1][1] = vl_vlc_get_uimsbf(vlc, 4) - 1; + priv->picture.mpeg12.intra_dc_precision = vl_vlc_get_uimsbf(vlc, 2); + priv->picture.mpeg12.picture_structure = vl_vlc_get_uimsbf(vlc, 2); + priv->picture.mpeg12.top_field_first = vl_vlc_get_uimsbf(vlc, 1); + priv->picture.mpeg12.frame_pred_frame_dct = vl_vlc_get_uimsbf(vlc, 1); + priv->picture.mpeg12.concealment_motion_vectors = vl_vlc_get_uimsbf(vlc, 1); + priv->picture.mpeg12.q_scale_type = vl_vlc_get_uimsbf(vlc, 1); + priv->picture.mpeg12.intra_vlc_format = vl_vlc_get_uimsbf(vlc, 1); + priv->picture.mpeg12.alternate_scan = vl_vlc_get_uimsbf(vlc, 1); + + /* repeat_first_field */ + vl_vlc_get_uimsbf(vlc, 1); + + /* chroma_420_type */ + vl_vlc_get_uimsbf(vlc, 1); + + vl_vlc_fillbits(vlc); + + /* progressive_frame */ + vl_vlc_get_uimsbf(vlc, 1); + + /* composite_display_flag */ + if (vl_vlc_get_uimsbf(vlc, 1)) { + + /* v_axis */ + vl_vlc_get_uimsbf(vlc, 1); + + /* field_sequence */ + vl_vlc_get_uimsbf(vlc, 3); + + /* sub_carrier */ + vl_vlc_get_uimsbf(vlc, 1); + + /* burst_amplitude */ + vl_vlc_get_uimsbf(vlc, 7); + + /* sub_carrier_phase */ + vl_vlc_get_uimsbf(vlc, 8); + } + break; + } + + } else if (code <= 0xAF) { + /* slice start */ + unsigned bytes = (vl_vlc_valid_bits(vlc) / 8) + 4; + uint8_t buf[12]; + const void *ptr = buf; + unsigned i; + + if (!priv->frame_started) + BeginFrame(priv); + + buf[0] = 0x00; + buf[1] = 0x00; + buf[2] = 0x01; + buf[3] = code; + for (i = 4; i < bytes; ++i) + buf[i] = vl_vlc_get_uimsbf(vlc, 8); + + priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base, + 1, &ptr, &bytes); + + priv->bytes_left = vl_vlc_bits_left(vlc) / 8; + priv->slice = vlc->data; + + } else if (code == 0xB2) { + /* user data start */ + + } else if (code == 0xB4) { + /* sequence error */ + } else if (code == 0xB7) { + /* sequence end */ + } else if (code == 0xB8) { + /* group start */ + } else if (code >= 0xB9) { + /* system start */ + } else { + /* reserved */ + } + + /* resync to byte boundary */ + vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8); +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c new file mode 100644 index 000000000..1a4fb62d4 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c @@ -0,0 +1,1278 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + + +#include <assert.h> + +#include <OMX_Video.h> + +/* bellagio defines a DEBUG macro that we don't want */ +#ifndef DEBUG +#include <bellagio/omxcore.h> +#undef DEBUG +#else +#include <bellagio/omxcore.h> +#endif + +#include <bellagio/omx_base_video_port.h> + +#include "pipe/p_screen.h" +#include "pipe/p_video_codec.h" +#include "util/u_memory.h" +#include "vl/vl_video_buffer.h" + +#include "entrypoint.h" +#include "vid_enc.h" + +struct encode_task { + struct list_head list; + + struct pipe_video_buffer *buf; + unsigned pic_order_cnt; + struct pipe_resource *bitstream; + void *feedback; +}; + +struct input_buf_private { + struct list_head tasks; + + struct pipe_resource *resource; + struct pipe_transfer *transfer; +}; + +struct output_buf_private { + struct pipe_resource *bitstream; + struct pipe_transfer *transfer; +}; + +static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name); +static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp); +static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param); +static OMX_ERRORTYPE vid_enc_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param); +static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config); +static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config); +static OMX_ERRORTYPE vid_enc_MessageHandler(OMX_COMPONENTTYPE *comp, internalRequestMessageType *msg); +static OMX_ERRORTYPE vid_enc_AllocateInBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf, + OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size); +static OMX_ERRORTYPE vid_enc_UseInBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE **buf, OMX_U32 idx, + OMX_PTR private, OMX_U32 size, OMX_U8 *mem); +static OMX_ERRORTYPE vid_enc_FreeInBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf); +static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf); +static OMX_ERRORTYPE vid_enc_AllocateOutBuffer(omx_base_PortType *comp, OMX_INOUT OMX_BUFFERHEADERTYPE **buf, + OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size); +static OMX_ERRORTYPE vid_enc_FreeOutBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf); +static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output); + +static void enc_ReleaseTasks(struct list_head *head); + +OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp) +{ + comp->componentVersion.s.nVersionMajor = 0; + comp->componentVersion.s.nVersionMinor = 0; + comp->componentVersion.s.nRevision = 0; + comp->componentVersion.s.nStep = 1; + comp->name_specific_length = 1; + comp->constructor = vid_enc_Constructor; + + comp->name = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (!comp->name) + return OMX_ErrorInsufficientResources; + + comp->name_specific = CALLOC(1, sizeof(char *)); + if (!comp->name_specific) + goto error_arrays; + + comp->role_specific = CALLOC(1, sizeof(char *)); + if (!comp->role_specific) + goto error_arrays; + + comp->name_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->name_specific[0] == NULL) + goto error_specific; + + comp->role_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE); + if (comp->role_specific[0] == NULL) + goto error_specific; + + strcpy(comp->name, OMX_VID_ENC_BASE_NAME); + strcpy(comp->name_specific[0], OMX_VID_ENC_AVC_NAME); + strcpy(comp->role_specific[0], OMX_VID_ENC_AVC_ROLE); + + return OMX_ErrorNone; + +error_specific: + FREE(comp->role_specific[0]); + FREE(comp->name_specific[0]); + +error_arrays: + FREE(comp->role_specific); + FREE(comp->name_specific); + + FREE(comp->name); + + return OMX_ErrorInsufficientResources; +} + +static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name) +{ + vid_enc_PrivateType *priv; + omx_base_video_PortType *port; + struct pipe_screen *screen; + OMX_ERRORTYPE r; + int i; + + assert(!comp->pComponentPrivate); + + priv = comp->pComponentPrivate = CALLOC(1, sizeof(vid_enc_PrivateType)); + if (!priv) + return OMX_ErrorInsufficientResources; + + r = omx_base_filter_Constructor(comp, name); + if (r) + return r; + + priv->BufferMgmtCallback = vid_enc_BufferEncoded; + priv->messageHandler = vid_enc_MessageHandler; + priv->destructor = vid_enc_Destructor; + + comp->SetParameter = vid_enc_SetParameter; + comp->GetParameter = vid_enc_GetParameter; + comp->GetConfig = vid_enc_GetConfig; + comp->SetConfig = vid_enc_SetConfig; + + priv->screen = omx_get_screen(); + if (!priv->screen) + return OMX_ErrorInsufficientResources; + + screen = priv->screen->pscreen; + if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH, + PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED)) + return OMX_ErrorBadParameter; + + priv->s_pipe = screen->context_create(screen, NULL, 0); + if (!priv->s_pipe) + return OMX_ErrorInsufficientResources; + + if (!vl_compositor_init(&priv->compositor, priv->s_pipe)) { + priv->s_pipe->destroy(priv->s_pipe); + priv->s_pipe = NULL; + return OMX_ErrorInsufficientResources; + } + + if (!vl_compositor_init_state(&priv->cstate, priv->s_pipe)) { + vl_compositor_cleanup(&priv->compositor); + priv->s_pipe->destroy(priv->s_pipe); + priv->s_pipe = NULL; + return OMX_ErrorInsufficientResources; + } + + priv->t_pipe = screen->context_create(screen, NULL, 0); + if (!priv->t_pipe) + return OMX_ErrorInsufficientResources; + + priv->sPortTypesParam[OMX_PortDomainVideo].nStartPortNumber = 0; + priv->sPortTypesParam[OMX_PortDomainVideo].nPorts = 2; + priv->ports = CALLOC(2, sizeof(omx_base_PortType *)); + if (!priv->ports) + return OMX_ErrorInsufficientResources; + + for (i = 0; i < 2; ++i) { + priv->ports[i] = CALLOC(1, sizeof(omx_base_video_PortType)); + if (!priv->ports[i]) + return OMX_ErrorInsufficientResources; + + base_video_port_Constructor(comp, &priv->ports[i], i, i == 0); + } + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + port->sPortParam.format.video.nFrameWidth = 176; + port->sPortParam.format.video.nFrameHeight = 144; + port->sPortParam.format.video.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar; + port->sVideoParam.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar; + port->sPortParam.nBufferCountActual = 8; + port->sPortParam.nBufferCountMin = 4; + + port->Port_SendBufferFunction = vid_enc_EncodeFrame; + port->Port_AllocateBuffer = vid_enc_AllocateInBuffer; + port->Port_UseBuffer = vid_enc_UseInBuffer; + port->Port_FreeBuffer = vid_enc_FreeInBuffer; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]; + strcpy(port->sPortParam.format.video.cMIMEType,"video/H264"); + port->sPortParam.format.video.nFrameWidth = 176; + port->sPortParam.format.video.nFrameHeight = 144; + port->sPortParam.format.video.eCompressionFormat = OMX_VIDEO_CodingAVC; + port->sVideoParam.eCompressionFormat = OMX_VIDEO_CodingAVC; + + port->Port_AllocateBuffer = vid_enc_AllocateOutBuffer; + port->Port_FreeBuffer = vid_enc_FreeOutBuffer; + + priv->bitrate.eControlRate = OMX_Video_ControlRateDisable; + priv->bitrate.nTargetBitrate = 0; + + priv->quant.nQpI = OMX_VID_ENC_QUANT_I_FRAMES_DEFAULT; + priv->quant.nQpP = OMX_VID_ENC_QUANT_P_FRAMES_DEFAULT; + priv->quant.nQpB = OMX_VID_ENC_QUANT_B_FRAMES_DEFAULT; + + priv->profile_level.eProfile = OMX_VIDEO_AVCProfileBaseline; + priv->profile_level.eLevel = OMX_VIDEO_AVCLevel51; + + priv->force_pic_type.IntraRefreshVOP = OMX_FALSE; + priv->frame_num = 0; + priv->pic_order_cnt = 0; + priv->restricted_b_frames = debug_get_bool_option("OMX_USE_RESTRICTED_B_FRAMES", FALSE); + + priv->scale.xWidth = OMX_VID_ENC_SCALING_WIDTH_DEFAULT; + priv->scale.xHeight = OMX_VID_ENC_SCALING_WIDTH_DEFAULT; + + LIST_INITHEAD(&priv->free_tasks); + LIST_INITHEAD(&priv->used_tasks); + LIST_INITHEAD(&priv->b_frames); + LIST_INITHEAD(&priv->stacked_tasks); + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp) +{ + vid_enc_PrivateType* priv = comp->pComponentPrivate; + int i; + + enc_ReleaseTasks(&priv->free_tasks); + enc_ReleaseTasks(&priv->used_tasks); + enc_ReleaseTasks(&priv->b_frames); + enc_ReleaseTasks(&priv->stacked_tasks); + + if (priv->ports) { + for (i = 0; i < priv->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++i) { + if(priv->ports[i]) + priv->ports[i]->PortDestructor(priv->ports[i]); + } + FREE(priv->ports); + priv->ports=NULL; + } + + for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i) + if (priv->scale_buffer[i]) + priv->scale_buffer[i]->destroy(priv->scale_buffer[i]); + + if (priv->s_pipe) { + vl_compositor_cleanup_state(&priv->cstate); + vl_compositor_cleanup(&priv->compositor); + priv->s_pipe->destroy(priv->s_pipe); + } + + if (priv->t_pipe) + priv->t_pipe->destroy(priv->t_pipe); + + if (priv->screen) + omx_put_screen(); + + return omx_workaround_Destructor(comp); +} + +static OMX_ERRORTYPE enc_AllocateBackTexture(omx_base_PortType *port, + struct pipe_resource **resource, + struct pipe_transfer **transfer, + OMX_U8 **map) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct pipe_resource buf_templ; + struct pipe_box box = {}; + OMX_U8 *ptr; + + memset(&buf_templ, 0, sizeof buf_templ); + buf_templ.target = PIPE_TEXTURE_2D; + buf_templ.format = PIPE_FORMAT_I8_UNORM; + buf_templ.bind = PIPE_BIND_LINEAR; + buf_templ.usage = PIPE_USAGE_STAGING; + buf_templ.flags = 0; + buf_templ.width0 = port->sPortParam.format.video.nFrameWidth; + buf_templ.height0 = port->sPortParam.format.video.nFrameHeight * 3 / 2; + buf_templ.depth0 = 1; + buf_templ.array_size = 1; + + *resource = priv->s_pipe->screen->resource_create(priv->s_pipe->screen, &buf_templ); + if (!*resource) + return OMX_ErrorInsufficientResources; + + box.width = (*resource)->width0; + box.height = (*resource)->height0; + box.depth = (*resource)->depth0; + ptr = priv->s_pipe->transfer_map(priv->s_pipe, *resource, 0, PIPE_TRANSFER_WRITE, &box, transfer); + if (map) + *map = ptr; + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + + if (!param) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexParamPortDefinition: { + OMX_PARAM_PORTDEFINITIONTYPE *def = param; + + r = omx_base_component_SetParameter(handle, idx, param); + if (r) + return r; + + if (def->nPortIndex == OMX_BASE_FILTER_INPUTPORT_INDEX) { + omx_base_video_PortType *port; + unsigned framesize; + struct pipe_resource *resource; + struct pipe_transfer *transfer; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + enc_AllocateBackTexture(priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX], + &resource, &transfer, NULL); + port->sPortParam.format.video.nStride = transfer->stride; + pipe_transfer_unmap(priv->s_pipe, transfer); + pipe_resource_reference(&resource, NULL); + + framesize = port->sPortParam.format.video.nStride * + port->sPortParam.format.video.nFrameHeight; + port->sPortParam.format.video.nSliceHeight = port->sPortParam.format.video.nFrameHeight; + port->sPortParam.nBufferSize = framesize * 3 / 2; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]; + port->sPortParam.nBufferSize = framesize * 512 / (16*16); + + priv->frame_rate = def->format.video.xFramerate; + + priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged, + OMX_BASE_FILTER_OUTPUTPORT_INDEX, 0, NULL); + } + break; + } + case OMX_IndexParamStandardComponentRole: { + OMX_PARAM_COMPONENTROLETYPE *role = param; + + r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE)); + if (r) + return r; + + if (strcmp((char *)role->cRole, OMX_VID_ENC_AVC_ROLE)) { + return OMX_ErrorBadParameter; + } + + break; + } + case OMX_IndexParamVideoBitrate: { + OMX_VIDEO_PARAM_BITRATETYPE *bitrate = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_BITRATETYPE)); + if (r) + return r; + + priv->bitrate = *bitrate; + + break; + } + case OMX_IndexParamVideoQuantization: { + OMX_VIDEO_PARAM_QUANTIZATIONTYPE *quant = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_QUANTIZATIONTYPE)); + if (r) + return r; + + priv->quant = *quant; + + break; + } + case OMX_IndexParamVideoProfileLevelCurrent: { + OMX_VIDEO_PARAM_PROFILELEVELTYPE *profile_level = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PROFILELEVELTYPE)); + if (r) + return r; + + priv->profile_level = *profile_level; + + break; + } + default: + return omx_base_component_SetParameter(handle, idx, param); + } + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + + if (!param) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexParamStandardComponentRole: { + OMX_PARAM_COMPONENTROLETYPE *role = param; + + r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE)); + if (r) + return r; + + strcpy((char *)role->cRole, OMX_VID_ENC_AVC_ROLE); + break; + } + case OMX_IndexParamVideoInit: + r = checkHeader(param, sizeof(OMX_PORT_PARAM_TYPE)); + if (r) + return r; + + memcpy(param, &priv->sPortTypesParam[OMX_PortDomainVideo], sizeof(OMX_PORT_PARAM_TYPE)); + break; + + case OMX_IndexParamVideoPortFormat: { + OMX_VIDEO_PARAM_PORTFORMATTYPE *format = param; + omx_base_video_PortType *port; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + if (r) + return r; + + if (format->nPortIndex > 1) + return OMX_ErrorBadPortIndex; + if (format->nIndex >= 1) + return OMX_ErrorNoMore; + + port = (omx_base_video_PortType *)priv->ports[format->nPortIndex]; + memcpy(format, &port->sVideoParam, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE)); + break; + } + case OMX_IndexParamVideoBitrate: { + OMX_VIDEO_PARAM_BITRATETYPE *bitrate = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_BITRATETYPE)); + if (r) + return r; + + bitrate->eControlRate = priv->bitrate.eControlRate; + bitrate->nTargetBitrate = priv->bitrate.nTargetBitrate; + + break; + } + case OMX_IndexParamVideoQuantization: { + OMX_VIDEO_PARAM_QUANTIZATIONTYPE *quant = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_QUANTIZATIONTYPE)); + if (r) + return r; + + quant->nQpI = priv->quant.nQpI; + quant->nQpP = priv->quant.nQpP; + quant->nQpB = priv->quant.nQpB; + + break; + } + case OMX_IndexParamVideoProfileLevelCurrent: { + OMX_VIDEO_PARAM_PROFILELEVELTYPE *profile_level = param; + + r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PROFILELEVELTYPE)); + if (r) + return r; + + profile_level->eProfile = priv->profile_level.eProfile; + profile_level->eLevel = priv->profile_level.eLevel; + + break; + } + default: + return omx_base_component_GetParameter(handle, idx, param); + } + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + int i; + + if (!config) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexConfigVideoIntraVOPRefresh: { + OMX_CONFIG_INTRAREFRESHVOPTYPE *type = config; + + r = checkHeader(config, sizeof(OMX_CONFIG_INTRAREFRESHVOPTYPE)); + if (r) + return r; + + priv->force_pic_type = *type; + + break; + } + case OMX_IndexConfigCommonScale: { + OMX_CONFIG_SCALEFACTORTYPE *scale = config; + + r = checkHeader(config, sizeof(OMX_CONFIG_SCALEFACTORTYPE)); + if (r) + return r; + + if (scale->xWidth < 176 || scale->xHeight < 144) + return OMX_ErrorBadParameter; + + for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i) { + if (priv->scale_buffer[i]) { + priv->scale_buffer[i]->destroy(priv->scale_buffer[i]); + priv->scale_buffer[i] = NULL; + } + } + + priv->scale = *scale; + if (priv->scale.xWidth != 0xffffffff && priv->scale.xHeight != 0xffffffff) { + struct pipe_video_buffer templat = {}; + + templat.buffer_format = PIPE_FORMAT_NV12; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = priv->scale.xWidth; + templat.height = priv->scale.xHeight; + templat.interlaced = false; + for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i) { + priv->scale_buffer[i] = priv->s_pipe->create_video_buffer(priv->s_pipe, &templat); + if (!priv->scale_buffer[i]) + return OMX_ErrorInsufficientResources; + } + } + + break; + } + default: + return omx_base_component_SetConfig(handle, idx, config); + } + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config) +{ + OMX_COMPONENTTYPE *comp = handle; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_ERRORTYPE r; + + if (!config) + return OMX_ErrorBadParameter; + + switch(idx) { + case OMX_IndexConfigCommonScale: { + OMX_CONFIG_SCALEFACTORTYPE *scale = config; + + r = checkHeader(config, sizeof(OMX_CONFIG_SCALEFACTORTYPE)); + if (r) + return r; + + scale->xWidth = priv->scale.xWidth; + scale->xHeight = priv->scale.xHeight; + + break; + } + default: + return omx_base_component_GetConfig(handle, idx, config); + } + + return OMX_ErrorNone; +} + +static enum pipe_video_profile enc_TranslateOMXProfileToPipe(unsigned omx_profile) +{ + switch (omx_profile) { + case OMX_VIDEO_AVCProfileBaseline: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE; + case OMX_VIDEO_AVCProfileMain: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN; + case OMX_VIDEO_AVCProfileExtended: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED; + case OMX_VIDEO_AVCProfileHigh: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH; + case OMX_VIDEO_AVCProfileHigh10: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10; + case OMX_VIDEO_AVCProfileHigh422: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422; + case OMX_VIDEO_AVCProfileHigh444: + return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444; + default: + return PIPE_VIDEO_PROFILE_UNKNOWN; + } +} + +static unsigned enc_TranslateOMXLevelToPipe(unsigned omx_level) +{ + switch (omx_level) { + case OMX_VIDEO_AVCLevel1: + case OMX_VIDEO_AVCLevel1b: + return 10; + case OMX_VIDEO_AVCLevel11: + return 11; + case OMX_VIDEO_AVCLevel12: + return 12; + case OMX_VIDEO_AVCLevel13: + return 13; + case OMX_VIDEO_AVCLevel2: + return 20; + case OMX_VIDEO_AVCLevel21: + return 21; + case OMX_VIDEO_AVCLevel22: + return 22; + case OMX_VIDEO_AVCLevel3: + return 30; + case OMX_VIDEO_AVCLevel31: + return 31; + case OMX_VIDEO_AVCLevel32: + return 32; + case OMX_VIDEO_AVCLevel4: + return 40; + case OMX_VIDEO_AVCLevel41: + return 41; + default: + case OMX_VIDEO_AVCLevel42: + return 42; + case OMX_VIDEO_AVCLevel5: + return 50; + case OMX_VIDEO_AVCLevel51: + return 51; + } +} + +static OMX_ERRORTYPE vid_enc_MessageHandler(OMX_COMPONENTTYPE* comp, internalRequestMessageType *msg) +{ + vid_enc_PrivateType* priv = comp->pComponentPrivate; + + if (msg->messageType == OMX_CommandStateSet) { + if ((msg->messageParam == OMX_StateIdle ) && (priv->state == OMX_StateLoaded)) { + + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + + templat.profile = enc_TranslateOMXProfileToPipe(priv->profile_level.eProfile); + templat.level = enc_TranslateOMXLevelToPipe(priv->profile_level.eLevel); + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_ENCODE; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = priv->scale_buffer[priv->current_scale_buffer] ? + priv->scale.xWidth : port->sPortParam.format.video.nFrameWidth; + templat.height = priv->scale_buffer[priv->current_scale_buffer] ? + priv->scale.xHeight : port->sPortParam.format.video.nFrameHeight; + + if (templat.profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) { + struct pipe_screen *screen = priv->screen->pscreen; + templat.max_references = 1; + priv->stacked_frames_num = + screen->get_video_param(screen, + PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH, + PIPE_VIDEO_ENTRYPOINT_ENCODE, + PIPE_VIDEO_CAP_STACKED_FRAMES); + } else { + templat.max_references = OMX_VID_ENC_P_PERIOD_DEFAULT; + priv->stacked_frames_num = 1; + } + priv->codec = priv->s_pipe->create_video_codec(priv->s_pipe, &templat); + + } else if ((msg->messageParam == OMX_StateLoaded) && (priv->state == OMX_StateIdle)) { + if (priv->codec) { + priv->codec->destroy(priv->codec); + priv->codec = NULL; + } + } + } + + return omx_base_component_MessageHandler(comp, msg); +} + +static OMX_ERRORTYPE vid_enc_AllocateInBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf, + OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size) +{ + struct input_buf_private *inp; + OMX_ERRORTYPE r; + + r = base_port_AllocateBuffer(port, buf, idx, private, size); + if (r) + return r; + + inp = (*buf)->pInputPortPrivate = CALLOC_STRUCT(input_buf_private); + if (!inp) { + base_port_FreeBuffer(port, idx, *buf); + return OMX_ErrorInsufficientResources; + } + + LIST_INITHEAD(&inp->tasks); + + FREE((*buf)->pBuffer); + r = enc_AllocateBackTexture(port, &inp->resource, &inp->transfer, &(*buf)->pBuffer); + if (r) { + FREE(inp); + base_port_FreeBuffer(port, idx, *buf); + return r; + } + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_UseInBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE **buf, OMX_U32 idx, + OMX_PTR private, OMX_U32 size, OMX_U8 *mem) +{ + struct input_buf_private *inp; + OMX_ERRORTYPE r; + + r = base_port_UseBuffer(port, buf, idx, private, size, mem); + if (r) + return r; + + inp = (*buf)->pInputPortPrivate = CALLOC_STRUCT(input_buf_private); + if (!inp) { + base_port_FreeBuffer(port, idx, *buf); + return OMX_ErrorInsufficientResources; + } + + LIST_INITHEAD(&inp->tasks); + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_FreeInBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct input_buf_private *inp = buf->pInputPortPrivate; + + if (inp) { + enc_ReleaseTasks(&inp->tasks); + if (inp->transfer) + pipe_transfer_unmap(priv->s_pipe, inp->transfer); + pipe_resource_reference(&inp->resource, NULL); + FREE(inp); + } + buf->pBuffer = NULL; + + return base_port_FreeBuffer(port, idx, buf); +} + +static OMX_ERRORTYPE vid_enc_AllocateOutBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf, + OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size) +{ + OMX_ERRORTYPE r; + + r = base_port_AllocateBuffer(port, buf, idx, private, size); + if (r) + return r; + + FREE((*buf)->pBuffer); + (*buf)->pBuffer = NULL; + (*buf)->pOutputPortPrivate = CALLOC(1, sizeof(struct output_buf_private)); + if (!(*buf)->pOutputPortPrivate) { + base_port_FreeBuffer(port, idx, *buf); + return OMX_ErrorInsufficientResources; + } + + return OMX_ErrorNone; +} + +static OMX_ERRORTYPE vid_enc_FreeOutBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + + if (buf->pOutputPortPrivate) { + struct output_buf_private *outp = buf->pOutputPortPrivate; + if (outp->transfer) + pipe_transfer_unmap(priv->t_pipe, outp->transfer); + pipe_resource_reference(&outp->bitstream, NULL); + FREE(outp); + buf->pOutputPortPrivate = NULL; + } + buf->pBuffer = NULL; + + return base_port_FreeBuffer(port, idx, buf); +} + +static struct encode_task *enc_NeedTask(omx_base_PortType *port) +{ + OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video; + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + + struct pipe_video_buffer templat = {}; + struct encode_task *task; + + if (!LIST_IS_EMPTY(&priv->free_tasks)) { + task = LIST_ENTRY(struct encode_task, priv->free_tasks.next, list); + LIST_DEL(&task->list); + return task; + } + + /* allocate a new one */ + task = CALLOC_STRUCT(encode_task); + if (!task) + return NULL; + + templat.buffer_format = PIPE_FORMAT_NV12; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.width = def->nFrameWidth; + templat.height = def->nFrameHeight; + templat.interlaced = false; + + task->buf = priv->s_pipe->create_video_buffer(priv->s_pipe, &templat); + if (!task->buf) { + FREE(task); + return NULL; + } + + return task; +} + +static void enc_MoveTasks(struct list_head *from, struct list_head *to) +{ + to->prev->next = from->next; + from->next->prev = to->prev; + from->prev->next = to; + to->prev = from->prev; + LIST_INITHEAD(from); +} + +static void enc_ReleaseTasks(struct list_head *head) +{ + struct encode_task *i, *next; + + if (!head || !head->next) + return; + + LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) { + pipe_resource_reference(&i->bitstream, NULL); + i->buf->destroy(i->buf); + FREE(i); + } +} + +static OMX_ERRORTYPE enc_LoadImage(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf, + struct pipe_video_buffer *vbuf) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video; + struct pipe_box box = {}; + struct input_buf_private *inp = buf->pInputPortPrivate; + + if (!inp->resource) { + struct pipe_sampler_view **views; + void *ptr; + + views = vbuf->get_sampler_view_planes(vbuf); + if (!views) + return OMX_ErrorInsufficientResources; + + ptr = buf->pBuffer; + box.width = def->nFrameWidth; + box.height = def->nFrameHeight; + box.depth = 1; + priv->s_pipe->texture_subdata(priv->s_pipe, views[0]->texture, 0, + PIPE_TRANSFER_WRITE, &box, + ptr, def->nStride, 0); + ptr = ((uint8_t*)buf->pBuffer) + (def->nStride * box.height); + box.width = def->nFrameWidth / 2; + box.height = def->nFrameHeight / 2; + box.depth = 1; + priv->s_pipe->texture_subdata(priv->s_pipe, views[1]->texture, 0, + PIPE_TRANSFER_WRITE, &box, + ptr, def->nStride, 0); + } else { + struct pipe_blit_info blit; + struct vl_video_buffer *dst_buf = (struct vl_video_buffer *)vbuf; + + pipe_transfer_unmap(priv->s_pipe, inp->transfer); + + box.width = def->nFrameWidth; + box.height = def->nFrameHeight; + box.depth = 1; + + priv->s_pipe->resource_copy_region(priv->s_pipe, + dst_buf->resources[0], + 0, 0, 0, 0, inp->resource, 0, &box); + + memset(&blit, 0, sizeof(blit)); + blit.src.resource = inp->resource; + blit.src.format = inp->resource->format; + + blit.src.box.x = 0; + blit.src.box.y = def->nFrameHeight; + blit.src.box.width = def->nFrameWidth; + blit.src.box.height = def->nFrameHeight / 2 ; + blit.src.box.depth = 1; + + blit.dst.resource = dst_buf->resources[1]; + blit.dst.format = blit.dst.resource->format; + + blit.dst.box.width = def->nFrameWidth / 2; + blit.dst.box.height = def->nFrameHeight / 2; + blit.dst.box.depth = 1; + blit.filter = PIPE_TEX_FILTER_NEAREST; + + blit.mask = PIPE_MASK_G; + priv->s_pipe->blit(priv->s_pipe, &blit); + + blit.src.box.x = 1; + blit.mask = PIPE_MASK_R; + priv->s_pipe->blit(priv->s_pipe, &blit); + priv->s_pipe->flush(priv->s_pipe, NULL, 0); + + box.width = inp->resource->width0; + box.height = inp->resource->height0; + box.depth = inp->resource->depth0; + buf->pBuffer = priv->s_pipe->transfer_map(priv->s_pipe, inp->resource, 0, + PIPE_TRANSFER_WRITE, &box, + &inp->transfer); + } + + return OMX_ErrorNone; +} + +static void enc_ScaleInput(omx_base_PortType *port, struct pipe_video_buffer **vbuf, unsigned *size) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video; + struct pipe_video_buffer *src_buf = *vbuf; + struct vl_compositor *compositor = &priv->compositor; + struct vl_compositor_state *s = &priv->cstate; + struct pipe_sampler_view **views; + struct pipe_surface **dst_surface; + unsigned i; + + if (!priv->scale_buffer[priv->current_scale_buffer]) + return; + + views = src_buf->get_sampler_view_planes(src_buf); + dst_surface = priv->scale_buffer[priv->current_scale_buffer]->get_surfaces + (priv->scale_buffer[priv->current_scale_buffer]); + vl_compositor_clear_layers(s); + + for (i = 0; i < VL_MAX_SURFACES; ++i) { + struct u_rect src_rect; + if (!views[i] || !dst_surface[i]) + continue; + src_rect.x0 = 0; + src_rect.y0 = 0; + src_rect.x1 = def->nFrameWidth; + src_rect.y1 = def->nFrameHeight; + if (i > 0) { + src_rect.x1 /= 2; + src_rect.y1 /= 2; + } + vl_compositor_set_rgba_layer(s, compositor, 0, views[i], &src_rect, NULL, NULL); + vl_compositor_render(s, compositor, dst_surface[i], NULL, false); + } + *size = priv->scale.xWidth * priv->scale.xHeight * 2; + *vbuf = priv->scale_buffer[priv->current_scale_buffer++]; + priv->current_scale_buffer %= OMX_VID_ENC_NUM_SCALING_BUFFERS; +} + +static void enc_GetPictureParamPreset(struct pipe_h264_enc_picture_desc *picture) +{ + picture->motion_est.enc_disable_sub_mode = 0x000000fe; + picture->motion_est.enc_ime2_search_range_x = 0x00000001; + picture->motion_est.enc_ime2_search_range_y = 0x00000001; + picture->pic_ctrl.enc_constraint_set_flags = 0x00000040; +} + +static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_picture_desc *picture) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct pipe_h264_enc_rate_control *rate_ctrl = &picture->rate_ctrl; + + switch (priv->bitrate.eControlRate) { + case OMX_Video_ControlRateVariable: + rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE; + break; + case OMX_Video_ControlRateConstant: + rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT; + break; + case OMX_Video_ControlRateVariableSkipFrames: + rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP; + break; + case OMX_Video_ControlRateConstantSkipFrames: + rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP; + break; + default: + rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE; + break; + } + + rate_ctrl->frame_rate_den = OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT; + rate_ctrl->frame_rate_num = ((priv->frame_rate) >> 16) * rate_ctrl->frame_rate_den; + + if (rate_ctrl->rate_ctrl_method != PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE) { + if (priv->bitrate.nTargetBitrate < OMX_VID_ENC_BITRATE_MIN) + rate_ctrl->target_bitrate = OMX_VID_ENC_BITRATE_MIN; + else if (priv->bitrate.nTargetBitrate < OMX_VID_ENC_BITRATE_MAX) + rate_ctrl->target_bitrate = priv->bitrate.nTargetBitrate; + else + rate_ctrl->target_bitrate = OMX_VID_ENC_BITRATE_MAX; + rate_ctrl->peak_bitrate = rate_ctrl->target_bitrate; + if (rate_ctrl->target_bitrate < OMX_VID_ENC_BITRATE_MEDIAN) + rate_ctrl->vbv_buffer_size = MIN2((rate_ctrl->target_bitrate * 2.75), OMX_VID_ENC_BITRATE_MEDIAN); + else + rate_ctrl->vbv_buffer_size = rate_ctrl->target_bitrate; + + if (rate_ctrl->frame_rate_num) { + unsigned long long t = rate_ctrl->target_bitrate; + t *= rate_ctrl->frame_rate_den; + rate_ctrl->target_bits_picture = t / rate_ctrl->frame_rate_num; + } else { + rate_ctrl->target_bits_picture = rate_ctrl->target_bitrate; + } + rate_ctrl->peak_bits_picture_integer = rate_ctrl->target_bits_picture; + rate_ctrl->peak_bits_picture_fraction = 0; + } + + picture->quant_i_frames = priv->quant.nQpI; + picture->quant_p_frames = priv->quant.nQpP; + picture->quant_b_frames = priv->quant.nQpB; + + picture->frame_num = priv->frame_num; + picture->ref_idx_l0 = priv->ref_idx_l0; + picture->ref_idx_l1 = priv->ref_idx_l1; + picture->enable_vui = (picture->rate_ctrl.frame_rate_num != 0); + enc_GetPictureParamPreset(picture); +} + +static void enc_HandleTask(omx_base_PortType *port, struct encode_task *task, + enum pipe_h264_enc_picture_type picture_type) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + unsigned size = priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]->sPortParam.nBufferSize; + struct pipe_video_buffer *vbuf = task->buf; + struct pipe_h264_enc_picture_desc picture = {}; + + /* -------------- scale input image --------- */ + enc_ScaleInput(port, &vbuf, &size); + priv->s_pipe->flush(priv->s_pipe, NULL, 0); + + /* -------------- allocate output buffer --------- */ + task->bitstream = pipe_buffer_create(priv->s_pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STAGING, /* map for read */ + size); + + picture.picture_type = picture_type; + picture.pic_order_cnt = task->pic_order_cnt; + if (priv->restricted_b_frames && picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) + picture.not_referenced = true; + enc_ControlPicture(port, &picture); + + /* -------------- encode frame --------- */ + priv->codec->begin_frame(priv->codec, vbuf, &picture.base); + priv->codec->encode_bitstream(priv->codec, vbuf, task->bitstream, &task->feedback); + priv->codec->end_frame(priv->codec, vbuf, &picture.base); +} + +static void enc_ClearBframes(omx_base_PortType *port, struct input_buf_private *inp) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct encode_task *task; + + if (LIST_IS_EMPTY(&priv->b_frames)) + return; + + task = LIST_ENTRY(struct encode_task, priv->b_frames.prev, list); + LIST_DEL(&task->list); + + /* promote last from to P frame */ + priv->ref_idx_l0 = priv->ref_idx_l1; + enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_P); + LIST_ADDTAIL(&task->list, &inp->tasks); + priv->ref_idx_l1 = priv->frame_num++; + + /* handle B frames */ + LIST_FOR_EACH_ENTRY(task, &priv->b_frames, list) { + enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_B); + if (!priv->restricted_b_frames) + priv->ref_idx_l0 = priv->frame_num; + priv->frame_num++; + } + + enc_MoveTasks(&priv->b_frames, &inp->tasks); +} + +static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf) +{ + OMX_COMPONENTTYPE* comp = port->standCompContainer; + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct input_buf_private *inp = buf->pInputPortPrivate; + enum pipe_h264_enc_picture_type picture_type; + struct encode_task *task; + unsigned stacked_num = 0; + OMX_ERRORTYPE err; + + enc_MoveTasks(&inp->tasks, &priv->free_tasks); + task = enc_NeedTask(port); + if (!task) + return OMX_ErrorInsufficientResources; + + if (buf->nFilledLen == 0) { + if (buf->nFlags & OMX_BUFFERFLAG_EOS) { + buf->nFilledLen = buf->nAllocLen; + enc_ClearBframes(port, inp); + enc_MoveTasks(&priv->stacked_tasks, &inp->tasks); + priv->codec->flush(priv->codec); + } + return base_port_SendBufferFunction(port, buf); + } + + if (buf->pOutputPortPrivate) { + struct pipe_video_buffer *vbuf = buf->pOutputPortPrivate; + buf->pOutputPortPrivate = task->buf; + task->buf = vbuf; + } else { + /* ------- load input image into video buffer ---- */ + err = enc_LoadImage(port, buf, task->buf); + if (err != OMX_ErrorNone) { + FREE(task); + return err; + } + } + + /* -------------- determine picture type --------- */ + if (!(priv->pic_order_cnt % OMX_VID_ENC_IDR_PERIOD_DEFAULT) || + priv->force_pic_type.IntraRefreshVOP) { + enc_ClearBframes(port, inp); + picture_type = PIPE_H264_ENC_PICTURE_TYPE_IDR; + priv->force_pic_type.IntraRefreshVOP = OMX_FALSE; + priv->frame_num = 0; + } else if (priv->codec->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE || + !(priv->pic_order_cnt % OMX_VID_ENC_P_PERIOD_DEFAULT) || + (buf->nFlags & OMX_BUFFERFLAG_EOS)) { + picture_type = PIPE_H264_ENC_PICTURE_TYPE_P; + } else { + picture_type = PIPE_H264_ENC_PICTURE_TYPE_B; + } + + task->pic_order_cnt = priv->pic_order_cnt++; + + if (picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) { + /* put frame at the tail of the queue */ + LIST_ADDTAIL(&task->list, &priv->b_frames); + } else { + /* handle I or P frame */ + priv->ref_idx_l0 = priv->ref_idx_l1; + enc_HandleTask(port, task, picture_type); + LIST_ADDTAIL(&task->list, &priv->stacked_tasks); + LIST_FOR_EACH_ENTRY(task, &priv->stacked_tasks, list) { + ++stacked_num; + } + if (stacked_num == priv->stacked_frames_num) { + struct encode_task *t; + t = LIST_ENTRY(struct encode_task, priv->stacked_tasks.next, list); + LIST_DEL(&t->list); + LIST_ADDTAIL(&t->list, &inp->tasks); + } + priv->ref_idx_l1 = priv->frame_num++; + + /* handle B frames */ + LIST_FOR_EACH_ENTRY(task, &priv->b_frames, list) { + enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_B); + if (!priv->restricted_b_frames) + priv->ref_idx_l0 = priv->frame_num; + priv->frame_num++; + } + + enc_MoveTasks(&priv->b_frames, &inp->tasks); + } + + if (LIST_IS_EMPTY(&inp->tasks)) + return port->ReturnBufferFunction(port, buf); + else + return base_port_SendBufferFunction(port, buf); +} + +static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output) +{ + vid_enc_PrivateType *priv = comp->pComponentPrivate; + struct output_buf_private *outp = output->pOutputPortPrivate; + struct input_buf_private *inp = input->pInputPortPrivate; + struct encode_task *task; + struct pipe_box box = {}; + unsigned size; + + if (!inp || LIST_IS_EMPTY(&inp->tasks)) { + input->nFilledLen = 0; /* mark buffer as empty */ + enc_MoveTasks(&priv->used_tasks, &inp->tasks); + return; + } + + task = LIST_ENTRY(struct encode_task, inp->tasks.next, list); + LIST_DEL(&task->list); + LIST_ADDTAIL(&task->list, &priv->used_tasks); + + if (!task->bitstream) + return; + + /* ------------- map result buffer ----------------- */ + + if (outp->transfer) + pipe_transfer_unmap(priv->t_pipe, outp->transfer); + + pipe_resource_reference(&outp->bitstream, task->bitstream); + pipe_resource_reference(&task->bitstream, NULL); + + box.width = outp->bitstream->width0; + box.height = outp->bitstream->height0; + box.depth = outp->bitstream->depth0; + + output->pBuffer = priv->t_pipe->transfer_map(priv->t_pipe, outp->bitstream, 0, + PIPE_TRANSFER_READ_WRITE, + &box, &outp->transfer); + + /* ------------- get size of result ----------------- */ + + priv->codec->get_feedback(priv->codec, task->feedback, &size); + + output->nOffset = 0; + output->nFilledLen = size; /* mark buffer as full */ + + /* all output buffers contain exactly one frame */ + output->nFlags = OMX_BUFFERFLAG_ENDOFFRAME; +} diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h new file mode 100644 index 000000000..a83374450 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König <christian.koenig@amd.com> + * + */ + +#ifndef OMX_VID_ENC_H +#define OMX_VID_ENC_H + +#include <OMX_Types.h> +#include <OMX_Component.h> +#include <OMX_Core.h> + +#include <bellagio/st_static_component_loader.h> +#include <bellagio/omx_base_filter.h> + +#include "util/list.h" + +#include "vl/vl_defines.h" +#include "vl/vl_compositor.h" + +#define OMX_VID_ENC_BASE_NAME "OMX.mesa.video_encoder" +#define OMX_VID_ENC_AVC_NAME "OMX.mesa.video_encoder.avc" +#define OMX_VID_ENC_AVC_ROLE "video_encoder.avc" + +#define OMX_VID_ENC_BITRATE_MIN 64000 +#define OMX_VID_ENC_BITRATE_MEDIAN 2000000 +#define OMX_VID_ENC_BITRATE_MAX 240000000 +#define OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT 1001 +#define OMX_VID_ENC_QUANT_I_FRAMES_DEFAULT 0x1c +#define OMX_VID_ENC_QUANT_P_FRAMES_DEFAULT 0x1c +#define OMX_VID_ENC_QUANT_B_FRAMES_DEFAULT 0x1c +#define OMX_VID_ENC_SCALING_WIDTH_DEFAULT 0xffffffff +#define OMX_VID_ENC_SCALING_HEIGHT_DEFAULT 0xffffffff +#define OMX_VID_ENC_IDR_PERIOD_DEFAULT 1000 +#define OMX_VID_ENC_P_PERIOD_DEFAULT 3 + +#define OMX_VID_ENC_NUM_SCALING_BUFFERS 4 + +DERIVEDCLASS(vid_enc_PrivateType, omx_base_filter_PrivateType) +#define vid_enc_PrivateType_FIELDS omx_base_filter_PrivateType_FIELDS \ + struct vl_screen *screen; \ + struct pipe_context *s_pipe; \ + struct pipe_context *t_pipe; \ + struct pipe_video_codec *codec; \ + struct list_head free_tasks; \ + struct list_head used_tasks; \ + struct list_head b_frames; \ + struct list_head stacked_tasks; \ + OMX_U32 frame_rate; \ + OMX_U32 frame_num; \ + OMX_U32 pic_order_cnt; \ + OMX_U32 ref_idx_l0, ref_idx_l1; \ + OMX_BOOL restricted_b_frames; \ + OMX_VIDEO_PARAM_BITRATETYPE bitrate; \ + OMX_VIDEO_PARAM_QUANTIZATIONTYPE quant; \ + OMX_VIDEO_PARAM_PROFILELEVELTYPE profile_level; \ + OMX_CONFIG_INTRAREFRESHVOPTYPE force_pic_type; \ + struct vl_compositor compositor; \ + struct vl_compositor_state cstate; \ + struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS]; \ + OMX_CONFIG_SCALEFACTORTYPE scale; \ + OMX_U32 current_scale_buffer; \ + OMX_U32 stacked_frames_num; +ENDCLASS(vid_enc_PrivateType) + +OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp); + +#endif diff --git a/lib/mesa/src/gallium/state_trackers/va/Makefile.sources b/lib/mesa/src/gallium/state_trackers/va/Makefile.sources index daebf0120..2d6546b4b 100644 --- a/lib/mesa/src/gallium/state_trackers/va/Makefile.sources +++ b/lib/mesa/src/gallium/state_trackers/va/Makefile.sources @@ -10,6 +10,7 @@ C_SOURCES := \ picture_h264.c \ picture_hevc.c \ picture_vc1.c \ + picture_mjpeg.c \ postproc.c \ subpicture.c \ surface.c \ diff --git a/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c b/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c new file mode 100644 index 000000000..396b74344 --- /dev/null +++ b/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "va_private.h" + +void vlVaHandlePictureParameterBufferMJPEG(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) +{ + VAPictureParameterBufferJPEGBaseline *mjpeg = buf->data; + unsigned sf; + int i; + + assert(buf->size >= sizeof(VAPictureParameterBufferJPEGBaseline) && buf->num_elements == 1); + + context->desc.mjpeg.picture_parameter.picture_width = mjpeg->picture_width; + context->desc.mjpeg.picture_parameter.picture_height = mjpeg->picture_height; + + for (i = 0; i < mjpeg->num_components; ++i) { + context->desc.mjpeg.picture_parameter.components[i].component_id = + mjpeg->components[i].component_id; + context->desc.mjpeg.picture_parameter.components[i].h_sampling_factor = + mjpeg->components[i].h_sampling_factor; + context->desc.mjpeg.picture_parameter.components[i].v_sampling_factor = + mjpeg->components[i].v_sampling_factor; + context->desc.mjpeg.picture_parameter.components[i].quantiser_table_selector = + mjpeg->components[i].quantiser_table_selector; + + sf = mjpeg->components[i].h_sampling_factor << 4 | mjpeg->components[i].v_sampling_factor; + context->mjpeg.sampling_factor <<= 8; + context->mjpeg.sampling_factor |= sf; + } + + context->desc.mjpeg.picture_parameter.num_components = mjpeg->num_components; +} + +void vlVaHandleIQMatrixBufferMJPEG(vlVaContext *context, vlVaBuffer *buf) +{ + VAIQMatrixBufferJPEGBaseline *mjpeg = buf->data; + + assert(buf->size >= sizeof(VAIQMatrixBufferJPEGBaseline) && buf->num_elements == 1); + + memcpy(&context->desc.mjpeg.quantization_table.load_quantiser_table, mjpeg->load_quantiser_table, 4); + memcpy(&context->desc.mjpeg.quantization_table.quantiser_table, mjpeg->quantiser_table, 4 * 64); +} + +void vlVaHandleHuffmanTableBufferType(vlVaContext *context, vlVaBuffer *buf) +{ + VAHuffmanTableBufferJPEGBaseline *mjpeg = buf->data; + int i; + + assert(buf->size >= sizeof(VASliceParameterBufferJPEGBaseline) && buf->num_elements == 1); + + for (i = 0; i < 2; ++i) { + context->desc.mjpeg.huffman_table.load_huffman_table[i] = mjpeg->load_huffman_table[i]; + + memcpy(&context->desc.mjpeg.huffman_table.table[i].num_dc_codes, + mjpeg->huffman_table[i].num_dc_codes, 16); + memcpy(&context->desc.mjpeg.huffman_table.table[i].dc_values, + mjpeg->huffman_table[i].dc_values, 12); + memcpy(&context->desc.mjpeg.huffman_table.table[i].num_ac_codes, + mjpeg->huffman_table[i].num_ac_codes, 16); + memcpy(&context->desc.mjpeg.huffman_table.table[i].ac_values, + mjpeg->huffman_table[i].ac_values, 162); + memcpy(&context->desc.mjpeg.huffman_table.table[i].pad, mjpeg->huffman_table[i].pad, 2); + } +} + +void vlVaHandleSliceParameterBufferMJPEG(vlVaContext *context, vlVaBuffer *buf) +{ + VASliceParameterBufferJPEGBaseline *mjpeg = buf->data; + int i; + + assert(buf->size >= sizeof(VASliceParameterBufferJPEGBaseline) && buf->num_elements == 1); + + context->desc.mjpeg.slice_parameter.slice_data_size = mjpeg->slice_data_size; + context->desc.mjpeg.slice_parameter.slice_data_offset = mjpeg->slice_data_offset; + context->desc.mjpeg.slice_parameter.slice_data_flag = mjpeg->slice_data_flag; + context->desc.mjpeg.slice_parameter.slice_horizontal_position = mjpeg->slice_horizontal_position; + context->desc.mjpeg.slice_parameter.slice_vertical_position = mjpeg->slice_vertical_position; + + for (i = 0; i < mjpeg->num_components; ++i) { + context->desc.mjpeg.slice_parameter.components[i].component_selector = + mjpeg->components[i].component_selector; + context->desc.mjpeg.slice_parameter.components[i].dc_table_selector = + mjpeg->components[i].dc_table_selector; + context->desc.mjpeg.slice_parameter.components[i].ac_table_selector = + mjpeg->components[i].ac_table_selector; + } + + context->desc.mjpeg.slice_parameter.num_components = mjpeg->num_components; + context->desc.mjpeg.slice_parameter.restart_interval = mjpeg->restart_interval; + context->desc.mjpeg.slice_parameter.num_mcus = mjpeg->num_mcus; +} diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am new file mode 100644 index 000000000..4c9a12c3c --- /dev/null +++ b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am @@ -0,0 +1,75 @@ +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + $(GALLIUM_TARGET_CFLAGS) + +omxdir = $(OMX_BELLAGIO_LIB_INSTALL_DIR) +omx_LTLIBRARIES = libomx_mesa.la + +nodist_EXTRA_libomx_mesa_la_SOURCES = dummy.cpp +libomx_mesa_la_SOURCES = + +libomx_mesa_la_LDFLAGS = \ + -shared \ + -module \ + -no-undefined \ + -avoid-version \ + $(GC_SECTIONS) \ + $(LD_NO_UNDEFINED) + +if HAVE_LD_VERSION_SCRIPT +libomx_mesa_la_LDFLAGS += \ + -Wl,--version-script=$(top_srcdir)/src/gallium/targets/omx-bellagio/omx.sym +endif # HAVE_LD_VERSION_SCRIPT + +libomx_mesa_la_LIBADD = \ + $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(OMX_BELLAGIO_LIBS) \ + $(LIBDRM_LIBS) \ + $(GALLIUM_COMMON_LIB_DEPS) + +if HAVE_PLATFORM_X11 +libomx_mesa_la_LIBADD += \ + $(VL_LIBS) \ + $(XCB_DRI3_LIBS) +endif + +EXTRA_libomx_mesa_la_DEPENDENCIES = omx.sym +EXTRA_DIST = omx.sym + +if HAVE_GALLIUM_STATIC_TARGETS + +TARGET_DRIVERS = +TARGET_CPPFLAGS = +TARGET_LIB_DEPS = + + +include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc + +include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc +include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc + +libomx_mesa_la_SOURCES += target.c +libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS) +libomx_mesa_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ + $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ + $(TARGET_LIB_DEPS) \ + $(TARGET_COMPILER_LIB_DEPS) \ + $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) + +else # HAVE_GALLIUM_STATIC_TARGETS + +libomx_mesa_la_LIBADD += \ + $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la + +endif # HAVE_GALLIUM_STATIC_TARGETS + +if HAVE_GALLIUM_LLVM +libomx_mesa_la_LIBADD += $(LLVM_LIBS) +libomx_mesa_la_LDFLAGS += $(LLVM_LDFLAGS) +endif diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in new file mode 100644 index 000000000..1c63b48c7 --- /dev/null +++ b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in @@ -0,0 +1,1102 @@ +# Makefile.in generated by automake 1.15.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2017 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@HAVE_LIBDRM_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) + +@HAVE_DRISW_TRUE@am__append_2 = \ +@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la + +@HAVE_DRISW_KMS_TRUE@am__append_3 = \ +@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ +@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) + +@HAVE_LD_VERSION_SCRIPT_TRUE@am__append_4 = \ +@HAVE_LD_VERSION_SCRIPT_TRUE@ -Wl,--version-script=$(top_srcdir)/src/gallium/targets/omx-bellagio/omx.sym + +@HAVE_PLATFORM_X11_TRUE@am__append_5 = \ +@HAVE_PLATFORM_X11_TRUE@ $(VL_LIBS) \ +@HAVE_PLATFORM_X11_TRUE@ $(XCB_DRI3_LIBS) + +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_6 = nouveau +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_7 = -DGALLIUM_NOUVEAU +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_8 = \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/nouveau/drm/libnouveaudrm.la \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/nouveau/libnouveau.la \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(NOUVEAU_LIBS) \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS) + +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_9 = r600 +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_10 = -DGALLIUM_R600 +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_11 = \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/r600/libr600.la \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(RADEON_LIBS) \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS) \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBELF_LIBS) + +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_12 = radeonsi +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_13 = -DGALLIUM_RADEONSI +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_14 = \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(RADEON_LIBS) \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS) \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(AMDGPU_LIBS) + +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_15 = target.c +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_16 = \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_LIB_DEPS) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_COMPILER_LIB_DEPS) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON) + +@HAVE_GALLIUM_STATIC_TARGETS_FALSE@am__append_17 = \ +@HAVE_GALLIUM_STATIC_TARGETS_FALSE@ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la + +@HAVE_GALLIUM_LLVM_TRUE@am__append_18 = $(LLVM_LIBS) +@HAVE_GALLIUM_LLVM_TRUE@am__append_19 = $(LLVM_LDFLAGS) +subdir = src/gallium/targets/omx-bellagio +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_gnu_make.m4 \ + $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ + $(top_srcdir)/m4/ax_gcc_builtin.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_flex.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/VERSION $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(omxdir)" +LTLIBRARIES = $(omx_LTLIBRARIES) +am__DEPENDENCIES_1 = +@HAVE_LIBDRM_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) +@HAVE_PLATFORM_X11_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) \ +@HAVE_PLATFORM_X11_TRUE@ $(am__DEPENDENCIES_1) +@HAVE_DRISW_KMS_TRUE@am__DEPENDENCIES_5 = $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ +@HAVE_DRISW_KMS_TRUE@ $(am__DEPENDENCIES_1) +am__DEPENDENCIES_6 = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_2) $(am__DEPENDENCIES_5) +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_7 = $(top_builddir)/src/gallium/winsys/nouveau/drm/libnouveaudrm.la \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/nouveau/libnouveau.la \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_8 = $(top_builddir)/src/gallium/drivers/r600/libr600.la \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_9 = $(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_10 = \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_7) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_8) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_9) +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_11 = $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_6) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_10) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_COMPILER_LIB_DEPS) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_WINSYS) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_COMMON) +@HAVE_GALLIUM_LLVM_TRUE@am__DEPENDENCIES_12 = $(am__DEPENDENCIES_1) +libomx_mesa_la_DEPENDENCIES = $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3) \ + $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_11) $(am__append_17) \ + $(am__DEPENDENCIES_12) +am__libomx_mesa_la_SOURCES_DIST = target.c +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__objects_1 = \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ libomx_mesa_la-target.lo +am_libomx_mesa_la_OBJECTS = $(am__objects_1) +libomx_mesa_la_OBJECTS = $(am_libomx_mesa_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libomx_mesa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(libomx_mesa_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(libomx_mesa_la_SOURCES) \ + $(nodist_EXTRA_libomx_mesa_la_SOURCES) +DIST_SOURCES = $(am__libomx_mesa_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/bin/depcomp \ + $(top_srcdir)/src/gallium/Automake.inc \ + $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc \ + $(top_srcdir)/src/gallium/drivers/r600/Automake.inc \ + $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ +AMDGPU_LIBS = @AMDGPU_LIBS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +ANDROID_CFLAGS = @ANDROID_CFLAGS@ +ANDROID_LIBS = @ANDROID_LIBS@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BSYMBOLIC = @BSYMBOLIC@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ +CLOCK_LIB = @CLOCK_LIB@ +CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ +DEFINES = @DEFINES@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLOPEN_LIBS = @DLOPEN_LIBS@ +DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ +DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRIGL_CFLAGS = @DRIGL_CFLAGS@ +DRIGL_LIBS = @DRIGL_LIBS@ +DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ +DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ +DRI_LIB_DEPS = @DRI_LIB_DEPS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGL_CFLAGS = @EGL_CFLAGS@ +EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGREP = @EGREP@ +ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ +ETNAVIV_LIBS = @ETNAVIV_LIBS@ +EXEEXT = @EXEEXT@ +EXPAT_CFLAGS = @EXPAT_CFLAGS@ +EXPAT_LIBS = @EXPAT_LIBS@ +FGREP = @FGREP@ +FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ +FREEDRENO_LIBS = @FREEDRENO_LIBS@ +GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ +GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ +GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ +GC_SECTIONS = @GC_SECTIONS@ +GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ +GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ +GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ +GLPROTO_LIBS = @GLPROTO_LIBS@ +GLVND_CFLAGS = @GLVND_CFLAGS@ +GLVND_LIBS = @GLVND_LIBS@ +GLX_TLS = @GLX_TLS@ +GL_LIB = @GL_LIB@ +GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GREP = @GREP@ +HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +I915_CFLAGS = @I915_CFLAGS@ +I915_LIBS = @I915_LIBS@ +INDENT = @INDENT@ +INDENT_FLAGS = @INDENT_FLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ +LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ +LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ +LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ +LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBELF_CFLAGS = @LIBELF_CFLAGS@ +LIBELF_LIBS = @LIBELF_LIBS@ +LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIB_DIR = @LIB_DIR@ +LIB_EXT = @LIB_EXT@ +LIPO = @LIPO@ +LLVM_CFLAGS = @LLVM_CFLAGS@ +LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ +LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ +LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBS = @LLVM_LIBS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ +MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ +NINE_MAJOR = @NINE_MAJOR@ +NINE_MINOR = @NINE_MINOR@ +NINE_TINY = @NINE_TINY@ +NINE_VERSION = @NINE_VERSION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ +NOUVEAU_LIBS = @NOUVEAU_LIBS@ +NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ +NVVIEUX_LIBS = @NVVIEUX_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@ +OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@ +OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@ +OPENCL_LIBNAME = @OPENCL_LIBNAME@ +OPENCL_VERSION = @OPENCL_VERSION@ +OSMESA_LIB = @OSMESA_LIB@ +OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ +OSMESA_PC_REQ = @OSMESA_PC_REQ@ +OSMESA_VERSION = @OSMESA_VERSION@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSIX_SHELL = @POSIX_SHELL@ +PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ +PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +PWR8_CFLAGS = @PWR8_CFLAGS@ +PYTHON2 = @PYTHON2@ +RADEON_CFLAGS = @RADEON_CFLAGS@ +RADEON_LIBS = @RADEON_LIBS@ +RANLIB = @RANLIB@ +RM = @RM@ +SED = @SED@ +SELINUX_CFLAGS = @SELINUX_CFLAGS@ +SELINUX_LIBS = @SELINUX_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ +SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ +SSE41_CFLAGS = @SSE41_CFLAGS@ +STRIP = @STRIP@ +SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ +SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ +SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ +SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ +SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VA_CFLAGS = @VA_CFLAGS@ +VA_LIBS = @VA_LIBS@ +VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ +VA_MAJOR = @VA_MAJOR@ +VA_MINOR = @VA_MINOR@ +VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@ +VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@ +VDPAU_CFLAGS = @VDPAU_CFLAGS@ +VDPAU_LIBS = @VDPAU_LIBS@ +VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ +VDPAU_MAJOR = @VDPAU_MAJOR@ +VDPAU_MINOR = @VDPAU_MINOR@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +VL_CFLAGS = @VL_CFLAGS@ +VL_LIBS = @VL_LIBS@ +VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ +WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@ +WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@ +WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ +WAYLAND_SCANNER = @WAYLAND_SCANNER@ +WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ +WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ +WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@ +WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@ +WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@ +X11_INCLUDES = @X11_INCLUDES@ +XA_MAJOR = @XA_MAJOR@ +XA_MINOR = @XA_MINOR@ +XA_TINY = @XA_TINY@ +XA_VERSION = @XA_VERSION@ +XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ +XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@ +XCB_DRI3_LIBS = @XCB_DRI3_LIBS@ +XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ +XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ +XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ +XLIBGL_LIBS = @XLIBGL_LIBS@ +XVMC_CFLAGS = @XVMC_CFLAGS@ +XVMC_LIBS = @XVMC_LIBS@ +XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ +XVMC_MAJOR = @XVMC_MAJOR@ +XVMC_MINOR = @XVMC_MINOR@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acv_mako_found = @acv_mako_found@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +ifGNUmake = @ifGNUmake@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +GALLIUM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + + +# src/gallium/auxiliary must appear before src/gallium/drivers +# because there are stupidly two rbug_context.h files in +# different directories, and which one is included by the +# preprocessor is determined by the ordering of the -I flags. +GALLIUM_DRIVER_CFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_DRIVER_CXXFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CXXFLAGS) + +GALLIUM_TARGET_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/loader \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(top_builddir)/src/util/ \ + -I$(top_builddir)/src/gallium/drivers/ \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) \ + $(LIBDRM_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ + $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) +GALLIUM_WINSYS_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_2) $(am__append_3) +AM_CFLAGS = \ + $(GALLIUM_TARGET_CFLAGS) + +omxdir = $(OMX_BELLAGIO_LIB_INSTALL_DIR) +omx_LTLIBRARIES = libomx_mesa.la +nodist_EXTRA_libomx_mesa_la_SOURCES = dummy.cpp +libomx_mesa_la_SOURCES = $(am__append_15) +libomx_mesa_la_LDFLAGS = -shared -module -no-undefined -avoid-version \ + $(GC_SECTIONS) $(LD_NO_UNDEFINED) $(am__append_4) \ + $(am__append_19) +libomx_mesa_la_LIBADD = $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \ + $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la $(OMX_BELLAGIO_LIBS) \ + $(LIBDRM_LIBS) $(GALLIUM_COMMON_LIB_DEPS) $(am__append_5) \ + $(am__append_16) $(am__append_17) $(am__append_18) +EXTRA_libomx_mesa_la_DEPENDENCIES = omx.sym +EXTRA_DIST = omx.sym +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_DRIVERS = $(am__append_6) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_9) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_12) +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_CPPFLAGS = $(am__append_7) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_10) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_13) +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_LIB_DEPS = $(am__append_8) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_11) \ +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_14) +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_WINSYS = \ +@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la + +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_WINSYS = \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la + +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_COMPILER_LIB_DEPS = \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/compiler/nir/libnir.la + +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_COMMON = \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/radeon/libradeon.la \ +@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/amd/common/libamd_common.la + +@HAVE_GALLIUM_STATIC_TARGETS_TRUE@libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cpp .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/src/gallium/Automake.inc $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc $(top_srcdir)/src/gallium/drivers/r600/Automake.inc $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/targets/omx-bellagio/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/gallium/targets/omx-bellagio/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(top_srcdir)/src/gallium/Automake.inc $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc $(top_srcdir)/src/gallium/drivers/r600/Automake.inc $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-omxLTLIBRARIES: $(omx_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(omx_LTLIBRARIES)'; test -n "$(omxdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(omxdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(omxdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(omxdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(omxdir)"; \ + } + +uninstall-omxLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(omx_LTLIBRARIES)'; test -n "$(omxdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(omxdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(omxdir)/$$f"; \ + done + +clean-omxLTLIBRARIES: + -test -z "$(omx_LTLIBRARIES)" || rm -f $(omx_LTLIBRARIES) + @list='$(omx_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libomx_mesa.la: $(libomx_mesa_la_OBJECTS) $(libomx_mesa_la_DEPENDENCIES) $(EXTRA_libomx_mesa_la_DEPENDENCIES) + $(AM_V_CXXLD)$(libomx_mesa_la_LINK) -rpath $(omxdir) $(libomx_mesa_la_OBJECTS) $(libomx_mesa_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libomx_mesa_la-dummy.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libomx_mesa_la-target.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libomx_mesa_la-target.lo: target.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libomx_mesa_la-target.lo -MD -MP -MF $(DEPDIR)/libomx_mesa_la-target.Tpo -c -o libomx_mesa_la-target.lo `test -f 'target.c' || echo '$(srcdir)/'`target.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libomx_mesa_la-target.Tpo $(DEPDIR)/libomx_mesa_la-target.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='target.c' object='libomx_mesa_la-target.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libomx_mesa_la-target.lo `test -f 'target.c' || echo '$(srcdir)/'`target.c + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +libomx_mesa_la-dummy.lo: dummy.cpp +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libomx_mesa_la-dummy.lo -MD -MP -MF $(DEPDIR)/libomx_mesa_la-dummy.Tpo -c -o libomx_mesa_la-dummy.lo `test -f 'dummy.cpp' || echo '$(srcdir)/'`dummy.cpp +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libomx_mesa_la-dummy.Tpo $(DEPDIR)/libomx_mesa_la-dummy.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='dummy.cpp' object='libomx_mesa_la-dummy.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libomx_mesa_la-dummy.lo `test -f 'dummy.cpp' || echo '$(srcdir)/'`dummy.cpp + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(omxdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-omxLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-omxLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-omxLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-omxLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-omxLTLIBRARIES install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-omxLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym b/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym new file mode 100644 index 000000000..e8a287600 --- /dev/null +++ b/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym @@ -0,0 +1,11 @@ +{ + global: + omx_component_library_Setup; + + # Workaround for an LLVM warning with -simplifycfg-sink-common + # due to LLVM being initialized multiple times. + radeon_drm_winsys_create; + amdgpu_winsys_create; + local: + *; +}; diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/target.c b/lib/mesa/src/gallium/targets/omx-bellagio/target.c new file mode 100644 index 000000000..308e23bb4 --- /dev/null +++ b/lib/mesa/src/gallium/targets/omx-bellagio/target.c @@ -0,0 +1,2 @@ +#include "target-helpers/drm_helper.h" +#include "target-helpers/sw_helper.h" diff --git a/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh b/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh index 85fb9ea4e..097774336 100644 --- a/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh +++ b/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh @@ -2,12 +2,12 @@ FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR -DCL CONST[1] -DCL CONST[3] +DCL CONST[0][1] +DCL CONST[0][3] DCL TEMP[0..1] -ADD TEMP[0], IN[0], CONST[1] -RCP TEMP[1], CONST[3].xxxx +ADD TEMP[0], IN[0], CONST[0][1] +RCP TEMP[1], CONST[0][3].xxxx MUL OUT[0], TEMP[0], TEMP[1] END diff --git a/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh b/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh index e227917fd..0b05ca8b6 100644 --- a/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh +++ b/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh @@ -4,13 +4,13 @@ DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR -DCL CONST[1] -DCL CONST[3] +DCL CONST[0][1] +DCL CONST[0][3] DCL TEMP[0..1] MOV OUT[0], IN[0] -ADD TEMP[0], IN[1], CONST[1] -RCP TEMP[1], CONST[3].xxxx +ADD TEMP[0], IN[1], CONST[0][1] +RCP TEMP[1], CONST[0][3].xxxx MUL OUT[1], TEMP[0], TEMP[1] END diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am new file mode 100644 index 000000000..fc5d1ca57 --- /dev/null +++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am @@ -0,0 +1,31 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_WINSYS_CFLAGS) + +noinst_LTLIBRARIES = libvc5drm.la + +libvc5drm_la_SOURCES = $(C_SOURCES) diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in new file mode 100644 index 000000000..9884b0eea --- /dev/null +++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in @@ -0,0 +1,882 @@ +# Makefile.in generated by automake 1.15.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2017 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@HAVE_LIBDRM_TRUE@am__append_1 = \ +@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) + +@HAVE_DRISW_TRUE@am__append_2 = \ +@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la + +@HAVE_DRISW_KMS_TRUE@am__append_3 = \ +@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ +@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) + +subdir = src/gallium/winsys/vc5/drm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_gnu_make.m4 \ + $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ + $(top_srcdir)/m4/ax_gcc_builtin.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_flex.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/VERSION $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libvc5drm_la_LIBADD = +am__objects_1 = vc5_drm_winsys.lo +am_libvc5drm_la_OBJECTS = $(am__objects_1) +libvc5drm_la_OBJECTS = $(am_libvc5drm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libvc5drm_la_SOURCES) +DIST_SOURCES = $(libvc5drm_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ + $(top_srcdir)/bin/depcomp \ + $(top_srcdir)/src/gallium/Automake.inc +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ +AMDGPU_LIBS = @AMDGPU_LIBS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +ANDROID_CFLAGS = @ANDROID_CFLAGS@ +ANDROID_LIBS = @ANDROID_LIBS@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BSYMBOLIC = @BSYMBOLIC@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ +CLOCK_LIB = @CLOCK_LIB@ +CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ +DEFINES = @DEFINES@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLOPEN_LIBS = @DLOPEN_LIBS@ +DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ +DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRIGL_CFLAGS = @DRIGL_CFLAGS@ +DRIGL_LIBS = @DRIGL_LIBS@ +DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ +DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ +DRI_LIB_DEPS = @DRI_LIB_DEPS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGL_CFLAGS = @EGL_CFLAGS@ +EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGREP = @EGREP@ +ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ +ETNAVIV_LIBS = @ETNAVIV_LIBS@ +EXEEXT = @EXEEXT@ +EXPAT_CFLAGS = @EXPAT_CFLAGS@ +EXPAT_LIBS = @EXPAT_LIBS@ +FGREP = @FGREP@ +FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ +FREEDRENO_LIBS = @FREEDRENO_LIBS@ +GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ +GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ +GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ +GC_SECTIONS = @GC_SECTIONS@ +GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ +GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ +GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ +GLPROTO_LIBS = @GLPROTO_LIBS@ +GLVND_CFLAGS = @GLVND_CFLAGS@ +GLVND_LIBS = @GLVND_LIBS@ +GLX_TLS = @GLX_TLS@ +GL_LIB = @GL_LIB@ +GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GREP = @GREP@ +HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +I915_CFLAGS = @I915_CFLAGS@ +I915_LIBS = @I915_LIBS@ +INDENT = @INDENT@ +INDENT_FLAGS = @INDENT_FLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBATOMIC_LIBS = @LIBATOMIC_LIBS@ +LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ +LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ +LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ +LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBELF_CFLAGS = @LIBELF_CFLAGS@ +LIBELF_LIBS = @LIBELF_LIBS@ +LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIB_DIR = @LIB_DIR@ +LIB_EXT = @LIB_EXT@ +LIPO = @LIPO@ +LLVM_CFLAGS = @LLVM_CFLAGS@ +LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ +LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ +LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBS = @LLVM_LIBS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ +MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ +NINE_MAJOR = @NINE_MAJOR@ +NINE_MINOR = @NINE_MINOR@ +NINE_TINY = @NINE_TINY@ +NINE_VERSION = @NINE_VERSION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ +NOUVEAU_LIBS = @NOUVEAU_LIBS@ +NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ +NVVIEUX_LIBS = @NVVIEUX_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@ +OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@ +OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@ +OPENCL_LIBNAME = @OPENCL_LIBNAME@ +OPENCL_VERSION = @OPENCL_VERSION@ +OSMESA_LIB = @OSMESA_LIB@ +OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ +OSMESA_PC_REQ = @OSMESA_PC_REQ@ +OSMESA_VERSION = @OSMESA_VERSION@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSIX_SHELL = @POSIX_SHELL@ +PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ +PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +PWR8_CFLAGS = @PWR8_CFLAGS@ +PYTHON2 = @PYTHON2@ +RADEON_CFLAGS = @RADEON_CFLAGS@ +RADEON_LIBS = @RADEON_LIBS@ +RANLIB = @RANLIB@ +RM = @RM@ +SED = @SED@ +SELINUX_CFLAGS = @SELINUX_CFLAGS@ +SELINUX_LIBS = @SELINUX_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ +SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ +SSE41_CFLAGS = @SSE41_CFLAGS@ +STRIP = @STRIP@ +SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ +SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ +SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ +SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@ +SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VA_CFLAGS = @VA_CFLAGS@ +VA_LIBS = @VA_LIBS@ +VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ +VA_MAJOR = @VA_MAJOR@ +VA_MINOR = @VA_MINOR@ +VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@ +VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@ +VDPAU_CFLAGS = @VDPAU_CFLAGS@ +VDPAU_LIBS = @VDPAU_LIBS@ +VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ +VDPAU_MAJOR = @VDPAU_MAJOR@ +VDPAU_MINOR = @VDPAU_MINOR@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +VL_CFLAGS = @VL_CFLAGS@ +VL_LIBS = @VL_LIBS@ +VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@ +WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@ +WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@ +WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@ +WAYLAND_SCANNER = @WAYLAND_SCANNER@ +WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ +WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ +WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@ +WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@ +WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@ +X11_INCLUDES = @X11_INCLUDES@ +XA_MAJOR = @XA_MAJOR@ +XA_MINOR = @XA_MINOR@ +XA_TINY = @XA_TINY@ +XA_VERSION = @XA_VERSION@ +XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ +XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@ +XCB_DRI3_LIBS = @XCB_DRI3_LIBS@ +XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ +XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ +XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ +XLIBGL_LIBS = @XLIBGL_LIBS@ +XVMC_CFLAGS = @XVMC_CFLAGS@ +XVMC_LIBS = @XVMC_LIBS@ +XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ +XVMC_MAJOR = @XVMC_MAJOR@ +XVMC_MINOR = @XVMC_MINOR@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +ZLIB_CFLAGS = @ZLIB_CFLAGS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acv_mako_found = @acv_mako_found@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +ifGNUmake = @ifGNUmake@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +C_SOURCES := \ + vc5_drm_public.h \ + vc5_drm_winsys.c + +GALLIUM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + + +# src/gallium/auxiliary must appear before src/gallium/drivers +# because there are stupidly two rbug_context.h files in +# different directories, and which one is included by the +# preprocessor is determined by the ordering of the -I flags. +GALLIUM_DRIVER_CFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_DRIVER_CXXFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CXXFLAGS) + +GALLIUM_TARGET_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/loader \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + -I$(top_builddir)/src/util/ \ + -I$(top_builddir)/src/gallium/drivers/ \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) \ + $(LIBDRM_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ + $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) +GALLIUM_WINSYS_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_2) $(am__append_3) +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_WINSYS_CFLAGS) + +noinst_LTLIBRARIES = libvc5drm.la +libvc5drm_la_SOURCES = $(C_SOURCES) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/winsys/vc5/drm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/gallium/winsys/vc5/drm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libvc5drm.la: $(libvc5drm_la_OBJECTS) $(libvc5drm_la_DEPENDENCIES) $(EXTRA_libvc5drm_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libvc5drm_la_OBJECTS) $(libvc5drm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_drm_winsys.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources new file mode 100644 index 000000000..ea7566f8d --- /dev/null +++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources @@ -0,0 +1,3 @@ +C_SOURCES := \ + vc5_drm_public.h \ + vc5_drm_winsys.c diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h new file mode 100644 index 000000000..6e1984815 --- /dev/null +++ b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __VC5_DRM_PUBLIC_H__ +#define __VC5_DRM_PUBLIC_H__ + +struct pipe_screen; + +struct pipe_screen *vc5_drm_screen_create(int drmFD); + +#endif /* __VC5_DRM_PUBLIC_H__ */ diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c new file mode 100644 index 000000000..d089291bf --- /dev/null +++ b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c @@ -0,0 +1,35 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <unistd.h> +#include <fcntl.h> + +#include "vc5_drm_public.h" + +#include "vc5/vc5_screen.h" + +struct pipe_screen * +vc5_drm_screen_create(int fd) +{ + return vc5_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); +} diff --git a/lib/mesa/src/git_sha1.h.in b/lib/mesa/src/git_sha1.h.in new file mode 100644 index 000000000..dc17f5e66 --- /dev/null +++ b/lib/mesa/src/git_sha1.h.in @@ -0,0 +1 @@ +#define MESA_GIT_SHA1 "git-@VCS_TAG@" diff --git a/lib/mesa/src/intel/common/gen_sample_positions.h b/lib/mesa/src/intel/common/gen_sample_positions.h index b86a7d843..f0ce95dd1 100644 --- a/lib/mesa/src/intel/common/gen_sample_positions.h +++ b/lib/mesa/src/intel/common/gen_sample_positions.h @@ -23,16 +23,38 @@ #ifndef GEN_SAMPLE_POSITIONS_H #define GEN_SAMPLE_POSITIONS_H +/* + * This file defines the standard multisample positions used by both GL and + * Vulkan. These correspond to the Vulkan "standard sample locations". + */ + +/** + * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8). + */ #define GEN_SAMPLE_POS_1X(prefix) \ prefix##0XOffset = 0.5; \ prefix##0YOffset = 0.5; +/** + * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75): + * 4 c + * 4 0 + * c 1 + */ #define GEN_SAMPLE_POS_2X(prefix) \ prefix##0XOffset = 0.25; \ prefix##0YOffset = 0.25; \ prefix##1XOffset = 0.75; \ prefix##1YOffset = 0.75; +/** + * Sample positions: + * 2 6 a e + * 2 0 + * 6 1 + * a 2 + * e 3 + */ #define GEN_SAMPLE_POS_4X(prefix) \ prefix##0XOffset = 0.375; \ prefix##0YOffset = 0.125; \ @@ -43,6 +65,28 @@ prefix##2YOffset = 0.625; \ prefix##3XOffset = 0.625; \ prefix##3YOffset = 0.875; +/** + * Sample positions: + * + * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE: + * Programming Notes): + * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and + * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 + * for 8X) must have monotonically increasing distance from the + * pixel center. This is required to get the correct centroid + * computation in the device." + * + * Sample positions: + * 1 3 5 7 9 b d f + * 1 7 + * 3 3 + * 5 0 + * 7 5 + * 9 2 + * b 1 + * d 4 + * f 6 + */ #define GEN_SAMPLE_POS_8X(prefix) \ prefix##0XOffset = 0.5625; \ prefix##0YOffset = 0.3125; \ @@ -61,6 +105,27 @@ prefix##6YOffset = 0.9375; \ prefix##7XOffset = 0.9375; \ prefix##7YOffset = 0.0625; +/** + * Sample positions: + * + * 0 1 2 3 4 5 6 7 8 9 a b c d e f + * 0 15 + * 1 9 + * 2 10 + * 3 7 + * 4 13 + * 5 1 + * 6 4 + * 7 3 + * 8 12 + * 9 0 + * a 2 + * b 6 + * c 11 + * d 5 + * e 8 + * f 14 + */ #define GEN_SAMPLE_POS_16X(prefix) \ prefix##0XOffset = 0.5625; \ prefix##0YOffset = 0.5625; \ diff --git a/lib/mesa/src/intel/common/intel_log.c b/lib/mesa/src/intel/common/intel_log.c new file mode 100644 index 000000000..cebdd6dd6 --- /dev/null +++ b/lib/mesa/src/intel/common/intel_log.c @@ -0,0 +1,87 @@ +/* + * Copyright © 2017 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdarg.h> + +#ifdef ANDROID +#include <android/log.h> +#else +#include <stdio.h> +#endif + +#include "intel_log.h" + +#ifdef ANDROID +static inline android_LogPriority +level_to_android(enum intel_log_level l) +{ + switch (l) { + case INTEL_LOG_ERROR: return ANDROID_LOG_ERROR; + case INTEL_LOG_WARN: return ANDROID_LOG_WARN; + case INTEL_LOG_INFO: return ANDROID_LOG_INFO; + case INTEL_LOG_DEBUG: return ANDROID_LOG_DEBUG; + } + + unreachable("bad intel_log_level"); +} +#endif + +#ifndef ANDROID +static inline const char * +level_to_str(enum intel_log_level l) +{ + switch (l) { + case INTEL_LOG_ERROR: return "error"; + case INTEL_LOG_WARN: return "warning"; + case INTEL_LOG_INFO: return "info"; + case INTEL_LOG_DEBUG: return "debug"; + } + + unreachable("bad intel_log_level"); +} +#endif + +void +intel_log(enum intel_log_level level, const char *tag, const char *format, ...) +{ + va_list va; + + va_start(va, format); + intel_log_v(level, tag, format, va); + va_end(va); +} + +void +intel_log_v(enum intel_log_level level, const char *tag, const char *format, + va_list va) +{ +#ifdef ANDROID + __android_log_vprint(level_to_android(level), tag, format, va); +#else + flockfile(stderr); + fprintf(stderr, "%s: %s: ", tag, level_to_str(level)); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + funlockfile(stderr); +#endif +} diff --git a/lib/mesa/src/intel/common/intel_log.h b/lib/mesa/src/intel/common/intel_log.h new file mode 100644 index 000000000..0f28109a0 --- /dev/null +++ b/lib/mesa/src/intel/common/intel_log.h @@ -0,0 +1,82 @@ +/* + * Copyright 2017 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef INTEL_LOG_H +#define INTEL_LOG_H + +#include <stdarg.h> + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef INTEL_LOG_TAG +#define INTEL_LOG_TAG "INTEL-MESA" +#endif + +enum intel_log_level { + INTEL_LOG_ERROR, + INTEL_LOG_WARN, + INTEL_LOG_INFO, + INTEL_LOG_DEBUG, +}; + +void PRINTFLIKE(3, 4) +intel_log(enum intel_log_level, const char *tag, const char *format, ...); + +void +intel_log_v(enum intel_log_level, const char *tag, const char *format, + va_list va); + +#define intel_loge(fmt, ...) intel_log(INTEL_LOG_ERROR, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__) +#define intel_logw(fmt, ...) intel_log(INTEL_LOG_WARN, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__) +#define intel_logi(fmt, ...) intel_log(INTEL_LOG_INFO, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__) +#ifdef DEBUG +#define intel_logd(fmt, ...) intel_log(INTEL_LOG_DEBUG, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__) +#else +#define intel_logd(fmt, ...) __intel_log_use_args((fmt), ##__VA_ARGS__) +#endif + +#define intel_loge_v(fmt, va) intel_log_v(INTEL_LOG_ERROR, (INTEL_LOG_TAG), (fmt), (va)) +#define intel_logw_v(fmt, va) intel_log_v(INTEL_LOG_WARN, (INTEL_LOG_TAG), (fmt), (va)) +#define intel_logi_v(fmt, va) intel_log_v(INTEL_LOG_INFO, (INTEL_LOG_TAG), (fmt), (va)) +#ifdef DEBUG +#define intel_logd_v(fmt, va) intel_log_v(INTEL_LOG_DEBUG, (INTEL_LOG_TAG), (fmt), (va)) +#else +#define intel_logd_v(fmt, va) __intel_log_use_args((fmt), (va)) +#endif + + +#ifndef DEBUG +/* Suppres -Wunused */ +static inline void PRINTFLIKE(1, 2) +__intel_log_use_args(const char *format, ...) { } +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* INTEL_LOG_H */ diff --git a/lib/mesa/src/intel/isl/isl_genX_priv.h b/lib/mesa/src/intel/isl/isl_genX_priv.h new file mode 100644 index 000000000..a005e1c7b --- /dev/null +++ b/lib/mesa/src/intel/isl/isl_genX_priv.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * NOTE: The header can be included multiple times, from the same file. + */ + +/* + * Gen-specific function declarations. This header must *not* be included + * directly. Instead, it is included multiple times by isl_priv.h + * + * In this header file, the usual isl_genX() macro is available. + */ + +void +isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, + const struct isl_surf_fill_state_info *restrict info); + +void +isl_genX(buffer_fill_state_s)(void *state, + const struct isl_buffer_fill_state_info *restrict info); + +void +isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + +void +isl_genX(null_fill_state)(void *state, struct isl_extent3d size); diff --git a/lib/mesa/src/intel/vulkan/anv_android.c b/lib/mesa/src/intel/vulkan/anv_android.c new file mode 100644 index 000000000..b1bbbb682 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_android.c @@ -0,0 +1,416 @@ +/* + * Copyright © 2017, Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <hardware/gralloc.h> +#include <hardware/hardware.h> +#include <hardware/hwvulkan.h> +#include <vulkan/vk_android_native_buffer.h> +#include <vulkan/vk_icd.h> +#include <sync/sync.h> + +#include "anv_private.h" + +static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); +static int anv_hal_close(struct hw_device_t *dev); + +static void UNUSED +static_asserts(void) +{ + STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); +} + +PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = { + .common = { + .tag = HARDWARE_MODULE_TAG, + .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1, + .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0), + .id = HWVULKAN_HARDWARE_MODULE_ID, + .name = "Intel Vulkan HAL", + .author = "Intel", + .methods = &(hw_module_methods_t) { + .open = anv_hal_open, + }, + }, +}; + +/* If any bits in test_mask are set, then unset them and return true. */ +static inline bool +unmask32(uint32_t *inout_mask, uint32_t test_mask) +{ + uint32_t orig_mask = *inout_mask; + *inout_mask &= ~test_mask; + return *inout_mask != orig_mask; +} + +static int +anv_hal_open(const struct hw_module_t* mod, const char* id, + struct hw_device_t** dev) +{ + assert(mod == &HAL_MODULE_INFO_SYM.common); + assert(strcmp(id, HWVULKAN_DEVICE_0) == 0); + + hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev)); + if (!hal_dev) + return -1; + + *hal_dev = (hwvulkan_device_t) { + .common = { + .tag = HARDWARE_DEVICE_TAG, + .version = HWVULKAN_DEVICE_API_VERSION_0_1, + .module = &HAL_MODULE_INFO_SYM.common, + .close = anv_hal_close, + }, + .EnumerateInstanceExtensionProperties = anv_EnumerateInstanceExtensionProperties, + .CreateInstance = anv_CreateInstance, + .GetInstanceProcAddr = anv_GetInstanceProcAddr, + }; + + *dev = &hal_dev->common; + return 0; +} + +static int +anv_hal_close(struct hw_device_t *dev) +{ + /* hwvulkan.h claims that hw_device_t::close() is never called. */ + return -1; +} + +VkResult +anv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) + +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + VkImage image_h = VK_NULL_HANDLE; + struct anv_image *image = NULL; + struct anv_bo *bo = NULL; + VkResult result; + + struct anv_image_create_info anv_info = { + .vk_info = base_info, + .isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT, + }; + + if (gralloc_info->handle->numFds != 1) { + return vk_errorf(device->instance, device, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "VkNativeBufferANDROID::handle::numFds is %d, " + "expected 1", gralloc_info->handle->numFds); + } + + /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf + * must exceed that of the gralloc handle, and we do not own the gralloc + * handle. + */ + int dma_buf = gralloc_info->handle->data[0]; + + result = anv_bo_cache_import(device, &device->bo_cache, dma_buf, &bo); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, device, result, + "failed to import dma-buf from VkNativeBufferANDROID"); + } + + int i915_tiling = anv_gem_get_tiling(device, bo->gem_handle); + switch (i915_tiling) { + case I915_TILING_NONE: + anv_info.isl_tiling_flags = ISL_TILING_LINEAR_BIT; + break; + case I915_TILING_X: + anv_info.isl_tiling_flags = ISL_TILING_X_BIT; + break; + case I915_TILING_Y: + anv_info.isl_tiling_flags = ISL_TILING_Y0_BIT; + break; + case -1: + result = vk_errorf(device->instance, device, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "DRM_IOCTL_I915_GEM_GET_TILING failed for " + "VkNativeBufferANDROID"); + goto fail_tiling; + default: + result = vk_errorf(device->instance, device, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "DRM_IOCTL_I915_GEM_GET_TILING returned unknown " + "tiling %d for VkNativeBufferANDROID", i915_tiling); + goto fail_tiling; + } + + enum isl_format format = anv_get_isl_format(&device->info, + base_info->format, + VK_IMAGE_ASPECT_COLOR_BIT, + base_info->tiling); + assert(format != ISL_FORMAT_UNSUPPORTED); + + anv_info.stride = gralloc_info->stride * + (isl_format_get_layout(format)->bpb / 8); + + result = anv_image_create(device_h, &anv_info, alloc, &image_h); + image = anv_image_from_handle(image_h); + if (result != VK_SUCCESS) + goto fail_create; + + if (bo->size < image->size) { + result = vk_errorf(device, device->instance, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "dma-buf from VkNativeBufferANDROID is too small for " + "VkImage: %"PRIu64"B < %"PRIu64"B", + bo->size, image->size); + goto fail_size; + } + + assert(image->n_planes == 1); + assert(image->planes[0].bo_offset == 0); + + image->planes[0].bo = bo; + image->planes[0].bo_is_owned = true; + + /* We need to set the WRITE flag on window system buffers so that GEM will + * know we're writing to them and synchronize uses on other rings (for + * example, if the display server uses the blitter ring). + * + * If this function fails and if the imported bo was resident in the cache, + * we should avoid updating the bo's flags. Therefore, we defer updating + * the flags until success is certain. + * + */ + bo->flags &= ~EXEC_OBJECT_ASYNC; + bo->flags |= EXEC_OBJECT_WRITE; + + /* Don't clobber the out-parameter until success is certain. */ + *out_image_h = image_h; + + return VK_SUCCESS; + + fail_size: + anv_DestroyImage(device_h, image_h, alloc); + fail_create: + fail_tiling: + anv_bo_cache_release(device, &device->bo_cache, bo); + + return result; +} + +VkResult anv_GetSwapchainGrallocUsageANDROID( + VkDevice device_h, + VkFormat format, + VkImageUsageFlags imageUsage, + int* grallocUsage) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + struct anv_physical_device *phys_dev = &device->instance->physicalDevice; + VkPhysicalDevice phys_dev_h = anv_physical_device_to_handle(phys_dev); + VkResult result; + + *grallocUsage = 0; + intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage); + + /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. + * The relevant code in libvulkan/swapchain.cpp contains this fun comment: + * + * TODO(jessehall): I think these are right, but haven't thought hard + * about it. Do we need to query the driver for support of any of + * these? + * + * Any disagreement between this function and the hardcoded + * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests + * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. + */ + + const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = imageUsage, + }; + + VkImageFormatProperties2KHR image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + }; + + /* Check that requested format and usage are supported. */ + result = anv_GetPhysicalDeviceImageFormatProperties2KHR(phys_dev_h, + &image_format_info, &image_format_props); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, device, result, + "anv_GetPhysicalDeviceImageFormatProperties2KHR failed " + "inside %s", __func__); + } + + /* Reject STORAGE here to avoid complexity elsewhere. */ + if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) { + return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED, + "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc " + "swapchain"); + } + + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_RENDER; + + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; + + /* All VkImageUsageFlags not explicitly checked here are unsupported for + * gralloc swapchains. + */ + if (imageUsage != 0) { + return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED, + "unsupported VkImageUsageFlags(0x%x) for gralloc " + "swapchain", imageUsage); + } + + /* The below formats support GRALLOC_USAGE_HW_FB (that is, display + * scanout). This short list of formats is univserally supported on Intel + * but is incomplete. The full set of supported formats is dependent on + * kernel and hardware. + * + * FINISHME: Advertise all display-supported formats. + */ + if (format == VK_FORMAT_B8G8R8A8_UNORM || + format == VK_FORMAT_B5G6R5_UNORM_PACK16) { + *grallocUsage |= GRALLOC_USAGE_HW_FB | + GRALLOC_USAGE_HW_COMPOSER | + GRALLOC_USAGE_EXTERNAL_DISP; + } + + if (*grallocUsage == 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + return VK_SUCCESS; +} + +VkResult +anv_AcquireImageANDROID( + VkDevice device_h, + VkImage image_h, + int nativeFenceFd, + VkSemaphore semaphore_h, + VkFence fence_h) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + VkResult result = VK_SUCCESS; + + if (nativeFenceFd != -1) { + /* As a simple, firstpass implementation of VK_ANDROID_native_buffer, we + * block on the nativeFenceFd. This may introduce latency and is + * definitiely inefficient, yet it's correct. + * + * FINISHME(chadv): Import the nativeFenceFd into the VkSemaphore and + * VkFence. + */ + if (sync_wait(nativeFenceFd, /*timeout*/ -1) < 0) { + result = vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, + "%s: failed to wait on nativeFenceFd=%d", + __func__, nativeFenceFd); + } + + /* From VK_ANDROID_native_buffer's pseudo spec + * (https://source.android.com/devices/graphics/implement-vulkan): + * + * The driver takes ownership of the fence fd and is responsible for + * closing it [...] even if vkAcquireImageANDROID fails and returns + * an error. + */ + close(nativeFenceFd); + + if (result != VK_SUCCESS) + return result; + } + + if (semaphore_h || fence_h) { + /* Thanks to implicit sync, the image is ready for GPU access. But we + * must still put the semaphore into the "submit" state; otherwise the + * client may get unexpected behavior if the client later uses it as + * a wait semaphore. + * + * Because we blocked above on the nativeFenceFd, the image is also + * ready for foreign-device access (including CPU access). But we must + * still signal the fence; otherwise the client may get unexpected + * behavior if the client later waits on it. + * + * For some values of anv_semaphore_type, we must submit the semaphore + * to execbuf in order to signal it. Likewise for anv_fence_type. + * Instead of open-coding here the signal operation for each + * anv_semaphore_type and anv_fence_type, we piggy-back on + * vkQueueSubmit. + */ + const VkSubmitInfo submit = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = 0, + .commandBufferCount = 0, + .signalSemaphoreCount = (semaphore_h ? 1 : 0), + .pSignalSemaphores = &semaphore_h, + }; + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &submit, fence_h); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, device, result, + "anv_QueueSubmit failed inside %s", __func__); + } + } + + return VK_SUCCESS; +} + +VkResult +anv_QueueSignalReleaseImageANDROID( + VkQueue queue, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + VkImage image, + int* pNativeFenceFd) +{ + VkResult result; + + if (waitSemaphoreCount == 0) + goto done; + + result = anv_QueueSubmit(queue, 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = 1, + .pWaitSemaphores = pWaitSemaphores, + }, + (VkFence) VK_NULL_HANDLE); + if (result != VK_SUCCESS) + return result; + + done: + if (pNativeFenceFd) { + /* We can rely implicit on sync because above we submitted all + * semaphores to the queue. + */ + *pNativeFenceFd = -1; + } + + return VK_SUCCESS; +} diff --git a/lib/mesa/src/intel/vulkan/anv_debug_report.c b/lib/mesa/src/intel/vulkan/anv_debug_report.c new file mode 100644 index 000000000..55d62057c --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_debug_report.c @@ -0,0 +1,119 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "vk_util.h" + +/* This file contains implementation for VK_EXT_debug_report. */ + +VkResult +anv_CreateDebugReportCallbackEXT(VkInstance _instance, + const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDebugReportCallbackEXT* pCallback) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + struct anv_debug_report_callback *cb = + vk_alloc2(&instance->alloc, pAllocator, + sizeof(struct anv_debug_report_callback), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (!cb) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cb->flags = pCreateInfo->flags; + cb->callback = pCreateInfo->pfnCallback; + cb->data = pCreateInfo->pUserData; + + pthread_mutex_lock(&instance->callbacks_mutex); + list_addtail(&cb->link, &instance->callbacks); + pthread_mutex_unlock(&instance->callbacks_mutex); + + *pCallback = anv_debug_report_callback_to_handle(cb); + + return VK_SUCCESS; +} + +void +anv_DestroyDebugReportCallbackEXT(VkInstance _instance, + VkDebugReportCallbackEXT _callback, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(anv_debug_report_callback, callback, _callback); + + /* Remove from list and destroy given callback. */ + pthread_mutex_lock(&instance->callbacks_mutex); + list_del(&callback->link); + vk_free2(&instance->alloc, pAllocator, callback); + pthread_mutex_unlock(&instance->callbacks_mutex); +} + +void +anv_DebugReportMessageEXT(VkInstance _instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + anv_debug_report(instance, flags, objectType, object, + location, messageCode, pLayerPrefix, pMessage); +} + +void +anv_debug_report(struct anv_instance *instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT object_type, + uint64_t handle, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char *pMessage) +{ + /* Allow NULL for convinience, return if no callbacks registered. */ + if (!instance || list_empty(&instance->callbacks)) + return; + + pthread_mutex_lock(&instance->callbacks_mutex); + + /* Section 33.2 of the Vulkan 1.0.59 spec says: + * + * "callback is an externally synchronized object and must not be + * used on more than one thread at a time. This means that + * vkDestroyDebugReportCallbackEXT must not be called when a callback + * is active." + */ + list_for_each_entry(struct anv_debug_report_callback, cb, + &instance->callbacks, link) { + if (cb->flags & flags) + cb->callback(flags, object_type, handle, location, messageCode, + pLayerPrefix, pMessage, cb->data); + } + + pthread_mutex_unlock(&instance->callbacks_mutex); +} diff --git a/lib/mesa/src/intel/vulkan/anv_dump.c b/lib/mesa/src/intel/vulkan/anv_dump.c index 060890421..160c18c4f 100644 --- a/lib/mesa/src/intel/vulkan/anv_dump.c +++ b/lib/mesa/src/intel/vulkan/anv_dump.c @@ -424,20 +424,25 @@ anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer, uint32_t b; for_each_bit(b, iview->image->aspects) { VkImageAspectFlagBits aspect = (1 << b); - char suffix; + const char *suffix; switch (aspect) { - case VK_IMAGE_ASPECT_COLOR_BIT: suffix = 'c'; break; - case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = 'd'; break; - case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = 's'; break; + case VK_IMAGE_ASPECT_COLOR_BIT: suffix = "c"; break; + case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = "d"; break; + case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = "s"; break; + case VK_IMAGE_ASPECT_PLANE_0_BIT_KHR: suffix = "c0"; break; + case VK_IMAGE_ASPECT_PLANE_1_BIT_KHR: suffix = "c1"; break; + case VK_IMAGE_ASPECT_PLANE_2_BIT_KHR: suffix = "c2"; break; default: unreachable("Invalid aspect"); } - char *filename = ralloc_asprintf(dump_ctx, "framebuffer%04d-%d%c.ppm", + char *filename = ralloc_asprintf(dump_ctx, "framebuffer%04d-%d%s.ppm", dump_idx, i, suffix); + unsigned plane = anv_image_aspect_to_plane(iview->image->aspects, aspect); dump_add_image(cmd_buffer, (struct anv_image *)iview->image, aspect, - iview->isl.base_level, iview->isl.base_array_layer, + iview->planes[plane].isl.base_level, + iview->planes[plane].isl.base_array_layer, filename); } } diff --git a/lib/mesa/src/intel/vulkan/anv_extensions.c b/lib/mesa/src/intel/vulkan/anv_extensions.c new file mode 100644 index 000000000..a5c1ea68a --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_extensions.c @@ -0,0 +1,447 @@ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#include "vk_util.h" + +/* Convert the VK_USE_PLATFORM_* defines to booleans */ +#ifdef VK_USE_PLATFORM_ANDROID_KHR +# undef VK_USE_PLATFORM_ANDROID_KHR +# define VK_USE_PLATFORM_ANDROID_KHR true +#else +# define VK_USE_PLATFORM_ANDROID_KHR false +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +# undef VK_USE_PLATFORM_WAYLAND_KHR +# define VK_USE_PLATFORM_WAYLAND_KHR true +#else +# define VK_USE_PLATFORM_WAYLAND_KHR false +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR +# undef VK_USE_PLATFORM_XCB_KHR +# define VK_USE_PLATFORM_XCB_KHR true +#else +# define VK_USE_PLATFORM_XCB_KHR false +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR +# undef VK_USE_PLATFORM_XLIB_KHR +# define VK_USE_PLATFORM_XLIB_KHR true +#else +# define VK_USE_PLATFORM_XLIB_KHR false +#endif + +/* And ANDROID too */ +#ifdef ANDROID +# undef ANDROID +# define ANDROID true +#else +# define ANDROID false +#endif + +#define ANV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR) + +bool +anv_instance_extension_supported(const char *name) +{ + if (strcmp(name, "VK_KHR_external_fence_capabilities") == 0) + return true; + if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0) + return true; + if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0) + return true; + if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0) + return true; + if (strcmp(name, "VK_KHR_get_surface_capabilities2") == 0) + return ANV_HAS_SURFACE; + if (strcmp(name, "VK_KHR_surface") == 0) + return ANV_HAS_SURFACE; + if (strcmp(name, "VK_KHR_wayland_surface") == 0) + return VK_USE_PLATFORM_WAYLAND_KHR; + if (strcmp(name, "VK_KHR_xcb_surface") == 0) + return VK_USE_PLATFORM_XCB_KHR; + if (strcmp(name, "VK_KHR_xlib_surface") == 0) + return VK_USE_PLATFORM_XLIB_KHR; + if (strcmp(name, "VK_EXT_debug_report") == 0) + return true; + return false; +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_fence_capabilities", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory_capabilities", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore_capabilities", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_get_physical_device_properties2", + .specVersion = 1, + }; + } + } + if (ANV_HAS_SURFACE) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_get_surface_capabilities2", + .specVersion = 1, + }; + } + } + if (ANV_HAS_SURFACE) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_surface", + .specVersion = 25, + }; + } + } + if (VK_USE_PLATFORM_WAYLAND_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_wayland_surface", + .specVersion = 6, + }; + } + } + if (VK_USE_PLATFORM_XCB_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_xcb_surface", + .specVersion = 6, + }; + } + } + if (VK_USE_PLATFORM_XLIB_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_xlib_surface", + .specVersion = 6, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_EXT_debug_report", + .specVersion = 8, + }; + } + } + + return vk_outarray_status(&out); +} + +uint32_t +anv_physical_device_api_version(struct anv_physical_device *dev) +{ + return VK_MAKE_VERSION(1, 0, 57); +} + +bool +anv_physical_device_extension_supported(struct anv_physical_device *device, + const char *name) +{ + if (strcmp(name, "VK_ANDROID_native_buffer") == 0) + return ANDROID; + if (strcmp(name, "VK_KHR_bind_memory2") == 0) + return true; + if (strcmp(name, "VK_KHR_dedicated_allocation") == 0) + return true; + if (strcmp(name, "VK_KHR_descriptor_update_template") == 0) + return true; + if (strcmp(name, "VK_KHR_external_fence") == 0) + return device->has_syncobj_wait; + if (strcmp(name, "VK_KHR_external_fence_fd") == 0) + return device->has_syncobj_wait; + if (strcmp(name, "VK_KHR_external_memory") == 0) + return true; + if (strcmp(name, "VK_KHR_external_memory_fd") == 0) + return true; + if (strcmp(name, "VK_KHR_external_semaphore") == 0) + return true; + if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0) + return true; + if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0) + return true; + if (strcmp(name, "VK_KHR_image_format_list") == 0) + return true; + if (strcmp(name, "VK_KHR_incremental_present") == 0) + return true; + if (strcmp(name, "VK_KHR_maintenance1") == 0) + return true; + if (strcmp(name, "VK_KHR_maintenance2") == 0) + return true; + if (strcmp(name, "VK_KHR_push_descriptor") == 0) + return true; + if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0) + return true; + if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0) + return true; + if (strcmp(name, "VK_KHR_sampler_ycbcr_conversion") == 0) + return true; + if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0) + return true; + if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0) + return true; + if (strcmp(name, "VK_KHR_swapchain") == 0) + return ANV_HAS_SURFACE; + if (strcmp(name, "VK_KHR_variable_pointers") == 0) + return true; + if (strcmp(name, "VK_KHX_multiview") == 0) + return false; + return false; +} + +VkResult anv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + (void)device; + + if (ANDROID) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_ANDROID_native_buffer", + .specVersion = 5, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_bind_memory2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_dedicated_allocation", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_descriptor_update_template", + .specVersion = 1, + }; + } + } + if (device->has_syncobj_wait) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_fence", + .specVersion = 1, + }; + } + } + if (device->has_syncobj_wait) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_fence_fd", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory_fd", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore_fd", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_get_memory_requirements2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_image_format_list", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_incremental_present", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_maintenance1", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_maintenance2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_push_descriptor", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_relaxed_block_layout", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_sampler_ycbcr_conversion", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_shader_draw_parameters", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_storage_buffer_storage_class", + .specVersion = 1, + }; + } + } + if (ANV_HAS_SURFACE) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_swapchain", + .specVersion = 68, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_variable_pointers", + .specVersion = 1, + }; + } + } + if (false) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHX_multiview", + .specVersion = 1, + }; + } + } + + return vk_outarray_status(&out); +} diff --git a/lib/mesa/src/intel/vulkan/anv_extensions.py b/lib/mesa/src/intel/vulkan/anv_extensions.py new file mode 100644 index 000000000..0f66c59e1 --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_extensions.py @@ -0,0 +1,284 @@ +COPYRIGHT = """\ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +""" + +import argparse +import copy +import re +import xml.etree.cElementTree as et + +MAX_API_VERSION = '1.0.57' + +class Extension: + def __init__(self, name, ext_version, enable): + self.name = name + self.ext_version = int(ext_version) + if enable is True: + self.enable = 'true'; + elif enable is False: + self.enable = 'false'; + else: + self.enable = enable; + +# On Android, we disable all surface and swapchain extensions. Android's Vulkan +# loader implements VK_KHR_surface and VK_KHR_swapchain, and applications +# cannot access the driver's implementation. Moreoever, if the driver exposes +# the those extension strings, then tests dEQP-VK.api.info.instance.extensions +# and dEQP-VK.api.info.device fail due to the duplicated strings. +EXTENSIONS = [ + Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), + Extension('VK_KHR_bind_memory2', 1, True), + Extension('VK_KHR_dedicated_allocation', 1, True), + Extension('VK_KHR_descriptor_update_template', 1, True), + Extension('VK_KHR_external_fence', 1, + 'device->has_syncobj_wait'), + Extension('VK_KHR_external_fence_capabilities', 1, True), + Extension('VK_KHR_external_fence_fd', 1, + 'device->has_syncobj_wait'), + Extension('VK_KHR_external_memory', 1, True), + Extension('VK_KHR_external_memory_capabilities', 1, True), + Extension('VK_KHR_external_memory_fd', 1, True), + Extension('VK_KHR_external_semaphore', 1, True), + Extension('VK_KHR_external_semaphore_capabilities', 1, True), + Extension('VK_KHR_external_semaphore_fd', 1, True), + Extension('VK_KHR_get_memory_requirements2', 1, True), + Extension('VK_KHR_get_physical_device_properties2', 1, True), + Extension('VK_KHR_get_surface_capabilities2', 1, 'ANV_HAS_SURFACE'), + Extension('VK_KHR_image_format_list', 1, True), + Extension('VK_KHR_incremental_present', 1, True), + Extension('VK_KHR_maintenance1', 1, True), + Extension('VK_KHR_maintenance2', 1, True), + Extension('VK_KHR_push_descriptor', 1, True), + Extension('VK_KHR_relaxed_block_layout', 1, True), + Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), + Extension('VK_KHR_sampler_ycbcr_conversion', 1, True), + Extension('VK_KHR_shader_draw_parameters', 1, True), + Extension('VK_KHR_storage_buffer_storage_class', 1, True), + Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'), + Extension('VK_KHR_swapchain', 68, 'ANV_HAS_SURFACE'), + Extension('VK_KHR_variable_pointers', 1, True), + Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), + Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), + Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), + Extension('VK_KHX_multiview', 1, False), + Extension('VK_EXT_debug_report', 8, True), +] + +class VkVersion: + def __init__(self, string): + split = string.split('.') + self.major = int(split[0]) + self.minor = int(split[1]) + if len(split) > 2: + assert len(split) == 3 + self.patch = int(split[2]) + else: + self.patch = None + + # Sanity check. The range bits are required by the definition of the + # VK_MAKE_VERSION macro + assert self.major < 1024 and self.minor < 1024 + assert self.patch is None or self.patch < 4096 + assert(str(self) == string) + + def __str__(self): + ver_list = [str(self.major), str(self.minor)] + if self.patch is not None: + ver_list.append(str(self.patch)) + return '.'.join(ver_list) + + def c_vk_version(self): + ver_list = [str(self.major), str(self.minor), str(self.patch)] + return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')' + + def __int_ver(self): + # This is just an expansion of VK_VERSION + patch = self.patch if self.patch is not None else 0 + return (self.major << 22) | (self.minor << 12) | patch + + def __cmp__(self, other): + # If only one of them has a patch version, "ignore" it by making + # other's patch version match self. + if (self.patch is None) != (other.patch is None): + other = copy.copy(other) + other.patch = self.patch + + return self.__int_ver().__cmp__(other.__int_ver()) + +MAX_API_VERSION = VkVersion(MAX_API_VERSION) + +def _init_exts_from_xml(xml): + """ Walk the Vulkan XML and fill out extra extension information. """ + + xml = et.parse(xml) + + ext_name_map = {} + for ext in EXTENSIONS: + ext_name_map[ext.name] = ext + + for ext_elem in xml.findall('.extensions/extension'): + ext_name = ext_elem.attrib['name'] + if ext_name not in ext_name_map: + continue + + # Workaround for VK_ANDROID_native_buffer. Its <extension> element in + # vk.xml lists it as supported="disabled" and provides only a stub + # definition. Its <extension> element in Mesa's custom + # vk_android_native_buffer.xml, though, lists it as + # supported='android-vendor' and fully defines the extension. We want + # to skip the <extension> element in vk.xml. + if ext_elem.attrib['supported'] == 'disabled': + assert ext_name == 'VK_ANDROID_native_buffer' + continue + + ext = ext_name_map[ext_name] + ext.type = ext_elem.attrib['type'] + +_TEMPLATE = COPYRIGHT + """ +#include "anv_private.h" + +#include "vk_util.h" + +/* Convert the VK_USE_PLATFORM_* defines to booleans */ +%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']: +#ifdef VK_USE_PLATFORM_${platform}_KHR +# undef VK_USE_PLATFORM_${platform}_KHR +# define VK_USE_PLATFORM_${platform}_KHR true +#else +# define VK_USE_PLATFORM_${platform}_KHR false +#endif +%endfor + +/* And ANDROID too */ +#ifdef ANDROID +# undef ANDROID +# define ANDROID true +#else +# define ANDROID false +#endif + +#define ANV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\ + VK_USE_PLATFORM_XCB_KHR || \\ + VK_USE_PLATFORM_XLIB_KHR) + +bool +anv_instance_extension_supported(const char *name) +{ +%for ext in instance_extensions: + if (strcmp(name, "${ext.name}") == 0) + return ${ext.enable}; +%endfor + return false; +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + +%for ext in instance_extensions: + if (${ext.enable}) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "${ext.name}", + .specVersion = ${ext.ext_version}, + }; + } + } +%endfor + + return vk_outarray_status(&out); +} + +uint32_t +anv_physical_device_api_version(struct anv_physical_device *dev) +{ + return ${MAX_API_VERSION.c_vk_version()}; +} + +bool +anv_physical_device_extension_supported(struct anv_physical_device *device, + const char *name) +{ +%for ext in device_extensions: + if (strcmp(name, "${ext.name}") == 0) + return ${ext.enable}; +%endfor + return false; +} + +VkResult anv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + (void)device; + +%for ext in device_extensions: + if (${ext.enable}) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "${ext.name}", + .specVersion = ${ext.ext_version}, + }; + } + } +%endfor + + return vk_outarray_status(&out); +} +""" + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', help='Output C file.', required=True) + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, + action='append', + dest='xml_files') + args = parser.parse_args() + + for filename in args.xml_files: + _init_exts_from_xml(filename) + + for ext in EXTENSIONS: + assert ext.type == 'instance' or ext.type == 'device' + + template_env = { + 'MAX_API_VERSION': MAX_API_VERSION, + 'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'], + 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'], + } + + from mako.template import Template + + with open(args.out, 'w') as f: + f.write(Template(_TEMPLATE).render(**template_env)) diff --git a/lib/mesa/src/intel/vulkan/anv_icd.py b/lib/mesa/src/intel/vulkan/anv_icd.py new file mode 100644 index 000000000..31bb0687a --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_icd.py @@ -0,0 +1,47 @@ +# Copyright 2017 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import json +import os.path + +from anv_extensions import * + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', help='Output jsono file.', required=True) + parser.add_argument('--lib-path', help='Path to libvulkan_intel.so') + args = parser.parse_args() + + path = 'libvulkan_intel.so' + if args.lib_path: + path = os.path.join(args.lib_path, path) + + json_data = { + 'file_format_version': '1.0.0', + 'ICD': { + 'library_path': path, + 'api_version': str(MAX_API_VERSION), + }, + } + + with open(args.out, 'w') as f: + json.dump(json_data, f, indent = 4, sort_keys=True) diff --git a/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c b/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c new file mode 100644 index 000000000..028f24e2f --- /dev/null +++ b/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c @@ -0,0 +1,475 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "anv_private.h" +#include "nir/nir.h" +#include "nir/nir_builder.h" + +struct ycbcr_state { + nir_builder *builder; + nir_ssa_def *image_size; + nir_tex_instr *origin_tex; + struct anv_ycbcr_conversion *conversion; +}; + +static nir_ssa_def * +y_range(nir_builder *b, + nir_ssa_def *y_channel, + int bpc, + VkSamplerYcbcrRangeKHR range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR: + return y_channel; + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR: + return nir_fmul(b, + nir_fadd(b, + nir_fmul(b, y_channel, + nir_imm_float(b, pow(2, bpc) - 1)), + nir_imm_float(b, -16.0f * pow(2, bpc - 8))), + nir_frcp(b, nir_imm_float(b, 219.0f * pow(2, bpc - 8)))); + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +static nir_ssa_def * +chroma_range(nir_builder *b, + nir_ssa_def *chroma_channel, + int bpc, + VkSamplerYcbcrRangeKHR range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR: + return nir_fadd(b, chroma_channel, + nir_imm_float(b, -pow(2, bpc - 1) / (pow(2, bpc) - 1.0f))); + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR: + return nir_fmul(b, + nir_fadd(b, + nir_fmul(b, chroma_channel, + nir_imm_float(b, pow(2, bpc) - 1)), + nir_imm_float(b, -128.0f * pow(2, bpc - 8))), + nir_frcp(b, nir_imm_float(b, 224.0f * pow(2, bpc - 8)))); + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +static const nir_const_value * +ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversionKHR model) +{ + switch (model) { + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR: { + static const nir_const_value bt601[3] = { + { .f32 = { 1.402f, 1.0f, 0.0f, 0.0f } }, + { .f32 = { -0.714136286201022f, 1.0f, -0.344136286201022f, 0.0f } }, + { .f32 = { 0.0f, 1.0f, 1.772f, 0.0f } } + }; + + return bt601; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR: { + static const nir_const_value bt709[3] = { + { .f32 = { 1.5748031496063f, 1.0f, 0.0, 0.0f } }, + { .f32 = { -0.468125209181067f, 1.0f, -0.187327487470334f, 0.0f } }, + { .f32 = { 0.0f, 1.0f, 1.85563184264242f, 0.0f } } + }; + + return bt709; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR: { + static const nir_const_value bt2020[3] = { + { .f32 = { 1.4746f, 1.0f, 0.0f, 0.0f } }, + { .f32 = { -0.571353126843658f, 1.0f, -0.164553126843658f, 0.0f } }, + { .f32 = { 0.0f, 1.0f, 1.8814f, 0.0f } } + }; + + return bt2020; + } + default: + unreachable("missing Ycbcr model"); + return NULL; + } +} + +static nir_ssa_def * +convert_ycbcr(struct ycbcr_state *state, + nir_ssa_def *raw_channels, + uint32_t *bpcs) +{ + nir_builder *b = state->builder; + struct anv_ycbcr_conversion *conversion = state->conversion; + + nir_ssa_def *expanded_channels = + nir_vec4(b, + chroma_range(b, nir_channel(b, raw_channels, 0), + bpcs[0], conversion->ycbcr_range), + y_range(b, nir_channel(b, raw_channels, 1), + bpcs[1], conversion->ycbcr_range), + chroma_range(b, nir_channel(b, raw_channels, 2), + bpcs[2], conversion->ycbcr_range), + nir_imm_float(b, 1.0f)); + + if (conversion->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR) + return expanded_channels; + + const nir_const_value *conversion_matrix = + ycbcr_model_to_rgb_matrix(conversion->ycbcr_model); + + nir_ssa_def *converted_channels[] = { + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix[0])), + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix[1])), + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix[2])) + }; + + return nir_vec4(b, + converted_channels[0], converted_channels[1], + converted_channels[2], nir_imm_float(b, 1.0f)); +} + +/* TODO: we should probably replace this with a push constant/uniform. */ +static nir_ssa_def * +get_texture_size(struct ycbcr_state *state, nir_deref_var *texture) +{ + if (state->image_size) + return state->image_size; + + nir_builder *b = state->builder; + const struct glsl_type *type = nir_deref_tail(&texture->deref)->type; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 0); + + tex->op = nir_texop_txs; + tex->sampler_dim = glsl_get_sampler_dim(type); + tex->is_array = glsl_sampler_type_is_array(type); + tex->is_shadow = glsl_sampler_type_is_shadow(type); + tex->texture = nir_deref_var_clone(texture, tex); + tex->dest_type = nir_type_int; + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + nir_builder_instr_insert(b, &tex->instr); + + state->image_size = nir_i2f32(b, &tex->dest.ssa); + + return state->image_size; +} + +static nir_ssa_def * +implicit_downsampled_coord(nir_builder *b, + nir_ssa_def *value, + nir_ssa_def *max_value, + int div_scale) +{ + return nir_fadd(b, + value, + nir_fdiv(b, + nir_imm_float(b, 1.0f), + nir_fmul(b, + nir_imm_float(b, div_scale), + max_value))); +} + +static nir_ssa_def * +implicit_downsampled_coords(struct ycbcr_state *state, + nir_ssa_def *old_coords, + const struct anv_format_plane *plane_format) +{ + nir_builder *b = state->builder; + struct anv_ycbcr_conversion *conversion = state->conversion; + nir_ssa_def *image_size = get_texture_size(state, + state->origin_tex->texture); + nir_ssa_def *comp[4] = { NULL, }; + int c; + + for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) { + if (plane_format->denominator_scales[c] > 1 && + conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN_KHR) { + comp[c] = implicit_downsampled_coord(b, + nir_channel(b, old_coords, c), + nir_channel(b, image_size, c), + plane_format->denominator_scales[c]); + } else { + comp[c] = nir_channel(b, old_coords, c); + } + } + + /* Leave other coordinates untouched */ + for (; c < old_coords->num_components; c++) + comp[c] = nir_channel(b, old_coords, c); + + return nir_vec(b, comp, old_coords->num_components); +} + +static nir_ssa_def * +create_plane_tex_instr_implicit(struct ycbcr_state *state, + uint32_t plane) +{ + nir_builder *b = state->builder; + struct anv_ycbcr_conversion *conversion = state->conversion; + const struct anv_format_plane *plane_format = + &conversion->format->planes[plane]; + nir_tex_instr *old_tex = state->origin_tex; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1); + + for (uint32_t i = 0; i < old_tex->num_srcs; i++) { + tex->src[i].src_type = old_tex->src[i].src_type; + + switch (old_tex->src[i].src_type) { + case nir_tex_src_coord: + if (plane_format->has_chroma && conversion->chroma_reconstruction) { + assert(old_tex->src[i].src.is_ssa); + tex->src[i].src = + nir_src_for_ssa(implicit_downsampled_coords(state, + old_tex->src[i].src.ssa, + plane_format)); + break; + } + /* fall through */ + default: + nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + break; + } + } + tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane)); + tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane; + + tex->sampler_dim = old_tex->sampler_dim; + tex->dest_type = old_tex->dest_type; + + tex->op = old_tex->op; + tex->coord_components = old_tex->coord_components; + tex->is_new_style_shadow = old_tex->is_new_style_shadow; + tex->component = old_tex->component; + + tex->texture_index = old_tex->texture_index; + tex->texture_array_size = old_tex->texture_array_size; + tex->texture = nir_deref_var_clone(old_tex->texture, tex); + + tex->sampler_index = old_tex->sampler_index; + tex->sampler = nir_deref_var_clone(old_tex->sampler, tex); + + nir_ssa_dest_init(&tex->instr, &tex->dest, + old_tex->dest.ssa.num_components, + nir_dest_bit_size(old_tex->dest), NULL); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +static unsigned +channel_to_component(enum isl_channel_select channel) +{ + switch (channel) { + case ISL_CHANNEL_SELECT_RED: + return 0; + case ISL_CHANNEL_SELECT_GREEN: + return 1; + case ISL_CHANNEL_SELECT_BLUE: + return 2; + case ISL_CHANNEL_SELECT_ALPHA: + return 3; + default: + unreachable("invalid channel"); + return 0; + } +} + +static enum isl_channel_select +swizzle_channel(struct isl_swizzle swizzle, unsigned channel) +{ + switch (channel) { + case 0: + return swizzle.r; + case 1: + return swizzle.g; + case 2: + return swizzle.b; + case 3: + return swizzle.a; + default: + unreachable("invalid channel"); + return 0; + } +} + +static bool +try_lower_tex_ycbcr(struct anv_pipeline *pipeline, + nir_builder *builder, + nir_tex_instr *tex) +{ + nir_variable *var = tex->texture->var; + const struct anv_descriptor_set_layout *set_layout = + pipeline->layout->set[var->data.descriptor_set].layout; + const struct anv_descriptor_set_binding_layout *binding = + &set_layout->binding[var->data.binding]; + + /* For the following instructions, we don't apply any change and let the + * instruction apply to the first plane. + */ + if (tex->op == nir_texop_txs || + tex->op == nir_texop_query_levels || + tex->op == nir_texop_lod) + return false; + + if (binding->immutable_samplers == NULL) + return false; + + unsigned texture_index = tex->texture_index; + if (tex->texture->deref.child) { + assert(tex->texture->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(tex->texture->deref.child); + if (deref_array->deref_array_type != nir_deref_array_type_direct) + return false; + size_t hw_binding_size = + anv_descriptor_set_binding_layout_get_hw_size(binding); + texture_index += MIN2(deref_array->base_offset, hw_binding_size - 1); + } + const struct anv_sampler *sampler = + binding->immutable_samplers[texture_index]; + + if (sampler->conversion == NULL) + return false; + + struct ycbcr_state state = { + .builder = builder, + .origin_tex = tex, + .conversion = sampler->conversion, + }; + + builder->cursor = nir_before_instr(&tex->instr); + + const struct anv_format *format = state.conversion->format; + const struct isl_format_layout *y_isl_layout = NULL; + for (uint32_t p = 0; p < format->n_planes; p++) { + if (!format->planes[p].has_chroma) + y_isl_layout = isl_format_get_layout(format->planes[p].isl_format); + } + assert(y_isl_layout != NULL); + uint8_t y_bpc = y_isl_layout->channels_array[0].bits; + + /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */ + nir_ssa_def *ycbcr_comp[5] = { NULL, NULL, NULL, + /* Use extra 2 channels for following swizzle */ + nir_imm_float(builder, 1.0f), + nir_imm_float(builder, 0.0f), + }; + uint8_t ycbcr_bpcs[5]; + memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs)); + + /* Go through all the planes and gather the samples into a |ycbcr_comp| + * while applying a swizzle required by the spec: + * + * R, G, B should respectively map to Cr, Y, Cb + */ + for (uint32_t p = 0; p < format->n_planes; p++) { + const struct anv_format_plane *plane_format = &format->planes[p]; + nir_ssa_def *plane_sample = create_plane_tex_instr_implicit(&state, p); + + for (uint32_t pc = 0; pc < 4; pc++) { + enum isl_channel_select ycbcr_swizzle = + swizzle_channel(plane_format->ycbcr_swizzle, pc); + if (ycbcr_swizzle == ISL_CHANNEL_SELECT_ZERO) + continue; + + unsigned ycbcr_component = channel_to_component(ycbcr_swizzle); + ycbcr_comp[ycbcr_component] = nir_channel(builder, plane_sample, pc); + + /* Also compute the number of bits for each component. */ + const struct isl_format_layout *isl_layout = + isl_format_get_layout(plane_format->isl_format); + ycbcr_bpcs[ycbcr_component] = isl_layout->channels_array[pc].bits; + } + } + + /* Now remaps components to the order specified by the conversion. */ + nir_ssa_def *swizzled_comp[4] = { NULL, }; + uint32_t swizzled_bpcs[4] = { 0, }; + + for (uint32_t i = 0; i < ARRAY_SIZE(state.conversion->mapping); i++) { + /* Maps to components in |ycbcr_comp| */ + static const uint32_t swizzle_mapping[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = 4, + [VK_COMPONENT_SWIZZLE_ONE] = 3, + [VK_COMPONENT_SWIZZLE_R] = 0, + [VK_COMPONENT_SWIZZLE_G] = 1, + [VK_COMPONENT_SWIZZLE_B] = 2, + [VK_COMPONENT_SWIZZLE_A] = 3, + }; + const VkComponentSwizzle m = state.conversion->mapping[i]; + + if (m == VK_COMPONENT_SWIZZLE_IDENTITY) { + swizzled_comp[i] = ycbcr_comp[i]; + swizzled_bpcs[i] = ycbcr_bpcs[i]; + } else { + swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]]; + swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]]; + } + } + + nir_ssa_def *result = nir_vec(builder, swizzled_comp, 4); + if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR) + result = convert_ycbcr(&state, result, swizzled_bpcs); + + nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); + nir_instr_remove(&tex->instr); + + return true; +} + +bool +anv_nir_lower_ycbcr_textures(nir_shader *shader, struct anv_pipeline *pipeline) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + bool function_progress = false; + nir_builder builder; + nir_builder_init(&builder, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + function_progress |= try_lower_tex_ycbcr(pipeline, &builder, tex); + } + } + + if (function_progress) { + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } + + progress |= function_progress; + } + + return progress; +} |