summary refs log tree commit diff
path: root/lib/mesa/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src')
-rw-r--r--lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h10
-rw-r--r--lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h5
-rw-r--r--lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am7
-rw-r--r--lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources3
-rw-r--r--lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript1
-rw-r--r--lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h35
-rw-r--r--lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c2
-rw-r--r--lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h20
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h11
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h252
-rw-r--r--lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h4
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c125
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c133
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h6
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_format_tests.c19
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_idalloc.c96
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_idalloc.h62
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_log.c235
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_log.h100
-rw-r--r--lib/mesa/src/gallium/auxiliary/util/u_mm.c5
-rw-r--r--lib/mesa/src/gallium/auxiliary/vl/vl_csc.h3
-rw-r--r--lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c263
-rw-r--r--lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h88
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h4
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c6
-rw-r--r--lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c11
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h4
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c6
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c24
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c7
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c3
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h1
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h2
-rw-r--r--lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c4
-rw-r--r--lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c4
-rw-r--r--lib/mesa/src/gallium/drivers/r300/r300_hyperz.c1
-rw-r--r--lib/mesa/src/gallium/drivers/r600/cayman_msaa.c270
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c685
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_cs.h209
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c263
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c649
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c1433
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h932
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_query.c2126
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_query.h326
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_streamout.c365
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_test_dma.c398
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_texture.c1953
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600_viewport.c456
-rw-r--r--lib/mesa/src/gallium/drivers/r600/r600d_common.h135
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_uvd.c1492
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_uvd.h442
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_vce.c533
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_vce.h462
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_video.c349
-rw-r--r--lib/mesa/src/gallium/drivers/r600/radeon_video.h85
-rw-r--r--lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp2
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/r600_cs.h29
-rw-r--r--lib/mesa/src/gallium/drivers/radeon/radeon_vce.h52
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h10
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h49
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c508
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c448
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c209
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c423
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c445
-rw-r--r--lib/mesa/src/gallium/drivers/softpipe/sp_query.c11
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c36
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c28
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_draw_private.h56
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c1
-rw-r--r--lib/mesa/src/gallium/drivers/svga/svga_tgsi.c6
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h152
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl193
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl35
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl127
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl35
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl217
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl161
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl27
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl27
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h1
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h12
-rw-r--r--lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h12
-rw-r--r--lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h2
-rw-r--r--lib/mesa/src/gallium/drivers/vc4/Automake.inc3
-rw-r--r--lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c21
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/Automake.inc14
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/Makefile.am40
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/Makefile.in939
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/Makefile.sources27
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_blit.c226
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c580
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h140
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_cl.c87
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_cl.h251
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_context.c171
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_context.h473
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_draw.c529
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_drm.h191
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_emit.c464
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_fence.c93
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_formats.c416
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_job.c454
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_program.c594
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_query.c91
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c297
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_resource.c751
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_resource.h158
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_screen.c620
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_screen.h100
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c736
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_state.c749
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c402
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h43
-rw-r--r--lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c416
-rw-r--r--lib/mesa/src/gallium/state_trackers/clover/api/event.cpp3
-rw-r--r--lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp30
-rw-r--r--lib/mesa/src/gallium/state_trackers/clover/core/event.cpp39
-rw-r--r--lib/mesa/src/gallium/state_trackers/clover/core/event.hpp4
-rw-r--r--lib/mesa/src/gallium/state_trackers/dri/Makefile.am2
-rw-r--r--lib/mesa/src/gallium/state_trackers/dri/dri_context.h1
-rw-r--r--lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h2
-rw-r--r--lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c388
-rw-r--r--lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h52
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am35
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in899
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources10
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c152
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h48
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c644
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h148
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c1032
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c1013
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c383
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c1278
-rw-r--r--lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h96
-rw-r--r--lib/mesa/src/gallium/state_trackers/va/Makefile.sources1
-rw-r--r--lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c116
-rw-r--r--lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am75
-rw-r--r--lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in1102
-rw-r--r--lib/mesa/src/gallium/targets/omx-bellagio/omx.sym11
-rw-r--r--lib/mesa/src/gallium/targets/omx-bellagio/target.c2
-rw-r--r--lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh8
-rw-r--r--lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh8
-rw-r--r--lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am31
-rw-r--r--lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in882
-rw-r--r--lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources3
-rw-r--r--lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h31
-rw-r--r--lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c35
-rw-r--r--lib/mesa/src/git_sha1.h.in1
-rw-r--r--lib/mesa/src/intel/common/gen_sample_positions.h65
-rw-r--r--lib/mesa/src/intel/common/intel_log.c87
-rw-r--r--lib/mesa/src/intel/common/intel_log.h82
-rw-r--r--lib/mesa/src/intel/isl/isl_genX_priv.h48
-rw-r--r--lib/mesa/src/intel/vulkan/anv_android.c416
-rw-r--r--lib/mesa/src/intel/vulkan/anv_debug_report.c119
-rw-r--r--lib/mesa/src/intel/vulkan/anv_dump.c17
-rw-r--r--lib/mesa/src/intel/vulkan/anv_extensions.c447
-rw-r--r--lib/mesa/src/intel/vulkan/anv_extensions.py284
-rw-r--r--lib/mesa/src/intel/vulkan/anv_icd.py47
-rw-r--r--lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c475
162 files changed, 38768 insertions, 399 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 9ec051a34..c00997b89 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -82,7 +82,8 @@ enum lp_sampler_lod_control {
enum lp_sampler_op_type {
LP_SAMPLER_OP_TEXTURE,
LP_SAMPLER_OP_FETCH,
- LP_SAMPLER_OP_GATHER
+ LP_SAMPLER_OP_GATHER,
+ LP_SAMPLER_OP_LODQ
};
@@ -165,6 +166,7 @@ struct lp_static_sampler_state
unsigned normalized_coords:1;
unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
unsigned lod_bias_non_zero:1;
+ unsigned max_lod_pos:1;
unsigned apply_min_lod:1; /**< min_lod > 0 ? */
unsigned apply_max_lod:1; /**< max_lod < last_level ? */
unsigned seamless_cube_map:1;
@@ -321,6 +323,10 @@ struct lp_build_sample_context
/** number of lod values (valid are 1, length/4, length) */
unsigned num_lods;
+ boolean no_quad_lod;
+ boolean no_brilinear;
+ boolean no_rho_approx;
+
/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
@@ -486,6 +492,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state,
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
+ boolean is_lodq,
unsigned texture_index,
unsigned sampler_index,
LLVMValueRef s,
@@ -496,6 +503,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
unsigned mip_filter,
+ LLVMValueRef *out_lod,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart,
LLVMValueRef *out_lod_positive);
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index 463d44eb4..c92517fee 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -82,6 +82,11 @@ struct lp_build_emit_data {
LLVMValueRef output[4];
/**
+ * Secondary output for instructions that have a second destination register.
+ */
+ LLVMValueRef output1[4];
+
+ /**
* The current instruction that is being 'executed'.
*/
const struct tgsi_full_instruction * inst;
diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am
index 8039a957b..878159f23 100644
--- a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -5,6 +5,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util \
$(GALLIUM_PIPE_LOADER_DEFINES) \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS)
@@ -40,9 +41,11 @@ libpipe_loader_dynamic_la_SOURCES += \
endif
libpipe_loader_static_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
libpipe_loader_dynamic_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
EXTRA_DIST = SConscript
diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources
index d6e3c2c06..66dd22ccc 100644
--- a/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources
+++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/Makefile.sources
@@ -2,7 +2,8 @@ COMMON_SOURCES := \
pipe_loader.c \
pipe_loader.h \
pipe_loader_priv.h \
- pipe_loader_sw.c
+ pipe_loader_sw.c \
+ driinfo_gallium.h
DRM_SOURCES := \
pipe_loader_drm.c
diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript b/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript
index 14e1b350a..0f72195b2 100644
--- a/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript
+++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/SConscript
@@ -7,6 +7,7 @@ env.MSVC2013Compat()
env.Append(CPPPATH = [
'#/src/loader',
'#/src/gallium/winsys',
+ xmlpool_options.dir.dir,
])
env.Append(CPPDEFINES = [
diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
new file mode 100644
index 000000000..003a3d708
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
@@ -0,0 +1,35 @@
+// DriConf options supported by all Gallium DRI drivers.
+DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_MESA_GLTHREAD("false")
+ DRI_CONF_MESA_NO_ERROR("false")
+ DRI_CONF_DISABLE_EXT_BUFFER_AGE("false")
+ DRI_CONF_DISABLE_OML_SYNC_CONTROL("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_QUALITY
+ DRI_CONF_PP_CELSHADE(0)
+ DRI_CONF_PP_NORED(0)
+ DRI_CONF_PP_NOGREEN(0)
+ DRI_CONF_PP_NOBLUE(0)
+ DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+ DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_DEBUG
+ DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
+ DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
+ DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
+ DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false")
+ DRI_CONF_FORCE_GLSL_VERSION(0)
+ DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+ DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false")
+ DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false")
+ DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
+ DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+ DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_MISCELLANEOUS
+ DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
+ DRI_CONF_GLSL_ZERO_INIT("false")
+DRI_CONF_SECTION_END
diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 250f739c8..d22c24e67 100644
--- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -30,7 +30,7 @@
* Buffer cache.
*
* \author Jose Fonseca <jfonseca-at-vmware-dot-com>
- * \author Thomas Hellström <thellstom-at-vmware-dot-com>
+ * \author Thomas Hellström <thellstrom-at-vmware-dot-com>
*/
diff --git a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h
index 0b2c363e1..85c14a786 100644
--- a/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h
+++ b/lib/mesa/src/gallium/auxiliary/postprocess/pp_mlaa.h
@@ -164,12 +164,12 @@ static const char offsetvs[] = "VERT\n"
"DCL OUT[1], GENERIC[0]\n"
"DCL OUT[2], GENERIC[10]\n"
"DCL OUT[3], GENERIC[11]\n"
- "DCL CONST[0]\n"
+ "DCL CONST[0][0]\n"
"IMM FLT32 { 1.0000, 0.0000, -1.0000, 0.0000}\n"
" 0: MOV OUT[0], IN[0]\n"
" 1: MOV OUT[1], IN[1]\n"
- " 2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
- " 3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
+ " 2: MAD OUT[2], CONST[0][0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
+ " 3: MAD OUT[3], CONST[0][0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
" 4: END\n";
@@ -183,7 +183,7 @@ static const char blend2fs_1[] = "FRAG\n"
"DCL SVIEW[1], 2D, FLOAT\n"
"DCL SAMP[2]\n"
"DCL SVIEW[2], 2D, FLOAT\n"
- "DCL CONST[0]\n"
+ "DCL CONST[0][0]\n"
"DCL TEMP[0..6]\n"
"IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n"
"IMM FLT32 { -1.5000, -2.0000, 0.9000, 1.5000}\n"
@@ -204,7 +204,7 @@ static const char blend2fs_2[] =
" 11: BRK\n"
" 12: ENDIF\n"
" 13: MOV TEMP[4].y, IMM[0].xxxx\n"
- " 14: MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
+ " 14: MAD TEMP[3].xyz, CONST[0][0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
" 15: MOV TEMP[3].w, IMM[0].xxxx\n"
" 16: TXL TEMP[5], TEMP[3], SAMP[2], 2D\n"
" 17: MOV TEMP[3].x, TEMP[5].yyyy\n"
@@ -229,7 +229,7 @@ static const char blend2fs_2[] =
" 36: BRK\n"
" 37: ENDIF\n"
" 38: MOV TEMP[5].y, IMM[0].xxxx\n"
- " 39: MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
+ " 39: MAD TEMP[4].xyz, CONST[0][0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
" 40: MOV TEMP[4].w, IMM[0].xxxx\n"
" 41: TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n"
" 42: MOV TEMP[4].x, TEMP[6].yyyy\n"
@@ -250,7 +250,7 @@ static const char blend2fs_2[] =
" 57: MOV TEMP[5].x, TEMP[1].xxxx\n"
" 58: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n"
" 59: MOV TEMP[5].z, TEMP[1].xxxx\n"
- " 60: MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n"
+ " 60: MAD TEMP[1], TEMP[5], CONST[0][0].xyxy, IN[0].xyxy\n"
" 61: MOV TEMP[4], TEMP[1].xyyy\n"
" 62: MOV TEMP[4].w, IMM[0].xxxx\n"
" 63: TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n"
@@ -278,7 +278,7 @@ static const char blend2fs_2[] =
" 85: BRK\n"
" 86: ENDIF\n"
" 87: MOV TEMP[3].y, IMM[0].xxxx\n"
- " 88: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
+ " 88: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
" 89: MOV TEMP[5].w, IMM[0].xxxx\n"
" 90: TXL TEMP[4], TEMP[5], SAMP[2], 2D\n"
" 91: MOV TEMP[2].x, TEMP[4].xxxx\n"
@@ -303,7 +303,7 @@ static const char blend2fs_2[] =
"110: BRK\n"
"111: ENDIF\n"
"112: MOV TEMP[4].y, IMM[0].xxxx\n"
- "113: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
+ "113: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
"114: MOV TEMP[5].w, IMM[0].xxxx\n"
"115: TXL TEMP[6], TEMP[5], SAMP[2], 2D\n"
"116: MOV TEMP[3].x, TEMP[6].xxxx\n"
@@ -324,7 +324,7 @@ static const char blend2fs_2[] =
"131: MOV TEMP[4].y, TEMP[1].xxxx\n"
"132: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n"
"133: MOV TEMP[4].w, TEMP[1].xxxx\n"
- "134: MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n"
+ "134: MAD TEMP[1], TEMP[4], CONST[0][0].xyxy, IN[0].xyxy\n"
"135: MOV TEMP[3], TEMP[1].xyyy\n"
"136: MOV TEMP[3].w, IMM[0].xxxx\n"
"137: TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n"
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h
index e60888fec..8d32f4774 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -76,11 +76,10 @@ struct tgsi_opcode_info
unsigned is_tex:1;
unsigned is_store:1;
unsigned is_branch:1;
- int pre_dedent:2;
- int post_indent:2;
+ unsigned pre_dedent:1;
+ unsigned post_indent:1;
enum tgsi_output_mode output_mode:3;
- const char *mnemonic;
- uint opcode;
+ unsigned opcode:8;
};
const struct tgsi_opcode_info *
@@ -112,10 +111,10 @@ static inline bool tgsi_type_is_64bit(enum tgsi_opcode_type type)
}
enum tgsi_opcode_type
-tgsi_opcode_infer_src_type( uint opcode );
+tgsi_opcode_infer_src_type( uint opcode, uint src_idx );
enum tgsi_opcode_type
-tgsi_opcode_infer_dst_type( uint opcode );
+tgsi_opcode_infer_dst_type( uint opcode, uint dst_idx );
#if defined __cplusplus
}
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
new file mode 100644
index 000000000..1b2803cf3
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
@@ -0,0 +1,252 @@
+OPCODE(1, 1, COMP, ARL)
+OPCODE(1, 1, COMP, MOV)
+OPCODE(1, 1, CHAN, LIT)
+OPCODE(1, 1, REPL, RCP)
+OPCODE(1, 1, REPL, RSQ)
+OPCODE(1, 1, CHAN, EXP)
+OPCODE(1, 1, CHAN, LOG)
+OPCODE(1, 2, COMP, MUL)
+OPCODE(1, 2, COMP, ADD)
+OPCODE(1, 2, REPL, DP3)
+OPCODE(1, 2, REPL, DP4)
+OPCODE(1, 2, CHAN, DST)
+OPCODE(1, 2, COMP, MIN)
+OPCODE(1, 2, COMP, MAX)
+OPCODE(1, 2, COMP, SLT)
+OPCODE(1, 2, COMP, SGE)
+OPCODE(1, 3, COMP, MAD)
+OPCODE(1, 2, OTHR, TEX_LZ, .is_tex = 1)
+OPCODE(1, 3, COMP, LRP)
+OPCODE(1, 3, COMP, FMA)
+OPCODE(1, 1, REPL, SQRT)
+OPCODE(1, 2, COMP, LDEXP)
+OPCODE(1, 1, COMP, F2U64)
+OPCODE(1, 1, COMP, F2I64)
+OPCODE(1, 1, COMP, FRC)
+OPCODE(1, 2, OTHR, TXF_LZ, .is_tex = 1)
+OPCODE(1, 1, COMP, FLR)
+OPCODE(1, 1, COMP, ROUND)
+OPCODE(1, 1, REPL, EX2)
+OPCODE(1, 1, REPL, LG2)
+OPCODE(1, 2, REPL, POW)
+OPCODE_GAP(31) /* removed */
+OPCODE(1, 1, COMP, U2I64)
+OPCODE(1, 0, OTHR, CLOCK)
+OPCODE(1, 1, COMP, I2I64)
+OPCODE_GAP(35) /* removed */
+OPCODE(1, 1, REPL, COS)
+OPCODE(1, 1, COMP, DDX)
+OPCODE(1, 1, COMP, DDY)
+OPCODE(0, 0, NONE, KILL)
+OPCODE(1, 1, REPL, PK2H)
+OPCODE(1, 1, REPL, PK2US)
+OPCODE(1, 1, REPL, PK4B)
+OPCODE(1, 1, REPL, PK4UB)
+OPCODE(1, 1, COMP, D2U64)
+OPCODE(1, 2, COMP, SEQ)
+OPCODE(1, 1, COMP, D2I64)
+OPCODE(1, 2, COMP, SGT)
+OPCODE(1, 1, REPL, SIN)
+OPCODE(1, 2, COMP, SLE)
+OPCODE(1, 2, COMP, SNE)
+OPCODE(1, 1, COMP, U642D)
+OPCODE(1, 2, OTHR, TEX, .is_tex = 1)
+OPCODE(1, 4, OTHR, TXD, .is_tex = 1)
+OPCODE(1, 2, OTHR, TXP, .is_tex = 1)
+OPCODE(1, 1, CHAN, UP2H)
+OPCODE(1, 1, CHAN, UP2US)
+OPCODE(1, 1, CHAN, UP4B)
+OPCODE(1, 1, CHAN, UP4UB)
+OPCODE(1, 1, COMP, U642F)
+OPCODE(1, 1, COMP, I642F)
+OPCODE(1, 1, COMP, ARR)
+OPCODE(1, 1, COMP, I642D)
+OPCODE(0, 0, NONE, CAL, .is_branch = 1)
+OPCODE(0, 0, NONE, RET)
+OPCODE(1, 1, COMP, SSG)
+OPCODE(1, 3, COMP, CMP)
+OPCODE_GAP(67) /* removed */
+OPCODE(1, 2, OTHR, TXB, .is_tex = 1)
+OPCODE(1, 1, OTHR, FBFETCH)
+OPCODE(1, 2, COMP, DIV)
+OPCODE(1, 2, REPL, DP2)
+OPCODE(1, 2, OTHR, TXL, .is_tex = 1)
+OPCODE(0, 0, NONE, BRK)
+OPCODE(0, 1, NONE, IF, .is_branch = 1, .post_indent = 1)
+OPCODE(0, 1, NONE, UIF, .is_branch = 1, .post_indent = 1)
+OPCODE(1, 2, COMP, READ_INVOC)
+OPCODE(0, 0, NONE, ELSE, .is_branch = 1, .pre_dedent = 1, .post_indent = 1)
+OPCODE(0, 0, NONE, ENDIF, .pre_dedent = 1)
+OPCODE(1, 1, COMP, DDX_FINE)
+OPCODE(1, 1, COMP, DDY_FINE)
+OPCODE_GAP(81) /* removed */
+OPCODE_GAP(82) /* removed */
+OPCODE(1, 1, COMP, CEIL)
+OPCODE(1, 1, COMP, I2F)
+OPCODE(1, 1, COMP, NOT)
+OPCODE(1, 1, COMP, TRUNC)
+OPCODE(1, 2, COMP, SHL)
+OPCODE(1, 1, OTHR, BALLOT)
+OPCODE(1, 2, COMP, AND)
+OPCODE(1, 2, COMP, OR)
+OPCODE(1, 2, COMP, MOD)
+OPCODE(1, 2, COMP, XOR)
+OPCODE_GAP(93) /* removed */
+OPCODE(1, 2, OTHR, TXF, .is_tex = 1)
+OPCODE(1, 2, OTHR, TXQ, .is_tex = 1)
+OPCODE(0, 0, NONE, CONT)
+OPCODE(0, 1, NONE, EMIT)
+OPCODE(0, 1, NONE, ENDPRIM)
+OPCODE(0, 0, NONE, BGNLOOP, .is_branch = 1, .post_indent = 1)
+OPCODE(0, 0, NONE, BGNSUB, .post_indent = 1)
+OPCODE(0, 0, NONE, ENDLOOP, .is_branch = 1, .pre_dedent = 1)
+OPCODE(0, 0, NONE, ENDSUB, .pre_dedent = 1)
+OPCODE_GAP(103) /* removed */
+OPCODE(1, 1, OTHR, TXQS, .is_tex = 1)
+OPCODE(1, 1, OTHR, RESQ)
+OPCODE(1, 1, COMP, READ_FIRST)
+OPCODE(0, 0, NONE, NOP)
+OPCODE(1, 2, COMP, FSEQ)
+OPCODE(1, 2, COMP, FSGE)
+OPCODE(1, 2, COMP, FSLT)
+OPCODE(1, 2, COMP, FSNE)
+OPCODE(0, 1, OTHR, MEMBAR)
+OPCODE_GAP(113) /* removed */
+OPCODE_GAP(114) /* removed */
+OPCODE_GAP(115) /* removed */
+OPCODE(0, 1, NONE, KILL_IF)
+OPCODE(0, 0, NONE, END)
+OPCODE(1, 3, COMP, DFMA)
+OPCODE(1, 1, COMP, F2I)
+OPCODE(1, 2, COMP, IDIV)
+OPCODE(1, 2, COMP, IMAX)
+OPCODE(1, 2, COMP, IMIN)
+OPCODE(1, 1, COMP, INEG)
+OPCODE(1, 2, COMP, ISGE)
+OPCODE(1, 2, COMP, ISHR)
+OPCODE(1, 2, COMP, ISLT)
+OPCODE(1, 1, COMP, F2U)
+OPCODE(1, 1, COMP, U2F)
+OPCODE(1, 2, COMP, UADD)
+OPCODE(1, 2, COMP, UDIV)
+OPCODE(1, 3, COMP, UMAD)
+OPCODE(1, 2, COMP, UMAX)
+OPCODE(1, 2, COMP, UMIN)
+OPCODE(1, 2, COMP, UMOD)
+OPCODE(1, 2, COMP, UMUL)
+OPCODE(1, 2, COMP, USEQ)
+OPCODE(1, 2, COMP, USGE)
+OPCODE(1, 2, COMP, USHR)
+OPCODE(1, 2, COMP, USLT)
+OPCODE(1, 2, COMP, USNE)
+OPCODE(0, 1, NONE, SWITCH)
+OPCODE(0, 1, NONE, CASE)
+OPCODE(0, 0, NONE, DEFAULT)
+OPCODE(0, 0, NONE, ENDSWITCH)
+
+OPCODE(1, 3, OTHR, SAMPLE)
+OPCODE(1, 2, OTHR, SAMPLE_I)
+OPCODE(1, 3, OTHR, SAMPLE_I_MS)
+OPCODE(1, 4, OTHR, SAMPLE_B)
+OPCODE(1, 4, OTHR, SAMPLE_C)
+OPCODE(1, 4, OTHR, SAMPLE_C_LZ)
+OPCODE(1, 5, OTHR, SAMPLE_D)
+OPCODE(1, 4, OTHR, SAMPLE_L)
+OPCODE(1, 3, OTHR, GATHER4)
+OPCODE(1, 2, OTHR, SVIEWINFO)
+OPCODE(1, 2, OTHR, SAMPLE_POS)
+OPCODE(1, 2, OTHR, SAMPLE_INFO)
+OPCODE(1, 1, COMP, UARL)
+OPCODE(1, 3, COMP, UCMP)
+OPCODE(1, 1, COMP, IABS)
+OPCODE(1, 1, COMP, ISSG)
+OPCODE(1, 2, OTHR, LOAD)
+OPCODE(1, 2, OTHR, STORE, .is_store = 1)
+OPCODE_GAP(163) /* removed */
+OPCODE_GAP(164) /* removed */
+OPCODE_GAP(165) /* removed */
+OPCODE(0, 0, OTHR, BARRIER)
+
+OPCODE(1, 3, OTHR, ATOMUADD, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMXCHG, .is_store = 1)
+OPCODE(1, 4, OTHR, ATOMCAS, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMAND, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMOR, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMXOR, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMUMIN, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMUMAX, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMIMIN, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMIMAX, .is_store = 1)
+OPCODE(1, 3, OTHR, TEX2, .is_tex = 1)
+OPCODE(1, 3, OTHR, TXB2, .is_tex = 1)
+OPCODE(1, 3, OTHR, TXL2, .is_tex = 1)
+OPCODE(1, 2, COMP, IMUL_HI)
+OPCODE(1, 2, COMP, UMUL_HI)
+OPCODE(1, 3, OTHR, TG4, .is_tex = 1)
+OPCODE(1, 2, OTHR, LODQ, .is_tex = 1)
+OPCODE(1, 3, COMP, IBFE)
+OPCODE(1, 3, COMP, UBFE)
+OPCODE(1, 4, COMP, BFI)
+OPCODE(1, 1, COMP, BREV)
+OPCODE(1, 1, COMP, POPC)
+OPCODE(1, 1, COMP, LSB)
+OPCODE(1, 1, COMP, IMSB)
+OPCODE(1, 1, COMP, UMSB)
+OPCODE(1, 1, OTHR, INTERP_CENTROID)
+OPCODE(1, 2, OTHR, INTERP_SAMPLE)
+OPCODE(1, 2, OTHR, INTERP_OFFSET)
+OPCODE(1, 1, COMP, F2D)
+OPCODE(1, 1, COMP, D2F)
+OPCODE(1, 1, COMP, DABS)
+OPCODE(1, 1, COMP, DNEG)
+OPCODE(1, 2, COMP, DADD)
+OPCODE(1, 2, COMP, DMUL)
+OPCODE(1, 2, COMP, DMAX)
+OPCODE(1, 2, COMP, DMIN)
+OPCODE(1, 2, COMP, DSLT)
+OPCODE(1, 2, COMP, DSGE)
+OPCODE(1, 2, COMP, DSEQ)
+OPCODE(1, 2, COMP, DSNE)
+OPCODE(1, 1, COMP, DRCP)
+OPCODE(1, 1, COMP, DSQRT)
+OPCODE(1, 3, COMP, DMAD)
+OPCODE(1, 1, COMP, DFRAC)
+OPCODE(1, 2, COMP, DLDEXP)
+OPCODE(2, 1, REPL, DFRACEXP)
+OPCODE(1, 1, COMP, D2I)
+OPCODE(1, 1, COMP, I2D)
+OPCODE(1, 1, COMP, D2U)
+OPCODE(1, 1, COMP, U2D)
+OPCODE(1, 1, COMP, DRSQ)
+OPCODE(1, 1, COMP, DTRUNC)
+OPCODE(1, 1, COMP, DCEIL)
+OPCODE(1, 1, COMP, DFLR)
+OPCODE(1, 1, COMP, DROUND)
+OPCODE(1, 1, COMP, DSSG)
+OPCODE(1, 1, COMP, VOTE_ANY)
+OPCODE(1, 1, COMP, VOTE_ALL)
+OPCODE(1, 1, COMP, VOTE_EQ)
+OPCODE(1, 2, COMP, U64SEQ)
+OPCODE(1, 2, COMP, U64SNE)
+OPCODE(1, 2, COMP, I64SLT)
+OPCODE(1, 2, COMP, U64SLT)
+OPCODE(1, 2, COMP, I64SGE)
+OPCODE(1, 2, COMP, U64SGE)
+OPCODE(1, 2, COMP, I64MIN)
+OPCODE(1, 2, COMP, U64MIN)
+OPCODE(1, 2, COMP, I64MAX)
+OPCODE(1, 2, COMP, U64MAX)
+OPCODE(1, 1, COMP, I64ABS)
+OPCODE(1, 1, COMP, I64SSG)
+OPCODE(1, 1, COMP, I64NEG)
+OPCODE(1, 2, COMP, U64ADD)
+OPCODE(1, 2, COMP, U64MUL)
+OPCODE(1, 2, COMP, U64SHL)
+OPCODE(1, 2, COMP, I64SHR)
+OPCODE(1, 2, COMP, U64SHR)
+OPCODE(1, 2, COMP, I64DIV)
+OPCODE(1, 2, COMP, U64DIV)
+OPCODE(1, 2, COMP, I64MOD)
+OPCODE(1, 2, COMP, U64MOD)
+OPCODE(1, 2, COMP, DDIV)
+OPCODE(1, 3, OTHR, LOD)
diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 20e4f843a..fd4c38f01 100644
--- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -55,8 +55,6 @@ struct tgsi_lowering_config
* enable lowering of TGSI_OPCODE_<opc>
*/
unsigned lower_DST:1;
- unsigned lower_XPD:1;
- unsigned lower_SCS:1;
unsigned lower_LRP:1;
unsigned lower_FRC:1;
unsigned lower_POW:1;
@@ -65,9 +63,7 @@ struct tgsi_lowering_config
unsigned lower_LOG:1;
unsigned lower_DP4:1;
unsigned lower_DP3:1;
- unsigned lower_DPH:1;
unsigned lower_DP2:1;
- unsigned lower_DP2A:1;
unsigned lower_FLR:1;
unsigned lower_CEIL:1;
unsigned lower_TRUNC:1;
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c b/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c
index 9d831efcf..e87e53016 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_dump_defines.c
@@ -62,36 +62,36 @@ util_dump_enum_continuous(unsigned value,
}
-#define DEFINE_UTIL_DUMP_CONTINUOUS(_name) \
+#define DEFINE_UTIL_STR_CONTINUOUS(_name) \
const char * \
- util_dump_##_name(unsigned value, boolean shortened) \
+ util_str_##_name(unsigned value, boolean shortened) \
{ \
if(shortened) \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \
else \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \
}
/**
- * Same as DEFINE_UTIL_DUMP_CONTINUOUS but with static assertions to detect
+ * Same as DEFINE_UTIL_STR_CONTINUOUS but with static assertions to detect
* failures to update lists.
*/
-#define DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(_name, _count) \
+#define DEFINE_UTIL_STR_CONTINUOUS_COUNT(_name, _count) \
const char * \
- util_dump_##_name(unsigned value, boolean shortened) \
+ util_str_##_name(unsigned value, boolean shortened) \
{ \
- STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_names) == _count); \
- STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_short_names) == _count); \
+ STATIC_ASSERT(ARRAY_SIZE(util_##_name##_names) == _count); \
+ STATIC_ASSERT(ARRAY_SIZE(util_##_name##_short_names) == _count); \
if(shortened) \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \
else \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \
}
static const char *
-util_dump_blend_factor_names[] = {
+util_blend_factor_names[] = {
UTIL_DUMP_INVALID_NAME, /* 0x0 */
"PIPE_BLENDFACTOR_ONE",
"PIPE_BLENDFACTOR_SRC_COLOR",
@@ -122,7 +122,7 @@ util_dump_blend_factor_names[] = {
};
static const char *
-util_dump_blend_factor_short_names[] = {
+util_blend_factor_short_names[] = {
UTIL_DUMP_INVALID_NAME, /* 0x0 */
"one",
"src_color",
@@ -152,11 +152,11 @@ util_dump_blend_factor_short_names[] = {
"inv_src1_alpha"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(blend_factor)
+DEFINE_UTIL_STR_CONTINUOUS(blend_factor)
static const char *
-util_dump_blend_func_names[] = {
+util_blend_func_names[] = {
"PIPE_BLEND_ADD",
"PIPE_BLEND_SUBTRACT",
"PIPE_BLEND_REVERSE_SUBTRACT",
@@ -165,7 +165,7 @@ util_dump_blend_func_names[] = {
};
static const char *
-util_dump_blend_func_short_names[] = {
+util_blend_func_short_names[] = {
"add",
"sub",
"rev_sub",
@@ -173,11 +173,11 @@ util_dump_blend_func_short_names[] = {
"max"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(blend_func)
+DEFINE_UTIL_STR_CONTINUOUS(blend_func)
static const char *
-util_dump_logicop_names[] = {
+util_logicop_names[] = {
"PIPE_LOGICOP_CLEAR",
"PIPE_LOGICOP_NOR",
"PIPE_LOGICOP_AND_INVERTED",
@@ -197,7 +197,7 @@ util_dump_logicop_names[] = {
};
static const char *
-util_dump_logicop_short_names[] = {
+util_logicop_short_names[] = {
"clear",
"nor",
"and_inverted",
@@ -216,11 +216,11 @@ util_dump_logicop_short_names[] = {
"set"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(logicop)
+DEFINE_UTIL_STR_CONTINUOUS(logicop)
static const char *
-util_dump_func_names[] = {
+util_func_names[] = {
"PIPE_FUNC_NEVER",
"PIPE_FUNC_LESS",
"PIPE_FUNC_EQUAL",
@@ -232,7 +232,7 @@ util_dump_func_names[] = {
};
static const char *
-util_dump_func_short_names[] = {
+util_func_short_names[] = {
"never",
"less",
"equal",
@@ -243,11 +243,11 @@ util_dump_func_short_names[] = {
"always"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(func)
+DEFINE_UTIL_STR_CONTINUOUS(func)
static const char *
-util_dump_stencil_op_names[] = {
+util_stencil_op_names[] = {
"PIPE_STENCIL_OP_KEEP",
"PIPE_STENCIL_OP_ZERO",
"PIPE_STENCIL_OP_REPLACE",
@@ -259,7 +259,7 @@ util_dump_stencil_op_names[] = {
};
static const char *
-util_dump_stencil_op_short_names[] = {
+util_stencil_op_short_names[] = {
"keep",
"zero",
"replace",
@@ -270,11 +270,11 @@ util_dump_stencil_op_short_names[] = {
"invert"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(stencil_op)
+DEFINE_UTIL_STR_CONTINUOUS(stencil_op)
static const char *
-util_dump_tex_target_names[] = {
+util_tex_target_names[] = {
"PIPE_BUFFER",
"PIPE_TEXTURE_1D",
"PIPE_TEXTURE_2D",
@@ -287,7 +287,7 @@ util_dump_tex_target_names[] = {
};
static const char *
-util_dump_tex_target_short_names[] = {
+util_tex_target_short_names[] = {
"buffer",
"1d",
"2d",
@@ -299,11 +299,11 @@ util_dump_tex_target_short_names[] = {
"cube_array",
};
-DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES)
+DEFINE_UTIL_STR_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES)
static const char *
-util_dump_tex_wrap_names[] = {
+util_tex_wrap_names[] = {
"PIPE_TEX_WRAP_REPEAT",
"PIPE_TEX_WRAP_CLAMP",
"PIPE_TEX_WRAP_CLAMP_TO_EDGE",
@@ -315,7 +315,7 @@ util_dump_tex_wrap_names[] = {
};
static const char *
-util_dump_tex_wrap_short_names[] = {
+util_tex_wrap_short_names[] = {
"repeat",
"clamp",
"clamp_to_edge",
@@ -326,45 +326,46 @@ util_dump_tex_wrap_short_names[] = {
"mirror_clamp_to_border"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_wrap)
+DEFINE_UTIL_STR_CONTINUOUS(tex_wrap)
static const char *
-util_dump_tex_mipfilter_names[] = {
+util_tex_mipfilter_names[] = {
"PIPE_TEX_MIPFILTER_NEAREST",
"PIPE_TEX_MIPFILTER_LINEAR",
"PIPE_TEX_MIPFILTER_NONE"
};
static const char *
-util_dump_tex_mipfilter_short_names[] = {
+util_tex_mipfilter_short_names[] = {
"nearest",
"linear",
"none"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_mipfilter)
+DEFINE_UTIL_STR_CONTINUOUS(tex_mipfilter)
static const char *
-util_dump_tex_filter_names[] = {
+util_tex_filter_names[] = {
"PIPE_TEX_FILTER_NEAREST",
"PIPE_TEX_FILTER_LINEAR"
};
static const char *
-util_dump_tex_filter_short_names[] = {
+util_tex_filter_short_names[] = {
"nearest",
"linear"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_filter)
+DEFINE_UTIL_STR_CONTINUOUS(tex_filter)
static const char *
-util_dump_query_type_names[] = {
+util_query_type_names[] = {
"PIPE_QUERY_OCCLUSION_COUNTER",
"PIPE_QUERY_OCCLUSION_PREDICATE",
+ "PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE",
"PIPE_QUERY_TIMESTAMP",
"PIPE_QUERY_TIMESTAMP_DISJOINT",
"PIPE_QUERY_TIME_ELAPSED",
@@ -372,12 +373,13 @@ util_dump_query_type_names[] = {
"PIPE_QUERY_PRIMITIVES_EMITTED",
"PIPE_QUERY_SO_STATISTICS",
"PIPE_QUERY_SO_OVERFLOW_PREDICATE",
+ "PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE",
"PIPE_QUERY_GPU_FINISHED",
"PIPE_QUERY_PIPELINE_STATISTICS",
};
static const char *
-util_dump_query_type_short_names[] = {
+util_query_type_short_names[] = {
"occlusion_counter",
"occlusion_predicate",
"timestamp",
@@ -391,11 +393,30 @@ util_dump_query_type_short_names[] = {
"pipeline_statistics",
};
-DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+DEFINE_UTIL_STR_CONTINUOUS(query_type)
static const char *
-util_dump_prim_mode_names[] = {
+util_query_value_type_names[] = {
+ "PIPE_QUERY_TYPE_I32",
+ "PIPE_QUERY_TYPE_U32",
+ "PIPE_QUERY_TYPE_I64",
+ "PIPE_QUERY_TYPE_U64",
+};
+
+static const char *
+util_query_value_type_short_names[] = {
+ "i32",
+ "u32",
+ "i64",
+ "u64",
+};
+
+DEFINE_UTIL_STR_CONTINUOUS(query_value_type)
+
+
+static const char *
+util_prim_mode_names[] = {
"PIPE_PRIM_POINTS",
"PIPE_PRIM_LINES",
"PIPE_PRIM_LINE_LOOP",
@@ -414,7 +435,7 @@ util_dump_prim_mode_names[] = {
};
static const char *
-util_dump_prim_mode_short_names[] = {
+util_prim_mode_short_names[] = {
"points",
"lines",
"line_loop",
@@ -432,4 +453,20 @@ util_dump_prim_mode_short_names[] = {
"patches",
};
-DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode)
+DEFINE_UTIL_STR_CONTINUOUS(prim_mode)
+
+/**
+ * Print the name of a query type to \p stream.
+ *
+ * Values at or above PIPE_QUERY_DRIVER_SPECIFIC are printed as an offset from
+ * that base; all other values are printed using the long name returned by
+ * util_str_query_type().
+ */
+void
+util_dump_query_type(FILE *stream, unsigned value)
+{
+   if (value >= PIPE_QUERY_DRIVER_SPECIFIC) {
+      /* The offset is computed from an unsigned operand, so it is printed
+       * with the unsigned conversion rather than %i.
+       */
+      fprintf(stream, "PIPE_QUERY_DRIVER_SPECIFIC + %u",
+              value - PIPE_QUERY_DRIVER_SPECIFIC);
+   } else {
+      fprintf(stream, "%s", util_str_query_type(value, false));
+   }
+}
+
+/**
+ * Print the long name of a query value type to \p stream.
+ */
+void
+util_dump_query_value_type(FILE *stream, unsigned value)
+{
+   const char *name = util_str_query_value_type(value, false);
+
+   fprintf(stream, "%s", name);
+}
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c
index 8c4f2150b..3f755e536 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -28,136 +28,15 @@
#include "u_format.h"
#include "u_format_s3tc.h"
#include "util/format_srgb.h"
+#include "../../../mesa/main/texcompress_s3tc_tmp.h"
-#if defined(_WIN32) || defined(WIN32)
-#define DXTN_LIBNAME "dxtn.dll"
-#elif defined(__CYGWIN__)
-#define DXTN_LIBNAME "cygtxc_dxtn.dll"
-#elif defined(__APPLE__)
-#define DXTN_LIBNAME "libtxc_dxtn.dylib"
-#else
-#define DXTN_LIBNAME "libtxc_dxtn.so"
-#endif
+util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1;
+util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1;
+util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3;
+util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5;
-
-static void
-util_format_dxt1_rgb_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst)
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt1_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt3_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt5_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxtn_pack_stub(int src_comps,
- int width, int height,
- const uint8_t *src,
- enum util_format_dxtn dst_format,
- uint8_t *dst,
- int dst_stride)
-{
- assert(0);
-}
-
-
-boolean util_format_s3tc_enabled = FALSE;
-
-util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub;
-
-util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub;
-
-
-void
-util_format_s3tc_init(void)
-{
- static boolean first_time = TRUE;
- struct util_dl_library *library = NULL;
- util_dl_proc fetch_2d_texel_rgb_dxt1;
- util_dl_proc fetch_2d_texel_rgba_dxt1;
- util_dl_proc fetch_2d_texel_rgba_dxt3;
- util_dl_proc fetch_2d_texel_rgba_dxt5;
- util_dl_proc tx_compress_dxtn;
-
- if (!first_time)
- return;
- first_time = FALSE;
-
- if (util_format_s3tc_enabled)
- return;
-
- library = util_dl_open(DXTN_LIBNAME);
- if (!library) {
- debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
- "compression/decompression unavailable\n");
- return;
- }
-
- fetch_2d_texel_rgb_dxt1 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1");
- fetch_2d_texel_rgba_dxt1 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1");
- fetch_2d_texel_rgba_dxt3 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3");
- fetch_2d_texel_rgba_dxt5 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5");
- tx_compress_dxtn =
- util_dl_get_proc_address(library, "tx_compress_dxtn");
-
- if (!util_format_dxt1_rgb_fetch ||
- !util_format_dxt1_rgba_fetch ||
- !util_format_dxt3_rgba_fetch ||
- !util_format_dxt5_rgba_fetch ||
- !util_format_dxtn_pack) {
- debug_printf("couldn't reference all symbols in " DXTN_LIBNAME
- ", software DXTn compression/decompression "
- "unavailable\n");
- util_dl_close(library);
- return;
- }
-
- util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1;
- util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1;
- util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3;
- util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5;
- util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn;
- util_format_s3tc_enabled = TRUE;
-}
+util_format_dxtn_pack_t util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn;
/*
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h
index ae20010cd..6f188c67f 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h
+++ b/lib/mesa/src/gallium/auxiliary/util/u_format_s3tc.h
@@ -58,8 +58,6 @@ typedef void
uint8_t *dst,
int dst_stride);
-extern boolean util_format_s3tc_enabled;
-
extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch;
extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch;
extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch;
@@ -69,10 +67,6 @@ extern util_format_dxtn_pack_t util_format_dxtn_pack;
void
-util_format_s3tc_init(void);
-
-
-void
util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
void
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c
index 3075ea0ab..9c9a5838d 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_format_tests.c
@@ -140,6 +140,12 @@ util_format_test_cases[] =
{PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3fffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
@@ -164,6 +170,19 @@ util_format_test_cases[] =
{PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xfffe), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c
new file mode 100644
index 000000000..26104552e
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * A simple allocator that allocates and releases "numbers".
+ *
+ * @author Samuel Pitoiset <samuel.pitoiset@gmail.com>
+ */
+
+#include "util/u_idalloc.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+/**
+ * Put \p buf into the empty state: no bitmap storage and zero elements.
+ */
+void
+util_idalloc_init(struct util_idalloc *buf)
+{
+   buf->data = NULL;
+   buf->num_elements = 0;
+}
+
+/**
+ * Release the bitmap storage owned by \p buf.
+ *
+ * The allocator is left in the same empty state util_idalloc_init() produces,
+ * so a second call is harmless and no dangling pointer is kept around.
+ */
+void
+util_idalloc_fini(struct util_idalloc *buf)
+{
+   /* free(NULL) is a no-op, so no guard is needed. */
+   free(buf->data);
+   buf->data = NULL;
+   buf->num_elements = 0;
+}
+
+/**
+ * Grow the bitmap so that it can track at least \p new_num_elements ids.
+ *
+ * The requested size is rounded up to a multiple of 32 (one bitmap word).
+ * Requests that do not exceed the current size are ignored; the bitmap never
+ * shrinks. Newly added words are cleared, i.e. all new ids start out free.
+ */
+void
+util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements)
+{
+   const unsigned old_words = buf->num_elements / 32;
+   unsigned new_words;
+
+   new_num_elements = align(new_num_elements, 32);
+   if (new_num_elements <= buf->num_elements)
+      return;
+
+   new_words = new_num_elements / 32;
+
+   /* NOTE(review): the realloc() result is not checked before use. */
+   buf->data = realloc(buf->data, new_words * sizeof(*buf->data));
+
+   for (unsigned word = old_words; word < new_words; word++)
+      buf->data[word] = 0;
+
+   buf->num_elements = new_num_elements;
+}
+
+/**
+ * Allocate the lowest free id and mark it as used.
+ *
+ * The bitmap is scanned word by word for a clear bit. If every tracked id is
+ * in use (or nothing is tracked yet), the bitmap is grown and the first id of
+ * the newly added range is returned.
+ *
+ * \return the allocated id.
+ */
+unsigned
+util_idalloc_alloc(struct util_idalloc *buf)
+{
+   unsigned num_elements = buf->num_elements;
+
+   for (unsigned i = 0; i < num_elements / 32; i++) {
+      if (buf->data[i] == 0xffffffff)
+         continue;
+
+      unsigned bit = ffs(~buf->data[i]) - 1;
+      buf->data[i] |= 1u << bit;
+      return i * 32 + bit;
+   }
+
+   /* No slots available, resize and return the first free.
+    *
+    * Doubling an empty allocator would request 0 elements, which
+    * util_idalloc_resize() treats as a no-op and would leave buf->data NULL,
+    * so start with a single 32-id word in that case.
+    */
+   util_idalloc_resize(buf, num_elements ? num_elements * 2 : 32);
+
+   /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+   buf->data[num_elements / 32] |= 1u << (num_elements % 32);
+
+   return num_elements;
+}
+
+/**
+ * Mark \p id as free again so that util_idalloc_alloc() can hand it out.
+ *
+ * \p id must be below the number of ids currently tracked by \p buf.
+ */
+void
+util_idalloc_free(struct util_idalloc *buf, unsigned id)
+{
+   assert(id < buf->num_elements);
+
+   /* Use an unsigned mask: "1 << 31" overflows a signed int, which is
+    * undefined behaviour whenever id % 32 == 31.
+    */
+   buf->data[id / 32] &= ~(1u << (id % 32));
+}
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h
new file mode 100644
index 000000000..82469e94d
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_IDALLOC_H
+#define U_IDALLOC_H
+
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * State of an id allocator.
+ *
+ * Ids are tracked in a bitmap of 32-bit words: a set bit means the id is
+ * allocated, a clear bit means it is free.
+ */
+struct util_idalloc
+{
+ /* Bitmap storage, num_elements / 32 words; NULL until the first resize. */
+ uint32_t *data;
+ /* Number of ids the bitmap can currently track. */
+ unsigned num_elements;
+};
+
+/* Zero-initialize the allocator; it tracks no ids afterwards. */
+void
+util_idalloc_init(struct util_idalloc *buf);
+
+/* Free the bitmap storage owned by the allocator. */
+void
+util_idalloc_fini(struct util_idalloc *buf);
+
+/* Grow the bitmap to track at least new_num_elements ids (rounded up to a
+ * multiple of 32); smaller or equal sizes are ignored.
+ */
+void
+util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements);
+
+/* Return the lowest free id and mark it used, growing the bitmap if full. */
+unsigned
+util_idalloc_alloc(struct util_idalloc *buf);
+
+/* Mark id as free; id must be below the number of tracked ids. */
+void
+util_idalloc_free(struct util_idalloc *buf, unsigned id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_IDALLOC_H */
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_log.c b/lib/mesa/src/gallium/auxiliary/util/u_log.c
new file mode 100644
index 000000000..dacbe0505
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/util/u_log.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "u_log.h"
+
+#include "u_memory.h"
+#include "util/u_string.h"
+
+/* One logged chunk: its type (destroy/print callbacks) and its payload. */
+struct page_entry {
+ const struct u_log_chunk_type *type;
+ void *data;
+};
+
+/* A page is a growable array of chunks in the order they were added. */
+struct u_log_page {
+ struct page_entry *entries;
+ /* Number of entries in use. */
+ unsigned num_entries;
+ /* Number of entries the array has room for. */
+ unsigned max_entries;
+};
+
+/* A registered auto logger: the callback and the data pointer passed to it. */
+struct u_log_auto_logger {
+ u_auto_log_fn *callback;
+ void *data;
+};
+
+/**
+ * Prepare \p ctx for use: no current page and no auto loggers registered.
+ */
+void
+u_log_context_init(struct u_log_context *ctx)
+{
+   ctx->cur = NULL;
+   ctx->auto_loggers = NULL;
+   ctx->num_auto_loggers = 0;
+}
+
+/**
+ * Release everything the logging context still owns and reset it.
+ *
+ * The current page (if any) and the auto logger array are freed. Pages that
+ * were handed out through \ref u_log_new_page belong to the caller and must
+ * be destroyed separately.
+ */
+void
+u_log_context_destroy(struct u_log_context *ctx)
+{
+   u_log_page_destroy(ctx->cur);
+   FREE(ctx->auto_loggers);
+
+   /* Leave the context in the freshly-initialized state. */
+   ctx->cur = NULL;
+   ctx->auto_loggers = NULL;
+   ctx->num_auto_loggers = 0;
+}
+
+/**
+ * Register an auto logger on \p ctx.
+ *
+ * Auto loggers are invoked every time a chunk is added to the log. If the
+ * logger array cannot be grown, a message is printed to stderr and the
+ * context is left unchanged.
+ */
+void
+u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback,
+                      void *data)
+{
+   const unsigned count = ctx->num_auto_loggers;
+   struct u_log_auto_logger *grown =
+      REALLOC(ctx->auto_loggers,
+              sizeof(*grown) * count,
+              sizeof(*grown) * (count + 1));
+
+   if (!grown) {
+      fprintf(stderr, "Gallium u_log: out of memory\n");
+      return;
+   }
+
+   /* Append the new logger in the last slot and publish the grown array. */
+   grown[count].callback = callback;
+   grown[count].data = data;
+   ctx->auto_loggers = grown;
+   ctx->num_auto_loggers = count + 1;
+}
+
+/**
+ * Run every registered auto logger once.
+ *
+ * Does nothing when no auto loggers are registered.
+ */
+void
+u_log_flush(struct u_log_context *ctx)
+{
+   const unsigned saved_count = ctx->num_auto_loggers;
+   struct u_log_auto_logger *saved_loggers = ctx->auto_loggers;
+
+   if (saved_count == 0)
+      return;
+
+   /* Detach the loggers while they run so that anything they log through
+    * this context does not trigger another flush (prevents recursion).
+    */
+   ctx->num_auto_loggers = 0;
+   ctx->auto_loggers = NULL;
+
+   for (unsigned i = 0; i < saved_count; ++i) {
+      struct u_log_auto_logger *logger = &saved_loggers[i];
+
+      logger->callback(logger->data, ctx);
+   }
+
+   /* Callbacks must not have registered new auto loggers meanwhile. */
+   assert(!ctx->num_auto_loggers);
+   ctx->num_auto_loggers = saved_count;
+   ctx->auto_loggers = saved_loggers;
+}
+
+/* Print callback for plain-text chunks: the payload is a C string. */
+static void str_print(void *data, FILE *stream)
+{
+   const char *text = data;
+
+   fputs(text, stream);
+}
+
+/* Chunk type used by u_log_printf(): the string payload is released with
+ * free() and printed verbatim.
+ */
+static const struct u_log_chunk_type str_chunk_type = {
+   .destroy = free,
+   .print = str_print,
+};
+
+/**
+ * Format a message printf-style and add it to the log as a text chunk.
+ *
+ * If formatting fails, a message is printed to stderr and nothing is logged.
+ */
+void
+u_log_printf(struct u_log_context *ctx, const char *fmt, ...)
+{
+   char *text = NULL;
+   va_list args;
+   int len;
+
+   va_start(args, fmt);
+   len = util_vasprintf(&text, fmt, args);
+   va_end(args);
+
+   if (len < 0) {
+      fprintf(stderr, "Gallium u_log_printf: out of memory\n");
+      return;
+   }
+
+   /* The chunk takes over the formatted string. */
+   u_log_chunk(ctx, &str_chunk_type, text);
+}
+
+/**
+ * Add a custom chunk to the log.
+ *
+ * Auto loggers are run first, then the chunk is appended to the current page
+ * (which is created on demand).
+ *
+ * type->destroy will be called as soon as \p data is no longer needed. That
+ * includes the out-of-memory path: if the chunk cannot be stored, a message
+ * is printed to stderr and \p data is destroyed right away.
+ */
+void
+u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type,
+            void *data)
+{
+   struct u_log_page *page;
+
+   u_log_flush(ctx);
+
+   /* Look up the current page only after the flush: auto loggers may have
+    * logged chunks of their own and thereby created the page. Using a value
+    * sampled before the flush would allocate a second page and drop theirs.
+    */
+   page = ctx->cur;
+   if (!page) {
+      ctx->cur = CALLOC_STRUCT(u_log_page);
+      page = ctx->cur;
+      if (!page)
+         goto out_of_memory;
+   }
+
+   if (page->num_entries >= page->max_entries) {
+      /* Grow geometrically, starting with room for 16 entries. */
+      unsigned new_max_entries = MAX2(16, page->num_entries * 2);
+      struct page_entry *new_entries = REALLOC(page->entries,
+                                               page->max_entries * sizeof(*page->entries),
+                                               new_max_entries * sizeof(*page->entries));
+      if (!new_entries)
+         goto out_of_memory;
+
+      page->entries = new_entries;
+      page->max_entries = new_max_entries;
+   }
+
+   page->entries[page->num_entries].type = type;
+   page->entries[page->num_entries].data = data;
+   page->num_entries++;
+   return;
+
+out_of_memory:
+   fprintf(stderr, "Gallium: u_log: out of memory\n");
+
+   /* The chunk was not stored, so nobody else will ever release it. */
+   if (type->destroy)
+      type->destroy(data);
+}
+
+/**
+ * Convenience helper: print the current page to \p stream, destroy it and
+ * leave the context without a current page. Does nothing if there is no
+ * current page.
+ */
+void
+u_log_new_page_print(struct u_log_context *ctx, FILE *stream)
+{
+   struct u_log_page *finished = ctx->cur;
+
+   if (!finished)
+      return;
+
+   u_log_page_print(finished, stream);
+   u_log_page_destroy(finished);
+   ctx->cur = NULL;
+}
+
+/**
+ * Detach the current page from the logging context and hand it to the caller.
+ *
+ * The context has no current page afterwards. The returned pointer is
+ * whatever the context held, which is NULL if nothing was logged since the
+ * last page was taken. The caller is responsible for destroying the page.
+ */
+struct u_log_page *
+u_log_new_page(struct u_log_context *ctx)
+{
+   struct u_log_page *detached = ctx->cur;
+
+   ctx->cur = NULL;
+
+   return detached;
+}
+
+/**
+ * Destroy \p page together with all chunks stored in it.
+ *
+ * Each chunk's destroy callback (when the type provides one) is invoked on
+ * its data before the entry array and the page itself are freed. Passing
+ * NULL is allowed and does nothing.
+ */
+void
+u_log_page_destroy(struct u_log_page *page)
+{
+   if (!page)
+      return;
+
+   for (unsigned i = 0; i < page->num_entries; ++i) {
+      struct page_entry *entry = &page->entries[i];
+
+      if (entry->type->destroy)
+         entry->type->destroy(entry->data);
+   }
+
+   FREE(page->entries);
+   FREE(page);
+}
+
+/**
+ * Print every chunk of \p page to \p stream, in the order they were added.
+ *
+ * A NULL \p page is accepted and prints nothing, matching
+ * u_log_page_destroy(); u_log_new_page() hands back NULL when there is no
+ * current page.
+ */
+void
+u_log_page_print(struct u_log_page *page, FILE *stream)
+{
+   if (!page)
+      return;
+
+   for (unsigned i = 0; i < page->num_entries; ++i)
+      page->entries[i].type->print(page->entries[i].data, stream);
+}
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_log.h b/lib/mesa/src/gallium/auxiliary/util/u_log.h
new file mode 100644
index 000000000..09c47caee
--- /dev/null
+++ b/lib/mesa/src/gallium/auxiliary/util/u_log.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file u_log.h
+ * @brief Context logging facilities
+ *
+ * Provides a means of logging context events (draw calls, command streams, ...)
+ * into files.
+ *
+ * Log entries start their life cycle as "chunks". Chunks can be plain text
+ * written by \ref u_log_printf or custom internal representations added by
+ * \ref u_log_chunk that are only converted to text on-demand (e.g. for higher
+ * performance pipelined hang-debugging).
+ *
+ * Chunks are accumulated into "pages". The manager of the log can periodically
+ * take out the current page using \ref u_log_new_page and dump it to a file.
+ *
+ * Furthermore, "auto loggers" can be added to a context, which are callbacks
+ * that are given the opportunity to add their own logging each time a chunk is
+ * added. Drivers can use this to lazily log chunks of their command stream.
+ * Lazy loggers don't need to be re-entrant.
+ */
+
+#ifndef U_LOG_H
+#define U_LOG_H
+
+#include <stdio.h>
+
+#include "u_debug.h"
+
+struct u_log_page;
+struct u_log_auto_logger;
+
+/* Callbacks that describe how a chunk's data is released and printed. */
+struct u_log_chunk_type {
+ /* Releases the chunk data; may be NULL, in which case nothing is called. */
+ void (*destroy)(void *data);
+ /* Writes the textual form of the chunk data to stream. */
+ void (*print)(void *data, FILE *stream);
+};
+
+/* A logging context: the page being filled plus the registered auto loggers. */
+struct u_log_context {
+ /* Page currently collecting chunks; NULL until the first chunk is added. */
+ struct u_log_page *cur;
+ /* Array of num_auto_loggers registered auto loggers. */
+ struct u_log_auto_logger *auto_loggers;
+ unsigned num_auto_loggers;
+};
+
+/* Auto logger callback; receives its registration data and the context. */
+typedef void (u_auto_log_fn)(void *data, struct u_log_context *ctx);
+
+/* Initialize ctx to an empty state (no page, no auto loggers). */
+void
+u_log_context_init(struct u_log_context *ctx);
+
+/* Free the current page and the auto logger array, then reset ctx. */
+void
+u_log_context_destroy(struct u_log_context *ctx);
+
+/* Register callback (with its data pointer) to run whenever a chunk is added. */
+void
+u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback,
+ void *data);
+
+/* Run all registered auto loggers now. */
+void
+u_log_flush(struct u_log_context *ctx);
+
+/* Add a printf-style formatted text chunk to the log. */
+void
+u_log_printf(struct u_log_context *ctx, const char *fmt, ...) _util_printf_format(2,3);
+
+/* Add a custom chunk; data is released later through type->destroy. */
+void
+u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type,
+ void *data);
+
+/* Print the current page to stream, destroy it, and start a new one. */
+void
+u_log_new_page_print(struct u_log_context *ctx, FILE *stream);
+
+/* Take the current page out of ctx; the caller must destroy it. */
+struct u_log_page *
+u_log_new_page(struct u_log_context *ctx);
+
+/* Destroy a page and all chunks in it; NULL is accepted. */
+void
+u_log_page_destroy(struct u_log_page *page);
+
+/* Print all chunks of page to stream. */
+void
+u_log_page_print(struct u_log_page *page, FILE *stream);
+
+#endif /* U_LOG_H */
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_mm.c b/lib/mesa/src/gallium/auxiliary/util/u_mm.c
index bd4c4e1b1..7a45e2919 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_mm.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_mm.c
@@ -183,7 +183,10 @@ u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
assert(size >= 0);
assert(align2 >= 0);
- assert(align2 <= 12); /* sanity check, 2^12 (4KB) enough? */
+ /* Make sure that a byte alignment isn't getting passed for our
+ * power-of-two alignment arg.
+ */
+ assert(align2 < 32);
if (!heap || align2 < 0 || size <= 0)
return NULL;
diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h b/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h
index 4927470e4..8623e1b3b 100644
--- a/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h
+++ b/lib/mesa/src/gallium/auxiliary/vl/vl_csc.h
@@ -45,7 +45,8 @@ enum VL_CSC_COLOR_STANDARD
VL_CSC_COLOR_STANDARD_IDENTITY,
VL_CSC_COLOR_STANDARD_BT_601,
VL_CSC_COLOR_STANDARD_BT_709,
- VL_CSC_COLOR_STANDARD_SMPTE_240M
+ VL_CSC_COLOR_STANDARD_SMPTE_240M,
+ VL_CSC_COLOR_STANDARD_BT_709_REV
};
extern const struct vl_procamp vl_default_procamp;
diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c
new file mode 100644
index 000000000..0f3cd7257
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2017 Etnaviv Project
+ * Copyright (C) 2017 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ * Christian Gmeiner <christian.gmeiner@gmail.com>
+ */
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "etnaviv_context.h"
+#include "etnaviv_debug.h"
+#include "etnaviv_emit.h"
+#include "etnaviv_query_hw.h"
+#include "etnaviv_screen.h"
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+/*
+ * Point the hardware occlusion counter at the 64-bit slot that belongs to
+ * the current sample.  The slot index is clamped to 63, so once that many
+ * samples have been taken the last slot is reused and a BUG is reported.
+ */
+static void
+occlusion_start(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+   struct etna_resource *rsc = etna_resource(hq->prsc);
+
+   /* clamp before the slot index is turned into a byte offset */
+   if (hq->samples > 63) {
+      hq->samples = 63;
+      BUG("samples overflow");
+   }
+
+   struct etna_reloc r = {
+      .bo = rsc->bo,
+      .flags = ETNA_RELOC_WRITE,
+      .offset = hq->samples * 8, /* 64bit value */
+   };
+
+   etna_set_state_reloc(ctx->stream, VIVS_GL_OCCLUSION_QUERY_ADDR, &r);
+}
+
+/* Emit the occlusion-query control write that ends the current sample. */
+static void
+occlusion_stop(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+   /* 0x1DF5E76 is the value used by blob - but any random value will work */
+   const uint32_t token = 0x1DF5E76;
+
+   etna_set_state(ctx->stream, VIVS_GL_OCCLUSION_QUERY_CONTROL, token);
+}
+
+/* Suspending a sample issues exactly the same state write as stopping it. */
+static void
+occlusion_suspend(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ occlusion_stop(hq, ctx);
+}
+
+/* Continue a suspended query in the next 64-bit slot of the buffer. */
+static void
+occlusion_resume(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+   /* earlier slots keep their counts; occlusion_result() adds them all up */
+   hq->samples += 1;
+   occlusion_start(hq, ctx);
+}
+
+/*
+ * Reduce the per-sample counters found in buf to a single query result.
+ *
+ * OCCLUSION_COUNTER queries report the sum as a 64-bit value; every other
+ * query type handled here reports whether the sum is non-zero.
+ */
+static void
+occlusion_result(struct etna_hw_query *hq, void *buf,
+                 union pipe_query_result *result)
+{
+   const uint64_t *counters = buf;
+   uint64_t total = 0;
+   unsigned slot = 0;
+
+   /* hq->samples is the index of the last slot used, hence inclusive */
+   while (slot <= hq->samples)
+      total += counters[slot++];
+
+   if (hq->base.type != PIPE_QUERY_OCCLUSION_COUNTER) {
+      result->b = !!total;
+      return;
+   }
+
+   result->u64 = total;
+}
+
+/* Unlink the query, drop its buffer reference and free it. */
+static void
+etna_hw_destroy_query(struct etna_context *ctx, struct etna_query *q)
+{
+   struct etna_hw_query *query = etna_hw_query(q);
+
+   list_del(&query->node);
+   pipe_resource_reference(&query->prsc, NULL);
+
+   FREE(query);
+}
+
+/* Sample provider returned for every occlusion query type. */
+static const struct etna_hw_sample_provider occlusion_provider = {
+   .start   = occlusion_start,
+   .stop    = occlusion_stop,
+   .suspend = occlusion_suspend,
+   .resume  = occlusion_resume,
+   .result  = occlusion_result,
+};
+
+/*
+ * Drop the query's current buffer and attach a fresh, zero-filled one.
+ *
+ * Returns true on success.  Returns false, leaving hq->prsc NULL, when the
+ * buffer cannot be created or mapped.
+ */
+static boolean
+realloc_query_bo(struct etna_context *ctx, struct etna_hw_query *hq)
+{
+   /* bytes allocated and cleared for the per-sample 64bit counters */
+   enum { QUERY_BO_SIZE = 0x1000 };
+   struct etna_resource *rsc;
+   void *map;
+
+   pipe_resource_reference(&hq->prsc, NULL);
+
+   hq->prsc = pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER,
+                                 0, QUERY_BO_SIZE);
+   if (!hq->prsc)
+      return false;
+
+   /* don't assume the buffer is zero-initialized */
+   rsc = etna_resource(hq->prsc);
+
+   etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_WRITE);
+
+   map = etna_bo_map(rsc->bo);
+   if (map)
+      memset(map, 0, QUERY_BO_SIZE);
+   etna_bo_cpu_fini(rsc->bo);
+
+   if (!map) {
+      /* an uncleared buffer would yield garbage counts; give it up */
+      pipe_resource_reference(&hq->prsc, NULL);
+      return false;
+   }
+
+   return true;
+}
+
+/*
+ * Start (or restart) a hardware query.
+ *
+ * Previous results are discarded: the query gets a new zeroed buffer and
+ * its sample slot and polling counter start over.  Returns false when no
+ * buffer could be set up, true once the query is on the active list.
+ */
+static boolean
+etna_hw_begin_query(struct etna_context *ctx, struct etna_query *q)
+{
+   struct etna_hw_query *hq = etna_hw_query(q);
+   const struct etna_hw_sample_provider *p = hq->provider;
+
+   /* ->begin_query() discards previous results, so realloc bo */
+   if (!realloc_query_bo(ctx, hq))
+      return false;
+
+   /* new buffer: write from slot 0 again and forget earlier no-wait polls,
+    * otherwise a reused query keeps creeping towards the slot limit
+    */
+   hq->samples = 0;
+   hq->no_wait_cnt = 0;
+
+   p->start(hq, ctx);
+
+   /* add to active list */
+   assert(list_empty(&hq->node));
+   list_addtail(&hq->node, &ctx->active_hw_queries);
+
+   return true;
+}
+
+/* Stop sampling and take the query off the context's active list. */
+static void
+etna_hw_end_query(struct etna_context *ctx, struct etna_query *q)
+{
+   struct etna_hw_query *query = etna_hw_query(q);
+
+   query->provider->stop(query, ctx);
+
+   /* delinit leaves the node empty, which later asserts rely on */
+   list_delinit(&query->node);
+}
+
+/*
+ * Read back the result of a query that is no longer on the active list.
+ *
+ * With wait == false the function returns false without blocking when the
+ * query buffer still has a pending write or the non-syncing CPU prep
+ * fails.  Otherwise the context is flushed, the buffer is prepared for
+ * reading and mapped, and the provider's result() callback fills *result;
+ * the function then returns true.
+ */
+static boolean
+etna_hw_get_query_result(struct etna_context *ctx, struct etna_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct etna_hw_query *hq = etna_hw_query(q);
+ struct etna_resource *rsc = etna_resource(hq->prsc);
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ /* the query must have been ended (removed from the active list) */
+ assert(LIST_IS_EMPTY(&hq->node));
+
+ if (!wait) {
+ int ret;
+
+ if (rsc->status & ETNA_PENDING_WRITE) {
+ /* piglit spec@arb_occlusion_query@occlusion_query_conform
+ * test, and silly apps perhaps, get stuck in a loop trying
+ * to get query result forever with wait==false.. we don't
+ * want to flush unnecessarily but we also don't want to
+ * spin forever.
+ */
+ if (hq->no_wait_cnt++ > 5)
+ ctx->base.flush(&ctx->base, NULL, 0);
+ return false;
+ }
+
+ /* probe without syncing; a non-zero return means "not ready" here */
+ ret = etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ | DRM_ETNA_PREP_NOSYNC);
+ if (ret)
+ return false;
+
+ etna_bo_cpu_fini(rsc->bo);
+ }
+
+ /* flush so that the GPU executes all query related actions */
+ ctx->base.flush(&ctx->base, NULL, 0);
+
+ /* get the result */
+ etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ);
+
+ void *ptr = etna_bo_map(rsc->bo);
+ p->result(hq, ptr, result);
+
+ etna_bo_cpu_fini(rsc->bo);
+
+ return true;
+}
+
+/* Query vtable installed on every query made by etna_hw_create_query(). */
+static const struct etna_query_funcs hw_query_funcs = {
+   .destroy_query    = etna_hw_destroy_query,
+   .begin_query      = etna_hw_begin_query,
+   .end_query        = etna_hw_end_query,
+   .get_query_result = etna_hw_get_query_result,
+};
+
+/*
+ * Map a PIPE_QUERY_* type to the sample provider implementing it.
+ * Returns NULL for query types that have no hardware provider here.
+ */
+static inline const struct etna_hw_sample_provider *
+query_sample_provider(unsigned query_type)
+{
+   /* all three occlusion flavours share one provider */
+   if (query_type == PIPE_QUERY_OCCLUSION_COUNTER ||
+       query_type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+       query_type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
+      return &occlusion_provider;
+
+   return NULL;
+}
+
+/*
+ * Create a hardware query of the given type.
+ *
+ * Returns NULL when the type has no sample provider or when the allocation
+ * fails; otherwise returns the embedded base query with its vtable and
+ * type set and its list node initialised.
+ */
+struct etna_query *
+etna_hw_create_query(struct etna_context *ctx, unsigned query_type)
+{
+   const struct etna_hw_sample_provider *provider =
+      query_sample_provider(query_type);
+   struct etna_hw_query *hq;
+
+   if (provider == NULL)
+      return NULL;
+
+   hq = CALLOC_STRUCT(etna_hw_query);
+   if (hq == NULL)
+      return NULL;
+
+   list_inithead(&hq->node);
+   hq->provider = provider;
+   hq->base.funcs = &hw_query_funcs;
+   hq->base.type = query_type;
+
+   return &hq->base;
+}
diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h
new file mode 100644
index 000000000..73f3c851e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_query_hw.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 Etnaviv Project
+ * Copyright (C) 2017 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ * Christian Gmeiner <christian.gmeiner@gmail.com>
+ */
+
+#ifndef H_ETNAVIV_QUERY_HW
+#define H_ETNAVIV_QUERY_HW
+
+#include "etnaviv_query.h"
+
+struct etna_hw_query;
+
+/* Callbacks that implement one kind of hardware query. */
+struct etna_hw_sample_provider {
+ /* invoked when a query begins */
+ void (*start)(struct etna_hw_query *hq, struct etna_context *ctx);
+ /* invoked when a query ends */
+ void (*stop)(struct etna_hw_query *hq, struct etna_context *ctx);
+ /* invoked by etna_hw_query_suspend() for an active query */
+ void (*suspend)(struct etna_hw_query *hq, struct etna_context *ctx);
+ /* invoked by etna_hw_query_resume() for an active query */
+ void (*resume)(struct etna_hw_query *hq, struct etna_context *ctx);
+
+ /* convert the mapped query buffer contents (buf) into *result */
+ void (*result)(struct etna_hw_query *hq, void *buf,
+ union pipe_query_result *result);
+};
+
+/* A query whose samples are written by the hardware into a buffer. */
+struct etna_hw_query {
+ /* must stay the first member: etna_hw_query() casts from it */
+ struct etna_query base;
+
+ struct pipe_resource *prsc; /* buffer the samples are written to */
+ unsigned samples; /* index of the last sample slot used in resource */
+ unsigned no_wait_cnt; /* see etna_hw_get_query_result() */
+ struct list_head node; /* list-node in ctx->active_hw_queries */
+
+ const struct etna_hw_sample_provider *provider;
+};
+
+/* Downcast from the base query, which is the first member of the struct. */
+static inline struct etna_hw_query *
+etna_hw_query(struct etna_query *q)
+{
+   struct etna_hw_query *hq = (struct etna_hw_query *)q;
+
+   return hq;
+}
+
+struct etna_query *
+etna_hw_create_query(struct etna_context *ctx, unsigned query_type);
+
+/* Suspend sampling for hq; does nothing unless the query is active. */
+static inline void
+etna_hw_query_suspend(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+   if (hq->base.active)
+      hq->provider->suspend(hq, ctx);
+}
+
+/* Resume sampling for hq; does nothing unless the query is active. */
+static inline void
+etna_hw_query_resume(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+   if (hq->base.active)
+      hq->provider->resume(hq, ctx);
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
index 3c8d8f7c0..1d3d2fb76 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
@@ -34,7 +34,9 @@
struct fd2_blend_stateobj {
struct pipe_blend_state base;
- uint32_t rb_blendcontrol;
+ uint32_t rb_blendcontrol_rgb;
+ uint32_t rb_blendcontrol_alpha;
+ uint32_t rb_blendcontrol_no_alpha_rgb;
uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
uint32_t rb_colormask;
};
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index 6dc639670..0905ab6f6 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -46,6 +46,12 @@ static uint32_t fmt2swap(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
/* TODO probably some more.. */
return 1;
default:
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
index 12ab5b410..c2a60c683 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
@@ -52,8 +52,15 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
/* TODO figure out how to render to other formats.. */
if ((usage & PIPE_BIND_RENDER_TARGET) &&
- ((format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
- (format != PIPE_FORMAT_B8G8R8X8_UNORM))) {
+ ((format != PIPE_FORMAT_B5G6R5_UNORM) &&
+ (format != PIPE_FORMAT_B5G5R5A1_UNORM) &&
+ (format != PIPE_FORMAT_B5G5R5X1_UNORM) &&
+ (format != PIPE_FORMAT_B4G4R4A4_UNORM) &&
+ (format != PIPE_FORMAT_B4G4R4X4_UNORM) &&
+ (format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
+ (format != PIPE_FORMAT_B8G8R8X8_UNORM) &&
+ (format != PIPE_FORMAT_R8G8B8A8_UNORM) &&
+ (format != PIPE_FORMAT_R8G8B8X8_UNORM))) {
DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h
index 5294ced3c..c2808162c 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -78,10 +78,8 @@
/**
* Max number of instructions (for all fragment shaders combined per context)
* that will be kept around (counted in terms of llvm ir).
- * Note: the definition looks odd, but there's branches which use a different
- * number of max shader variants.
*/
-#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS)
+#define LP_MAX_SHADER_INSTRUCTIONS (2048 * LP_MAX_SHADER_VARIANTS)
/**
* Max number of setup variants that will be kept around.
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c
index d5ed6561b..7b81903b4 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c
@@ -125,6 +125,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
for (i = 0; i < num_threads; i++) {
/* safer (still not guaranteed) when there's an overflow */
vresult->b = vresult->b || pq->end[i];
@@ -155,6 +156,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe,
*result = pq->num_primitives_written;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
vresult->b = pq->num_primitives_generated > pq->num_primitives_written;
break;
case PIPE_QUERY_SO_STATISTICS: {
@@ -215,6 +217,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
break;
@@ -229,6 +232,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
llvmpipe->active_occlusion_queries++;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
break;
@@ -264,6 +268,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
pq->num_primitives_written =
llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written;
pq->num_primitives_generated =
@@ -291,6 +296,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
assert(llvmpipe->active_occlusion_queries);
llvmpipe->active_occlusion_queries--;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 13bed088b..98d6bbe50 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -95,12 +95,12 @@ write_tsv_row(FILE *fp,
fprintf(fp,
"%s\t%s\t%s\t%s\t%s\t%s\n",
- util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
- util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
- util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
- util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
- util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
- util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+ util_str_blend_func(blend->rt[0].rgb_func, TRUE),
+ util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ util_str_blend_func(blend->rt[0].alpha_func, TRUE),
+ util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
fflush(fp);
}
@@ -119,12 +119,12 @@ dump_blend_type(FILE *fp,
fprintf(fp,
" %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
- "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
- "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
- "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
- "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
- "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
- "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+ "rgb_func", util_str_blend_func(blend->rt[0].rgb_func, TRUE),
+ "rgb_src_factor", util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ "rgb_dst_factor", util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ "alpha_func", util_str_blend_func(blend->rt[0].alpha_func, TRUE),
+ "alpha_src_factor", util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ "alpha_dst_factor", util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
fprintf(fp, " ...\n");
fflush(fp);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c
index 9b1616213..e9a6e01fd 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -357,8 +357,6 @@ test_all(unsigned verbose, FILE *fp)
enum pipe_format format;
boolean success = TRUE;
- util_format_s3tc_init();
-
#if USE_TEXTURE_CACHE
cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
#endif
@@ -383,11 +381,6 @@ test_all(unsigned verbose, FILE *fp)
if (util_format_is_pure_integer(format))
continue;
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
- !util_format_s3tc_enabled) {
- continue;
- }
-
/* only have util fetch func for etc1 */
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
format != PIPE_FORMAT_ETC1_RGB8) {
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c
index 733253b0b..162c74ad7 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -303,6 +303,8 @@ llvmpipe_resource_create_front(struct pipe_screen *_screen,
FREE(lpr);
return NULL;
}
+
+
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
@@ -310,6 +312,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
return llvmpipe_resource_create_front(_screen, templat, NULL);
}
+
static void
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *pt)
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
index 5556e0c77..89d5b935e 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
@@ -10,7 +10,6 @@
* POW - EX2 + MUL + LG2
* SUB - ADD, second source negated
* SWZ - MOV
- * XPD -
*
* Register access
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index e66d8af76..907ca17b0 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -163,8 +163,6 @@
* SUB - ADD, negate second source
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
- * SCS - COS + SIN
- * XPD
*
* NV40 Looping
* Loops appear to be fairly expensive on NV40 at least, the proprietary
diff --git a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 727b50937..ac3e409b2 100644
--- a/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/lib/mesa/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -157,6 +157,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->nesting = nv50->screen->num_occlusion_queries_active++;
if (hq->nesting) {
nv50_hw_query_get(push, q, 0x10, 0x0100f002);
@@ -215,6 +216,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
nv50_hw_query_get(push, q, 0, 0x0100f002);
if (--nv50->screen->num_occlusion_queries_active == 0) {
PUSH_SPACE(push, 2);
@@ -307,6 +309,7 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
res64[0] = hq->data[1] - hq->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
res8[0] = hq->data[1] != hq->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
@@ -378,6 +381,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->rotate = 32;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
diff --git a/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
index 075eac50f..3ec5a42c7 100644
--- a/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
+++ b/lib/mesa/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
@@ -35,6 +35,8 @@
#include <stdio.h>
+#include "util/macros.h"
+
#include "r300_reg.h"
#include "radeon_compiler.h"
@@ -61,7 +63,7 @@ static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};
-static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+static const int num_native_swizzles = ARRAY_SIZE(native_swizzles);
/**
* Find a native RGB swizzle that matches the given swizzle.
diff --git a/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c b/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c
index 0f021e9f4..d86819afa 100644
--- a/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c
+++ b/lib/mesa/src/gallium/drivers/r300/r300_hyperz.c
@@ -26,7 +26,6 @@
#include "r300_fs.h"
#include "util/u_format.h"
-#include "util/u_mm.h"
/*
HiZ rules - taken from various docs
diff --git a/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c b/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c
new file mode 100644
index 000000000..6bc307a4b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/cayman_msaa.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+#include "r600_cs.h"
+#include "evergreend.h"
+
+/*
+ * Sample location tables.
+ *
+ * cayman_get_sample_position() decodes each 32-bit word as four (x, y)
+ * pairs of signed 4-bit values, x in the lower nibble of each byte, and
+ * maps a value v to (v + 8) / 16.  The same word is repeated four times
+ * because cayman_emit_msaa_sample_locs() writes one copy to each of the
+ * X0Y0, X1Y0, X0Y1 and X1Y1 registers.  The *_max_dist_* constants are
+ * the values programmed into MAX_SAMPLE_DIST by cayman_emit_msaa_config().
+ */
+
+/* 2xMSAA
+ * There are two locations (4, 4), (-4, -4). */
+const uint32_t eg_sample_locs_2x[4] = {
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+};
+const unsigned eg_max_dist_2x = 4;
+/* 4xMSAA
+ * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
+const uint32_t eg_sample_locs_4x[4] = {
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+};
+const unsigned eg_max_dist_4x = 6;
+
+/* Cayman 8xMSAA
+ * Words 0-3 hold samples 0-3 and words 4-7 hold samples 4-7. */
+static const uint32_t cm_sample_locs_8x[] = {
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+};
+static const unsigned cm_max_dist_8x = 8;
+/* Cayman 16xMSAA
+ * Each group of four words holds the next four samples. */
+static const uint32_t cm_sample_locs_16x[] = {
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+};
+static const unsigned cm_max_dist_16x = 8;
+
+/*
+ * Extract the 4-bit field starting at bit `shift` of a packed location word
+ * and map it to a position: (field + 8) / 16.
+ */
+static float cayman_decode_sample_coord(uint32_t word, int shift)
+{
+	/* storing into a 4-bit int bitfield turns the raw nibble into a signed
+	 * value (relies on the compiler treating this bitfield as signed) */
+	struct {
+		int idx:4;
+	} val;
+
+	val.idx = (word >> shift) & 0xf;
+	return (float)(val.idx + 8) / 16.0f;
+}
+
+/*
+ * Write the (x, y) position of sample `sample_index` for the given sample
+ * count to out_value[0] and out_value[1].  Sample counts other than
+ * 2, 4, 8 and 16 yield (0.5, 0.5).
+ */
+void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+				unsigned sample_index, float *out_value)
+{
+	const uint32_t *table;
+	int shift, word;
+
+	switch (sample_count) {
+	case 2:
+		table = eg_sample_locs_2x;
+		break;
+	case 4:
+		table = eg_sample_locs_4x;
+		break;
+	case 8:
+		table = cm_sample_locs_8x;
+		break;
+	case 16:
+		table = cm_sample_locs_16x;
+		break;
+	case 1:
+	default:
+		out_value[0] = out_value[1] = 0.5;
+		return;
+	}
+
+	if (sample_count <= 4) {
+		/* all samples live in the first word, one byte per sample */
+		shift = 4 * (sample_index * 2);
+		word = 0;
+	} else {
+		/* four samples per word; each group of four starts 4 words on */
+		shift = 4 * (sample_index % 4 * 2);
+		word = (sample_index / 4) * 4;
+	}
+
+	out_value[0] = cayman_decode_sample_coord(table[word], shift);
+	out_value[1] = cayman_decode_sample_coord(table[word], shift + 4);
+}
+
+/* Fill the context's cached sample positions for 1x, 2x, 4x, 8x and 16x. */
+void cayman_init_msaa(struct pipe_context *ctx)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	unsigned s;
+
+	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
+
+	for (s = 0; s < 2; s++) {
+		cayman_get_sample_position(ctx, 2, s, rctx->sample_locations_2x[s]);
+	}
+	for (s = 0; s < 4; s++) {
+		cayman_get_sample_position(ctx, 4, s, rctx->sample_locations_4x[s]);
+	}
+	for (s = 0; s < 8; s++) {
+		cayman_get_sample_position(ctx, 8, s, rctx->sample_locations_8x[s]);
+	}
+	for (s = 0; s < 16; s++) {
+		cayman_get_sample_position(ctx, 16, s, rctx->sample_locations_16x[s]);
+	}
+}
+
+/*
+ * Emit the sample location registers for nr_samples.
+ *
+ * 8x and 16x are written as one register sequence starting at X0Y0_0;
+ * 2x and 4x write the first register of each of the four pixels; any
+ * other sample count writes zero to those four registers.
+ */
+void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
+{
+	/* first sample-location register of each pixel of the quad */
+	static const unsigned quad_regs[4] = {
+		CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+		CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+		CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+		CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+	};
+	const uint32_t *locs = NULL;
+	unsigned px, word;
+
+	switch (nr_samples) {
+	case 8:
+		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+		for (px = 0; px < 4; px++) {
+			radeon_emit(cs, cm_sample_locs_8x[px]);
+			radeon_emit(cs, cm_sample_locs_8x[px + 4]);
+			/* two zero words pad each pixel except the last one */
+			if (px < 3) {
+				radeon_emit(cs, 0);
+				radeon_emit(cs, 0);
+			}
+		}
+		return;
+	case 16:
+		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+		/* per pixel: table words px, px + 4, px + 8, px + 12 */
+		for (px = 0; px < 4; px++) {
+			for (word = 0; word < 4; word++)
+				radeon_emit(cs, cm_sample_locs_16x[px + 4 * word]);
+		}
+		return;
+	case 2:
+		locs = eg_sample_locs_2x;
+		break;
+	case 4:
+		locs = eg_sample_locs_4x;
+		break;
+	default:
+	case 1:
+		/* locs stays NULL: all four registers are written with zero */
+		break;
+	}
+
+	for (px = 0; px < 4; px++)
+		radeon_set_context_reg(cs, quad_regs[px], locs ? locs[px] : 0);
+}
+
+/*
+ * Emit PA_SC_LINE_CNTL, PA_SC_AA_CONFIG, DB_EQAA and PA_SC_MODE_CNTL_1.
+ *
+ * nr_samples takes precedence over overrast_samples when both are above 1;
+ * when neither is, the registers are written with their non-MSAA values.
+ * sc_mode_cntl_1 is OR'ed into (or written as) PA_SC_MODE_CNTL_1.
+ */
+void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
+ int ps_iter_samples, int overrast_samples,
+ unsigned sc_mode_cntl_1)
+{
+ /* sample count the rasterizer is set up for; 0 means no MSAA setup */
+ int setup_samples = nr_samples > 1 ? nr_samples :
+ overrast_samples > 1 ? overrast_samples : 0;
+ /* Required by OpenGL line rasterization.
+ *
+ * TODO: We should also enable perpendicular endcaps for AA lines,
+ * but that requires implementing line stippling in the pixel
+ * shader. SC can only do line stippling with axis-aligned
+ * endcaps.
+ */
+ unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
+
+ if (setup_samples > 1) {
+ /* indexed by log2(nr_samples) */
+ unsigned max_dist[] = {
+ 0,
+ eg_max_dist_2x,
+ eg_max_dist_4x,
+ cm_max_dist_8x,
+ cm_max_dist_16x
+ };
+ unsigned log_samples = util_logbase2(setup_samples);
+ /* ps_iter_samples goes through util_next_power_of_two() first */
+ unsigned log_ps_iter_samples =
+ util_logbase2(util_next_power_of_two(ps_iter_samples));
+
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl |
+ S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ if (nr_samples > 1) {
+ /* real multisampling: all EQAA sample counts follow log_samples */
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
+ S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+ S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
+ sc_mode_cntl_1);
+ } else if (overrast_samples > 1) {
+ /* overrasterization only: no per-sample EQAA counts are set */
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
+ S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1);
+ }
+ } else {
+ /* no MSAA and no overrasterization */
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1);
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c
new file mode 100644
index 000000000..a6e3b7fcf
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_buffer_common.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "r600_cs.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+/**
+ * Tell whether \p buf is referenced, for the given \p usage, by the command
+ * stream of either ring of \p ctx.
+ *
+ * The GFX command stream is always queried; the DMA command stream is only
+ * queried when radeon_emitted() reports that something was emitted to it.
+ */
+bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+				     struct pb_buffer *buf,
+				     enum radeon_bo_usage usage)
+{
+	return ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage) ||
+	       (radeon_emitted(ctx->dma.cs, 0) &&
+		ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage));
+}
+
+/**
+ * Map a resource for CPU access after making sure that pending work on the
+ * GFX and DMA rings which references it has been flushed.
+ *
+ * \param ctx      context whose rings are checked and flushed
+ * \param resource resource to map; must not be sparse (asserted)
+ * \param usage    PIPE_TRANSFER_* flags; UNSYNCHRONIZED skips every check,
+ *                 DONTBLOCK makes the function return NULL instead of
+ *                 waiting
+ * \return the pointer returned by the winsys buffer_map hook, or NULL when
+ *         DONTBLOCK was requested and the buffer is busy
+ */
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage)
+{
+ enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
+ bool busy = false;
+
+ assert(!(resource->flags & RADEON_FLAG_SPARSE));
+
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+ return ctx->ws->buffer_map(resource->buf, NULL, usage);
+ }
+
+ if (!(usage & PIPE_TRANSFER_WRITE)) {
+ /* have to wait for the last write */
+ rusage = RADEON_USAGE_WRITE;
+ }
+
+ /* GFX ring: only relevant if something beyond the initial CS contents
+ * has been emitted and the CS references the buffer. */
+ if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+ resource->buf, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return NULL;
+ } else {
+ ctx->gfx.flush(ctx, 0, NULL);
+ busy = true;
+ }
+ }
+ /* DMA ring: same treatment as the GFX ring above. */
+ if (radeon_emitted(ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+ resource->buf, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return NULL;
+ } else {
+ ctx->dma.flush(ctx, 0, NULL);
+ busy = true;
+ }
+ }
+
+ if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ return NULL;
+ } else {
+ /* We will be waiting for the GPU. Wait for any offloaded
+ * CS flush to complete to avoid busy-waiting in the winsys. */
+ ctx->ws->cs_sync_flush(ctx->gfx.cs);
+ if (ctx->dma.cs)
+ ctx->ws->cs_sync_flush(ctx->dma.cs);
+ }
+ }
+
+ /* Setting the CS to NULL will prevent doing checks we have done already. */
+ return ctx->ws->buffer_map(resource->buf, NULL, usage);
+}
+
+/**
+ * Choose the memory placement and winsys flags of a resource before its
+ * storage is allocated.
+ *
+ * Stores \p size and \p alignment in \p res, then derives res->domains,
+ * res->flags, res->vram_usage and res->gart_usage from the pipe_resource
+ * description in res->b.b (usage, target, bind, flags, nr_samples), from the
+ * texture surface layout for non-buffers, and from the kernel information
+ * and debug flags in \p rscreen.
+ *
+ * \param rscreen   screen providing DRM version, VRAM info and debug flags
+ * \param res       resource to set up; res->b.b must already be filled in
+ * \param size      size stored in res->bo_size and used for the usage
+ *                  accounting
+ * \param alignment alignment stored in res->bo_alignment
+ */
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+			       struct r600_resource *res,
+			       uint64_t size, unsigned alignment)
+{
+	/* Only meaningful for non-buffer targets; every dereference below is
+	 * short-circuited by a PIPE_BUFFER target check. */
+	struct r600_texture *rtex = (struct r600_texture*)res;
+
+	res->bo_size = size;
+	res->bo_alignment = alignment;
+	res->flags = 0;
+	res->texture_handle_allocated = false;
+	res->image_handle_allocated = false;
+
+	switch (res->b.b.usage) {
+	case PIPE_USAGE_STREAM:
+		res->flags = RADEON_FLAG_GTT_WC;
+		/* fall through */
+	case PIPE_USAGE_STAGING:
+		/* Transfers are likely to occur more often with these
+		 * resources. */
+		res->domains = RADEON_DOMAIN_GTT;
+		break;
+	case PIPE_USAGE_DYNAMIC:
+		/* Older kernels didn't always flush the HDP cache before
+		 * CS execution
+		 */
+		if (rscreen->info.drm_major == 2 &&
+		    rscreen->info.drm_minor < 40) {
+			res->domains = RADEON_DOMAIN_GTT;
+			res->flags |= RADEON_FLAG_GTT_WC;
+			break;
+		}
+		/* fall through */
+	case PIPE_USAGE_DEFAULT:
+	case PIPE_USAGE_IMMUTABLE:
+	default:
+		/* Not listing GTT here improves performance in some
+		 * apps. */
+		res->domains = RADEON_DOMAIN_VRAM;
+		res->flags |= RADEON_FLAG_GTT_WC;
+		break;
+	}
+
+	if (res->b.b.target == PIPE_BUFFER &&
+	    res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+			      PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
+		/* Use GTT for all persistent mappings with older
+		 * kernels, because they didn't always flush the HDP
+		 * cache before CS execution.
+		 *
+		 * Write-combined CPU mappings are fine, the kernel
+		 * ensures all CPU writes finish before the GPU
+		 * executes a command stream.
+		 */
+		if (rscreen->info.drm_major == 2 &&
+		    rscreen->info.drm_minor < 40)
+			res->domains = RADEON_DOMAIN_GTT;
+	}
+
+	/* Tiled textures are unmappable. Always put them in VRAM.
+	 *
+	 * R600_RESOURCE_FLAG_UNMAPPABLE is requested through the
+	 * pipe_resource flags, so it has to be read from res->b.b.flags:
+	 * res->flags was reset above and only holds RADEON_FLAG_* bits, which
+	 * made the previous test against res->flags ineffective.
+	 */
+	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
+	    res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
+		res->domains = RADEON_DOMAIN_VRAM;
+		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
+			      RADEON_FLAG_GTT_WC;
+	}
+
+	/* Only displayable single-sample textures can be shared between
+	 * processes. */
+	if (res->b.b.target == PIPE_BUFFER ||
+	    res->b.b.nr_samples >= 2 ||
+	    (rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY &&
+	     /* Raven doesn't use display micro mode for 32bpp, so check this: */
+	     !(res->b.b.bind & PIPE_BIND_SCANOUT)))
+		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
+	/* If VRAM is just stolen system memory, allow both VRAM and
+	 * GTT, whichever has free space. If a buffer is evicted from
+	 * VRAM to GTT, it will stay there.
+	 *
+	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
+	 * placements even with a low amount of stolen VRAM.
+	 */
+	if (!rscreen->info.has_dedicated_vram &&
+	    (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
+	    res->domains == RADEON_DOMAIN_VRAM) {
+		res->domains = RADEON_DOMAIN_VRAM_GTT;
+		res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
+	}
+
+	if (rscreen->debug_flags & DBG_NO_WC)
+		res->flags &= ~RADEON_FLAG_GTT_WC;
+
+	if (res->b.b.bind & PIPE_BIND_SHARED)
+		res->flags |= RADEON_FLAG_NO_SUBALLOC;
+
+	/* Set expected VRAM and GART usage for the buffer. A resource that
+	 * may live in VRAM is accounted entirely as VRAM. */
+	res->vram_usage = 0;
+	res->gart_usage = 0;
+
+	if (res->domains & RADEON_DOMAIN_VRAM)
+		res->vram_usage = size;
+	else if (res->domains & RADEON_DOMAIN_GTT)
+		res->gart_usage = size;
+}
+
+/**
+ * Allocate fresh winsys storage for \p res from the size, alignment, domains
+ * and flags already stored in it, and release the previous storage.
+ *
+ * \return false when the winsys cannot create the buffer (in which case
+ *         \p res is left untouched), true otherwise
+ */
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+			 struct r600_resource *res)
+{
+	struct pb_buffer *fresh, *stale;
+
+	fresh = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
+					   res->bo_alignment,
+					   res->domains, res->flags);
+	if (!fresh)
+		return false;
+
+	/* Swap the pointer with one store so that res->buf never becomes
+	 * NULL if it was set before. Another context may still be using the
+	 * buffer while this one invalidates it. */
+	stale = res->buf;
+	res->buf = fresh; /* should be atomic */
+
+	res->gpu_address = rscreen->info.has_virtual_memory ?
+		rscreen->ws->buffer_get_virtual_address(res->buf) : 0;
+
+	/* Drop our reference to the old storage. */
+	pb_reference(&stale, NULL);
+
+	/* Nothing has been written to the new storage yet. */
+	util_range_set_empty(&res->valid_buffer_range);
+
+	/* Optional VM debug output, buffers only. */
+	if ((rscreen->debug_flags & DBG_VM) && res->b.b.target == PIPE_BUFFER) {
+		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
+			res->gpu_address, res->gpu_address + res->buf->size,
+			res->buf->size);
+	}
+	return true;
+}
+
+/* resource_destroy hook for buffers: tears down the threaded-context state,
+ * the valid range and the winsys buffer reference, then frees the struct. */
+static void r600_buffer_destroy(struct pipe_screen *screen,
+				struct pipe_resource *buf)
+{
+	struct r600_resource *res = r600_resource(buf);
+
+	threaded_resource_deinit(buf);
+	util_range_destroy(&res->valid_buffer_range);
+	pb_reference(&res->buf, NULL);
+	FREE(res);
+}
+
+/**
+ * Discard the contents of a buffer.
+ *
+ * \return false when the buffer must keep its storage (shared, sparse or
+ *         user-pointer buffers), true once the contents were discarded
+ */
+static bool
+r600_invalidate_buffer(struct r600_common_context *rctx,
+		       struct r600_resource *rbuffer)
+{
+	bool busy;
+
+	/* Shared and sparse buffers can't be reallocated. For
+	 * AMD_pinned_memory, the user pointer association only gets broken
+	 * when the buffer is explicitly re-allocated. */
+	if (rbuffer->b.is_shared ||
+	    (rbuffer->flags & RADEON_FLAG_SPARSE) ||
+	    rbuffer->b.is_user_ptr)
+		return false;
+
+	/* Would mapping this buffer make us wait for the GPU? */
+	busy = r600_rings_is_buffer_referenced(rctx, rbuffer->buf,
+					       RADEON_USAGE_READWRITE) ||
+	       !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE);
+
+	if (busy)
+		rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
+	else
+		util_range_set_empty(&rbuffer->valid_buffer_range);
+
+	return true;
+}
+
+/**
+ * Make \p dst use the storage of \p src.
+ *
+ * The winsys buffer, GPU address, bind flags and winsys flags are taken
+ * from \p src; the remaining placement fields are asserted to match
+ * already. The context's rebind_buffer hook is then called with the address
+ * \p dst had before the swap.
+ */
+void r600_replace_buffer_storage(struct pipe_context *ctx,
+				 struct pipe_resource *dst,
+				 struct pipe_resource *src)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_resource *to = r600_resource(dst);
+	struct r600_resource *from = r600_resource(src);
+	const uint64_t prev_address = to->gpu_address;
+
+	/* Fields that are not copied must already agree. */
+	assert(to->vram_usage == from->vram_usage);
+	assert(to->gart_usage == from->gart_usage);
+	assert(to->bo_size == from->bo_size);
+	assert(to->bo_alignment == from->bo_alignment);
+	assert(to->domains == from->domains);
+
+	pb_reference(&to->buf, from->buf);
+	to->gpu_address = from->gpu_address;
+	to->b.b.bind = from->b.b.bind;
+	to->flags = from->flags;
+
+	rctx->rebind_buffer(ctx, dst, prev_address);
+}
+
+/* invalidate_resource entry point: only buffers are handled, and the result
+ * of the invalidation is deliberately ignored. */
+void r600_invalidate_resource(struct pipe_context *ctx,
+			      struct pipe_resource *resource)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_resource *res = r600_resource(resource);
+
+	/* We currently do nothing for anything but buffers. */
+	if (resource->target != PIPE_BUFFER)
+		return;
+
+	(void)r600_invalidate_buffer(rctx, res);
+}
+
+/**
+ * Allocate a transfer object describing a buffer mapping and hand it back
+ * through \p ptransfer.
+ *
+ * The object comes from the unsynchronized pool when
+ * TC_TRANSFER_MAP_THREADED_UNSYNC is set in \p usage, otherwise from the
+ * regular pool. A reference to \p resource is taken; \p staging and
+ * \p offset are stored as-is.
+ *
+ * \return \p data, unchanged
+ * NOTE(review): the slab_alloc() result is not checked before use.
+ */
+static void *r600_buffer_get_transfer(struct pipe_context *ctx,
+				      struct pipe_resource *resource,
+				      unsigned usage,
+				      const struct pipe_box *box,
+				      struct pipe_transfer **ptransfer,
+				      void *data, struct r600_resource *staging,
+				      unsigned offset)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_transfer *xfer;
+
+	if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
+		xfer = slab_alloc(&rctx->pool_transfers_unsync);
+	else
+		xfer = slab_alloc(&rctx->pool_transfers);
+
+	/* Driver-private part. */
+	xfer->staging = staging;
+	xfer->offset = offset;
+	xfer->b.staging = NULL;
+
+	/* Generic pipe_transfer part; buffers have a single level and no
+	 * strides. The pointer is cleared first so that the reference helper
+	 * does not see an uninitialized old value. */
+	xfer->b.b.resource = NULL;
+	pipe_resource_reference(&xfer->b.b.resource, resource);
+	xfer->b.b.level = 0;
+	xfer->b.b.usage = usage;
+	xfer->b.b.box = *box;
+	xfer->b.b.stride = 0;
+	xfer->b.b.layer_stride = 0;
+
+	*ptransfer = &xfer->b.b;
+	return data;
+}
+
+/**
+ * Tell whether a buffer copy with the given offsets and size can be done.
+ *
+ * With CP DMA any copy qualifies. Otherwise both offsets and the size must
+ * be multiples of 4 and either a DMA command stream or streamout must be
+ * available.
+ */
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+				     unsigned dstx, unsigned srcx, unsigned size)
+{
+	if (rctx->screen->has_cp_dma)
+		return true;
+
+	/* Everything else needs dword alignment. */
+	if ((dstx % 4) || (srcx % 4) || (size % 4))
+		return false;
+
+	return rctx->dma.cs || rctx->screen->has_streamout;
+}
+
+/**
+ * transfer_map hook for buffers.
+ *
+ * The usage flags are first adjusted (user pointers become persistent,
+ * writes to never-initialized ranges become unsynchronized, whole-range
+ * discards become whole-resource discards). Then one of three paths is
+ * taken:
+ *  - a write-only mapping of a temporary upload buffer when discarding a
+ *    range of a busy (or sparse) buffer,
+ *  - a read mapping of a freshly created staging buffer filled with a DMA
+ *    copy,
+ *  - a direct mapping synchronized with the rings.
+ *
+ * \param ctx       context performing the mapping
+ * \param resource  buffer to map
+ * \param level     not used by this function
+ * \param usage     PIPE_TRANSFER_* / TC_TRANSFER_MAP_* flags
+ * \param box       mapped range; only x and width are read here
+ * \param ptransfer receives the transfer object on success
+ * \return CPU pointer to the start of the mapped range, or NULL on failure
+ */
+static void *r600_buffer_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
+ struct r600_resource *rbuffer = r600_resource(resource);
+ uint8_t *data;
+
+ assert(box->x + box->width <= resource->width0);
+
+ /* From GL_AMD_pinned_memory issues:
+ *
+ * 4) Is glMapBuffer on a shared buffer guaranteed to return the
+ * same system address which was specified at creation time?
+ *
+ * RESOLVED: NO. The GL implementation might return a different
+ * virtual mapping of that memory, although the same physical
+ * page will be used.
+ *
+ * So don't ever use staging buffers.
+ */
+ if (rbuffer->b.is_user_ptr)
+ usage |= PIPE_TRANSFER_PERSISTENT;
+
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
+ usage & PIPE_TRANSFER_WRITE &&
+ !rbuffer->b.is_shared &&
+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+
+ /* If discarding the entire range, discard the whole resource instead. */
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
+ box->x == 0 && box->width == resource->width0) {
+ usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+ }
+
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
+ !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ TC_TRANSFER_MAP_NO_INVALIDATE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ if (r600_invalidate_buffer(rctx, rbuffer)) {
+ /* At this point, the buffer is always idle. */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ } else {
+ /* Fall back to a temporary buffer. */
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ }
+ }
+
+ /* Range discard: write into a temporary upload buffer if the real
+ * buffer is busy. Sparse buffers always take this path. */
+ if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+ !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
+ ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_PERSISTENT)) &&
+ r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ /* Check if mapping this buffer would cause waiting for the GPU.
+ */
+ if (rbuffer->flags & RADEON_FLAG_SPARSE ||
+ r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ /* Do a wait-free write-only transfer using a temporary buffer. */
+ unsigned offset;
+ struct r600_resource *staging = NULL;
+
+ /* The allocation is padded by box->x modulo the map
+ * alignment so that the returned pointer keeps the same
+ * misalignment as the destination offset. */
+ u_upload_alloc(ctx->stream_uploader, 0,
+ box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, (struct pipe_resource**)&staging,
+ (void**)&data);
+
+ if (staging) {
+ data += box->x % R600_MAP_BUFFER_ALIGNMENT;
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, staging, offset);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ /* Non-sparse upload failure falls through to the direct
+ * synchronized mapping at the end of the function. */
+ } else {
+ /* At this point, the buffer is always idle (we checked it above). */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+ }
+ /* Use a staging buffer in cached GTT for reads. */
+ else if (((usage & PIPE_TRANSFER_READ) &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ (rbuffer->domains & RADEON_DOMAIN_VRAM ||
+ rbuffer->flags & RADEON_FLAG_GTT_WC) &&
+ r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE)) {
+ struct r600_resource *staging;
+
+ assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
+ staging = (struct r600_resource*) pipe_buffer_create(
+ ctx->screen, 0, PIPE_USAGE_STAGING,
+ box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
+ if (staging) {
+ /* Copy the VRAM buffer to the staging buffer. */
+ rctx->dma_copy(ctx, &staging->b.b, 0,
+ box->x % R600_MAP_BUFFER_ALIGNMENT,
+ 0, 0, resource, 0, box);
+
+ /* The copy was just queued, so the staging buffer must be
+ * mapped with synchronization. */
+ data = r600_buffer_map_sync_with_rings(rctx, staging,
+ usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
+ if (!data) {
+ r600_resource_reference(&staging, NULL);
+ return NULL;
+ }
+ data += box->x % R600_MAP_BUFFER_ALIGNMENT;
+
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, staging, 0);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ }
+
+ /* Direct mapping of the buffer itself. */
+ data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
+ if (!data) {
+ return NULL;
+ }
+ data += box->x;
+
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, NULL, 0);
+}
+
+/**
+ * Commit the written range \p box of a mapped buffer.
+ *
+ * When the mapping went through a staging buffer, the range is copied from
+ * it into the real buffer. In every case the range is added to the
+ * buffer's valid range.
+ */
+static void r600_buffer_do_flush_region(struct pipe_context *ctx,
+					struct pipe_transfer *transfer,
+					const struct pipe_box *box)
+{
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+	struct r600_resource *target = r600_resource(transfer->resource);
+
+	if (rtransfer->staging) {
+		struct pipe_box src_box;
+		/* Position of the range inside the staging buffer. */
+		unsigned staging_x = rtransfer->offset +
+				     box->x % R600_MAP_BUFFER_ALIGNMENT;
+
+		u_box_1d(staging_x, box->width, &src_box);
+
+		/* Copy the staging buffer into the original one. */
+		ctx->resource_copy_region(ctx, transfer->resource, 0,
+					  box->x, 0, 0,
+					  &rtransfer->staging->b.b, 0,
+					  &src_box);
+	}
+
+	util_range_add(&target->valid_buffer_range, box->x,
+		       box->x + box->width);
+}
+
+/**
+ * transfer_flush_region hook: commits \p rel_box, which is relative to the
+ * start of the mapped range. Does nothing unless the transfer was mapped
+ * with both PIPE_TRANSFER_WRITE and PIPE_TRANSFER_FLUSH_EXPLICIT.
+ */
+static void r600_buffer_flush_region(struct pipe_context *ctx,
+				     struct pipe_transfer *transfer,
+				     const struct pipe_box *rel_box)
+{
+	const unsigned needed = PIPE_TRANSFER_WRITE |
+				PIPE_TRANSFER_FLUSH_EXPLICIT;
+	struct pipe_box abs_box;
+
+	if ((transfer->usage & needed) != needed)
+		return;
+
+	/* Translate to buffer-absolute coordinates. */
+	u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &abs_box);
+	r600_buffer_do_flush_region(ctx, transfer, &abs_box);
+}
+
+/**
+ * transfer_unmap hook for buffers.
+ *
+ * Written mappings without explicit flushing are committed as a whole.
+ * Then the staging and resource references are dropped and the transfer
+ * object is returned to the regular pool.
+ */
+static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
+				       struct pipe_transfer *transfer)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+	bool implicit_flush = (transfer->usage & PIPE_TRANSFER_WRITE) &&
+			      !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT);
+
+	if (implicit_flush)
+		r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
+
+	r600_resource_reference(&rtransfer->staging, NULL);
+	assert(rtransfer->b.staging == NULL); /* for threaded context only */
+	pipe_resource_reference(&transfer->resource, NULL);
+
+	/* pool_transfers_unsync is not used here because we are always in
+	 * the driver thread. */
+	slab_free(&rctx->pool_transfers, transfer);
+}
+
+/**
+ * Copy \p size bytes from \p data into \p buffer at \p offset.
+ *
+ * The range is mapped for writing with DISCARD_RANGE (plus the caller's
+ * \p usage flags), filled with memcpy() and unmapped. Nothing is written
+ * when the mapping fails.
+ */
+void r600_buffer_subdata(struct pipe_context *ctx,
+			 struct pipe_resource *buffer,
+			 unsigned usage, unsigned offset,
+			 unsigned size, const void *data)
+{
+	struct pipe_transfer *xfer = NULL;
+	struct pipe_box range;
+	uint8_t *dst = NULL;
+
+	u_box_1d(offset, size, &range);
+	dst = r600_buffer_transfer_map(ctx, buffer, 0,
+				       PIPE_TRANSFER_WRITE |
+				       PIPE_TRANSFER_DISCARD_RANGE |
+				       usage,
+				       &range, &xfer);
+	if (dst) {
+		memcpy(dst, data, size);
+		r600_buffer_transfer_unmap(ctx, xfer);
+	}
+}
+
+/* Resource vtable installed on every buffer by r600_alloc_buffer_struct().
+ * The initializer is positional; the first slot (get_handle) is left
+ * unset. */
+static const struct u_resource_vtbl r600_buffer_vtbl =
+{
+ NULL, /* get_handle */
+ r600_buffer_destroy, /* resource_destroy */
+ r600_buffer_transfer_map, /* transfer_map */
+ r600_buffer_flush_region, /* transfer_flush_region */
+ r600_buffer_transfer_unmap, /* transfer_unmap */
+};
+
+/**
+ * Allocate an r600_resource for a buffer and initialize everything that
+ * does not depend on the storage: the pipe_resource copy of \p templ with a
+ * reference count of 1, the buffer vtable, the threaded-context state and
+ * an empty valid range. No winsys buffer is attached yet.
+ *
+ * NOTE(review): the MALLOC_STRUCT() result is used without a NULL check.
+ */
+static struct r600_resource *
+r600_alloc_buffer_struct(struct pipe_screen *screen,
+			 const struct pipe_resource *templ)
+{
+	struct r600_resource *res = MALLOC_STRUCT(r600_resource);
+
+	/* Generic pipe_resource part. */
+	res->b.b = *templ;
+	res->b.b.next = NULL;
+	res->b.b.screen = screen;
+	pipe_reference_init(&res->b.b.reference, 1);
+
+	/* Threaded-context wrapper part. */
+	res->b.vtbl = &r600_buffer_vtbl;
+	threaded_resource_init(&res->b.b);
+
+	/* Driver part. */
+	res->buf = NULL;
+	res->bind_history = 0;
+	util_range_init(&res->valid_buffer_range);
+
+	return res;
+}
+
+/**
+ * Create a buffer resource from \p templ with the given \p alignment.
+ *
+ * Placement is derived by r600_init_resource_fields() from the template;
+ * PIPE_RESOURCE_FLAG_SPARSE in the template additionally requests a sparse
+ * winsys buffer.
+ *
+ * \return the new pipe_resource, or NULL when the storage cannot be
+ *         allocated
+ */
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+					 const struct pipe_resource *templ,
+					 unsigned alignment)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
+
+	r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
+
+	if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+		rbuffer->flags |= RADEON_FLAG_SPARSE;
+
+	if (!r600_alloc_resource(rscreen, rbuffer)) {
+		/* r600_alloc_buffer_struct() initialized the threaded-resource
+		 * state and the valid range, so a bare FREE() would skip their
+		 * teardown. Go through the regular destroy path instead;
+		 * rbuffer->buf is still NULL here, so no buffer reference is
+		 * dropped. */
+		r600_buffer_destroy(screen, &rbuffer->b.b);
+		return NULL;
+	}
+	return &rbuffer->b.b;
+}
+
+/**
+ * Convenience wrapper around r600_buffer_create(): builds a zeroed
+ * PIPE_BUFFER template of \p size with the given \p flags and \p usage and
+ * creates the buffer with the requested \p alignment.
+ */
+struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
+						 unsigned flags,
+						 unsigned usage,
+						 unsigned size,
+						 unsigned alignment)
+{
+	struct pipe_resource templ;
+
+	memset(&templ, 0, sizeof templ);
+
+	/* Fixed description of a plain buffer. */
+	templ.target = PIPE_BUFFER;
+	templ.format = PIPE_FORMAT_R8_UNORM;
+	templ.bind = 0;
+	templ.height0 = 1;
+	templ.depth0 = 1;
+	templ.array_size = 1;
+
+	/* Caller-provided properties. */
+	templ.width0 = size;
+	templ.usage = usage;
+	templ.flags = flags;
+
+	return r600_buffer_create(screen, &templ, alignment);
+}
+
+/**
+ * Wrap \p user_memory (templ->width0 bytes) in a buffer resource.
+ *
+ * The resource is placed in GTT, marked as a user pointer and its whole
+ * range is considered valid from the start.
+ *
+ * \return the new pipe_resource, or NULL when the winsys cannot create a
+ *         buffer from the pointer
+ */
+struct pipe_resource *
+r600_buffer_from_user_memory(struct pipe_screen *screen,
+			     const struct pipe_resource *templ,
+			     void *user_memory)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct radeon_winsys *ws = rscreen->ws;
+	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
+
+	rbuffer->domains = RADEON_DOMAIN_GTT;
+	rbuffer->flags = 0;
+	rbuffer->b.is_user_ptr = true;
+	/* Both the driver's and the threaded context's range cover the whole
+	 * buffer, since user memory may already contain data. */
+	util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
+	util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
+
+	/* Convert a user pointer to a buffer. */
+	rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
+	if (!rbuffer->buf) {
+		/* Use the regular destroy path so that the threaded-resource
+		 * state and the valid range set up by
+		 * r600_alloc_buffer_struct() are torn down too; a bare FREE()
+		 * would skip them. rbuffer->buf is NULL, so no buffer
+		 * reference is dropped. */
+		r600_buffer_destroy(screen, &rbuffer->b.b);
+		return NULL;
+	}
+
+	if (rscreen->info.has_virtual_memory)
+		rbuffer->gpu_address =
+			ws->buffer_get_virtual_address(rbuffer->buf);
+	else
+		rbuffer->gpu_address = 0;
+
+	/* User memory is accounted as GART usage only. */
+	rbuffer->vram_usage = 0;
+	rbuffer->gart_usage = templ->width0;
+
+	return &rbuffer->b.b;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_cs.h b/lib/mesa/src/gallium/drivers/r600/r600_cs.h
new file mode 100644
index 000000000..0efae09f3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_cs.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ */
+
+/**
+ * This file contains helpers for writing commands to command streams.
+ */
+
+#ifndef R600_CS_H
+#define R600_CS_H
+
+#include "r600_pipe_common.h"
+#include "r600d_common.h"
+
+/**
+ * Check that the memory used by a command stream, plus some additional
+ * amounts, still fits.
+ *
+ * Whatever exceeds the VRAM size is assumed to spill into GTT, and the
+ * resulting GTT total must stay below 70% of the GART size.
+ *
+ * \param vram VRAM memory size not added to the buffer list yet
+ * \param gtt  GTT memory size not added to the buffer list yet
+ * \return true if there is enough memory in VRAM and GTT
+ */
+static inline bool
+radeon_cs_memory_below_limit(struct r600_common_screen *screen,
+			     struct radeon_winsys_cs *cs,
+			     uint64_t vram, uint64_t gtt)
+{
+	uint64_t vram_total = vram + cs->used_vram;
+	uint64_t gtt_total = gtt + cs->used_gart;
+
+	/* VRAM overflow is charged to GTT. */
+	if (vram_total > screen->info.vram_size)
+		gtt_total += vram_total - screen->info.vram_size;
+
+	/* What remains is a plain GTT budget check. */
+	return gtt_total < screen->info.gart_size * 0.7;
+}
+
+/**
+ * Add a buffer to the buffer list of a ring's command stream (CS).
+ *
+ * Every buffer a CS uses has to be on its list: the list tells the kernel
+ * driver which buffers GPU commands touch, and buffers that are not listed
+ * may be swapped out (not accessible) during execution.
+ *
+ * The list is emptied by every context flush and has to be rebuilt
+ * afterwards.
+ *
+ * \p usage must be non-zero; RADEON_USAGE_SYNCHRONIZED is always added.
+ * \return the value returned by the winsys cs_add_buffer hook, times 4
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+						 struct r600_ring *ring,
+						 struct r600_resource *rbo,
+						 enum radeon_bo_usage usage,
+						 enum radeon_bo_priority priority)
+{
+	enum radeon_bo_usage synced_usage;
+
+	assert(usage);
+	synced_usage = (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED);
+
+	return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, synced_usage,
+				       rbo->domains, priority) * 4;
+}
+
+/**
+ * Variant of radeon_add_to_buffer_list() that, when \p check_mem is set,
+ * first checks the memory usage and flushes the ring asynchronously if the
+ * buffer would push it over the limit.
+ *
+ * This SHOULD NOT be used:
+ *
+ * - if r600_context_add_resource_size has been called for the buffer
+ *   followed by *_need_cs_space for checking the memory usage
+ *
+ * - if r600_need_dma_space has been called for the buffer
+ *
+ * - when emitting state packets and draw packets (because preceding packets
+ *   can't be re-emitted at that point)
+ *
+ * - if shader resource "enabled_mask" is not up-to-date or there is
+ *   a different constraint disallowing a context flush
+ */
+static inline unsigned
+radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
+				    struct r600_ring *ring,
+				    struct r600_resource *rbo,
+				    enum radeon_bo_usage usage,
+				    enum radeon_bo_priority priority,
+				    bool check_mem)
+{
+	if (check_mem) {
+		bool fits = radeon_cs_memory_below_limit(rctx->screen, ring->cs,
+							 rctx->vram + rbo->vram_usage,
+							 rctx->gtt + rbo->gart_usage);
+
+		if (!fits)
+			ring->flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+	}
+
+	return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
+}
+
+/**
+ * Add \p rbo to the buffer list of \p ring and, when the screen reports no
+ * virtual memory support, emit a NOP packet carrying the value returned by
+ * radeon_add_to_buffer_list(). With virtual memory nothing is emitted.
+ */
+static inline void r600_emit_reloc(struct r600_common_context *rctx,
+				   struct r600_ring *ring, struct r600_resource *rbo,
+				   enum radeon_bo_usage usage,
+				   enum radeon_bo_priority priority)
+{
+	struct radeon_winsys_cs *cs = ring->cs;
+	struct r600_common_screen *rscreen =
+		(struct r600_common_screen*)rctx->b.screen;
+	unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
+
+	if (rscreen->info.has_virtual_memory)
+		return;
+
+	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+	radeon_emit(cs, reloc);
+}
+
+/* Emit the two-dword SET_CONFIG_REG header for \p num consecutive registers
+ * starting at \p reg. Space for the header and the \p num values that are
+ * expected to follow is asserted. */
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	/* Registers are addressed in dwords relative to the range start. */
+	unsigned dw_index = (reg - R600_CONFIG_REG_OFFSET) >> 2;
+
+	assert(reg < R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/* Write one config register: a single-register SET_CONFIG_REG header
+ * followed by \p value. */
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_config_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/* Emit the two-dword SET_CONTEXT_REG header for \p num consecutive
+ * registers starting at \p reg. Space for the header and the \p num values
+ * that are expected to follow is asserted. */
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	/* Registers are addressed in dwords relative to the range start. */
+	unsigned dw_index = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
+
+	assert(reg >= R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/* Write one context register: a single-register SET_CONTEXT_REG header
+ * followed by \p value. */
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_context_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/* Write one context register with \p idx placed in bits 28 and up of the
+ * register-offset dword. Emits three dwords: header, offset|idx, value. */
+static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
+					      unsigned reg, unsigned idx,
+					      unsigned value)
+{
+	unsigned dw_index = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
+
+	assert(reg >= R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 3 <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+	radeon_emit(cs, dw_index | (idx << 28));
+	radeon_emit(cs, value);
+}
+
+/* Emit the two-dword SET_SH_REG header for \p num consecutive registers
+ * starting at \p reg, which must lie in [SI_SH_REG_OFFSET, SI_SH_REG_END).
+ * Space for the header and the \p num values that are expected to follow is
+ * asserted. */
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	/* Registers are addressed in dwords relative to the range start. */
+	unsigned dw_index = (reg - SI_SH_REG_OFFSET) >> 2;
+
+	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/* Write one SH register: a single-register SET_SH_REG header followed by
+ * \p value. */
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_sh_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/* Emit the two-dword SET_UCONFIG_REG header for \p num consecutive
+ * registers starting at \p reg, which must lie in
+ * [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END). Space for the header and
+ * the \p num values that are expected to follow is asserted. */
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	/* Registers are addressed in dwords relative to the range start. */
+	unsigned dw_index = (reg - CIK_UCONFIG_REG_OFFSET) >> 2;
+
+	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/* Write one uconfig register: a single-register SET_UCONFIG_REG header
+ * followed by \p value. */
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_uconfig_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/* Write one uconfig register with \p idx placed in bits 28 and up of the
+ * register-offset dword. Emits three dwords: header, offset|idx, value. */
+static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
+					      unsigned reg, unsigned idx,
+					      unsigned value)
+{
+	unsigned dw_index = (reg - CIK_UCONFIG_REG_OFFSET) >> 2;
+
+	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+	assert(cs->current.cdw + 3 <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
+	radeon_emit(cs, dw_index | (idx << 28));
+	radeon_emit(cs, value);
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c b/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c
new file mode 100644
index 000000000..c15fb9dfa
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_gpu_load.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+/* The GPU load is measured as follows.
+ *
+ * There is a thread which samples the GRBM_STATUS register at a certain
+ * frequency and the "busy" or "idle" counter is incremented based on
+ * whether the GUI_ACTIVE bit is set or not.
+ *
+ * Then, the user can sample the counters twice and calculate the average
+ * GPU load between the two samples.
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_query.h"
+#include "os/os_time.h"
+
+/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
+ * fps (there are too few samples per frame). */
+#define SAMPLES_PER_SEC 10000
+
+#define GRBM_STATUS 0x8010
+#define TA_BUSY(x) (((x) >> 14) & 0x1)
+#define GDS_BUSY(x) (((x) >> 15) & 0x1)
+#define VGT_BUSY(x) (((x) >> 17) & 0x1)
+#define IA_BUSY(x) (((x) >> 19) & 0x1)
+#define SX_BUSY(x) (((x) >> 20) & 0x1)
+#define WD_BUSY(x) (((x) >> 21) & 0x1)
+#define SPI_BUSY(x) (((x) >> 22) & 0x1)
+#define BCI_BUSY(x) (((x) >> 23) & 0x1)
+#define SC_BUSY(x) (((x) >> 24) & 0x1)
+#define PA_BUSY(x) (((x) >> 25) & 0x1)
+#define DB_BUSY(x) (((x) >> 26) & 0x1)
+#define CP_BUSY(x) (((x) >> 29) & 0x1)
+#define CB_BUSY(x) (((x) >> 30) & 0x1)
+#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
+
+#define SRBM_STATUS2 0x0e4c
+#define SDMA_BUSY(x) (((x) >> 5) & 0x1)
+
+#define CP_STAT 0x8680
+#define PFP_BUSY(x) (((x) >> 15) & 0x1)
+#define MEQ_BUSY(x) (((x) >> 16) & 0x1)
+#define ME_BUSY(x) (((x) >> 17) & 0x1)
+#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
+#define DMA_BUSY(x) (((x) >> 22) & 0x1)
+#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
+
+/* Pass-through "mask" for UPDATE_COUNTER: expands to its argument unchanged.
+ * Used below when `value` already holds a boolean. */
+#define IDENTITY(x) x
+
+/* Atomically increment either the busy or the idle member of
+ * counters->named.<field>, depending on whether mask(value) is non-zero.
+ * Relies on locals named `value` and `counters` at the expansion site. */
+#define UPDATE_COUNTER(field, mask) \
+ do { \
+ if (mask(value)) \
+ p_atomic_inc(&counters->named.field.busy); \
+ else \
+ p_atomic_inc(&counters->named.field.idle); \
+ } while (0)
+
+/* Take one sample: read GRBM_STATUS through the winsys and bump the
+ * busy/idle counter of every block decoded from it, plus the aggregate
+ * `gpu` counter.
+ *
+ * Only GRBM_STATUS is read here. SRBM_STATUS2 and CP_STAT (defined above)
+ * are not sampled by this function, so the sdma/pfp/meq/me/surf_sync/
+ * cp_dma/scratch_ram counters are never updated from here. */
+static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
+ union r600_mmio_counters *counters)
+{
+ uint32_t value = 0;
+ /* sdma_busy is never assigned below, so it stays false. */
+ bool gui_busy, sdma_busy = false;
+
+ /* GRBM_STATUS */
+ rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
+
+ UPDATE_COUNTER(ta, TA_BUSY);
+ UPDATE_COUNTER(gds, GDS_BUSY);
+ UPDATE_COUNTER(vgt, VGT_BUSY);
+ UPDATE_COUNTER(ia, IA_BUSY);
+ UPDATE_COUNTER(sx, SX_BUSY);
+ UPDATE_COUNTER(wd, WD_BUSY);
+ UPDATE_COUNTER(spi, SPI_BUSY);
+ UPDATE_COUNTER(bci, BCI_BUSY);
+ UPDATE_COUNTER(sc, SC_BUSY);
+ UPDATE_COUNTER(pa, PA_BUSY);
+ UPDATE_COUNTER(db, DB_BUSY);
+ UPDATE_COUNTER(cp, CP_BUSY);
+ UPDATE_COUNTER(cb, CB_BUSY);
+ UPDATE_COUNTER(gui, GUI_ACTIVE);
+ gui_busy = GUI_ACTIVE(value);
+
+ /* Reuse `value` as a plain boolean for the aggregate counter; since
+ * sdma_busy is always false here, this equals gui_busy. */
+ value = gui_busy || sdma_busy;
+ UPDATE_COUNTER(gpu, IDENTITY);
+}
+
+#undef UPDATE_COUNTER
+
+/* Thread entry point for the sampling thread; `param` is the
+ * r600_common_screen.
+ *
+ * Loops until rscreen->gpu_load_stop_thread reads non-zero. Each iteration
+ * sleeps, adjusts the sleep length by one step towards the target period
+ * (1000000 / SAMPLES_PER_SEC), then samples the MMIO counters. On exit it
+ * decrements the stop flag again and returns 0. */
+static int
+r600_gpu_load_thread(void *param)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
+	const int period_us = 1000000 / SAMPLES_PER_SEC;
+	int sleep_us = period_us;
+	int64_t prev_time = os_time_get();
+
+	for (;;) {
+		int64_t now;
+
+		if (p_atomic_read(&rscreen->gpu_load_stop_thread))
+			break;
+
+		if (sleep_us != 0)
+			os_time_sleep(sleep_us);
+
+		/* Nudge the sleep length so the loop tracks the expected
+		 * frequency: shorten it (never below 1) when
+		 * os_time_timeout() reports true for the last period,
+		 * otherwise lengthen it. */
+		now = os_time_get();
+
+		if (os_time_timeout(prev_time, prev_time + period_us, now))
+			sleep_us = MAX2(sleep_us - 1, 1);
+		else
+			++sleep_us;
+
+		prev_time = now;
+
+		/* Take one sample. */
+		r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
+	}
+
+	p_atomic_dec(&rscreen->gpu_load_stop_thread);
+	return 0;
+}
+
+/* Stop the sampling thread if one was started: raise the stop flag, join
+ * the thread and clear the handle. Does nothing when no thread exists. */
+void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
+{
+	if (rscreen->gpu_load_thread) {
+		p_atomic_inc(&rscreen->gpu_load_stop_thread);
+		thrd_join(rscreen->gpu_load_thread, NULL);
+		rscreen->gpu_load_thread = 0;
+	}
+}
+
+/* Return a snapshot of one busy/idle counter pair, packed as
+ * (idle << 32) | busy. `busy_index` is the index of the busy counter in
+ * mmio_counters.array; the idle counter is read from the next slot.
+ * Starts the sampling thread on first use. */
+static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
+				       unsigned busy_index)
+{
+	unsigned busy_count, idle_count;
+
+	/* Create the sampling thread lazily; the handle is re-checked under
+	 * the mutex before creating it. */
+	if (!rscreen->gpu_load_thread) {
+		mtx_lock(&rscreen->gpu_load_mutex);
+		if (!rscreen->gpu_load_thread) {
+			rscreen->gpu_load_thread =
+				u_thread_create(r600_gpu_load_thread, rscreen);
+		}
+		mtx_unlock(&rscreen->gpu_load_mutex);
+	}
+
+	busy_count = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
+	idle_count = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
+
+	return ((uint64_t)idle_count << 32) | busy_count;
+}
+
+/* Compute the busy percentage (0..100) of one counter pair since `begin`.
+ *
+ * `begin` is a packed snapshot from r600_read_mmio_counter()
+ * ((idle << 32) | busy); `busy_index` selects the same counter pair.
+ * The deltas are taken modulo 2^32, so a wrap of the 32-bit counters
+ * between the two snapshots is harmless.
+ */
+static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
+				      uint64_t begin, unsigned busy_index)
+{
+	uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
+	unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
+	unsigned idle = (end >> 32) - (begin >> 32);
+
+	/* Calculate the % of time the busy counter was being incremented.
+	 *
+	 * If no counters were incremented, return the current counter status.
+	 * It's for the case when the load is queried faster than
+	 * the counters are updated.
+	 */
+	if (idle || busy) {
+		/* Do the math in 64 bits: busy * 100 no longer fits in 32
+		 * bits past ~42.9M busy samples, and busy + idle can wrap
+		 * as well, either of which would yield a bogus percentage. */
+		uint64_t total = (uint64_t)busy + idle;
+
+		return (unsigned)((uint64_t)busy * 100 / total);
+	} else {
+		union r600_mmio_counters counters;
+
+		memset(&counters, 0, sizeof(counters));
+		r600_update_mmio_counters(rscreen, &counters);
+		return counters.array[busy_index] ? 100 : 0;
+	}
+}
+
+/* Index of named.<field>.busy within mmio_counters.array, computed as a
+ * pointer difference between the two views of the union. */
+#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \
+				    rscreen->mmio_counters.array)
+
+/* Map an R600_QUERY_GPU_* query type to the array index of the matching
+ * busy counter. Any other type hits unreachable(). */
+static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
+				     unsigned type)
+{
+	unsigned index;
+
+	switch (type) {
+	case R600_QUERY_GPU_LOAD:
+		index = BUSY_INDEX(rscreen, gpu);
+		break;
+	case R600_QUERY_GPU_SHADERS_BUSY:
+		index = BUSY_INDEX(rscreen, spi);
+		break;
+	case R600_QUERY_GPU_TA_BUSY:
+		index = BUSY_INDEX(rscreen, ta);
+		break;
+	case R600_QUERY_GPU_GDS_BUSY:
+		index = BUSY_INDEX(rscreen, gds);
+		break;
+	case R600_QUERY_GPU_VGT_BUSY:
+		index = BUSY_INDEX(rscreen, vgt);
+		break;
+	case R600_QUERY_GPU_IA_BUSY:
+		index = BUSY_INDEX(rscreen, ia);
+		break;
+	case R600_QUERY_GPU_SX_BUSY:
+		index = BUSY_INDEX(rscreen, sx);
+		break;
+	case R600_QUERY_GPU_WD_BUSY:
+		index = BUSY_INDEX(rscreen, wd);
+		break;
+	case R600_QUERY_GPU_BCI_BUSY:
+		index = BUSY_INDEX(rscreen, bci);
+		break;
+	case R600_QUERY_GPU_SC_BUSY:
+		index = BUSY_INDEX(rscreen, sc);
+		break;
+	case R600_QUERY_GPU_PA_BUSY:
+		index = BUSY_INDEX(rscreen, pa);
+		break;
+	case R600_QUERY_GPU_DB_BUSY:
+		index = BUSY_INDEX(rscreen, db);
+		break;
+	case R600_QUERY_GPU_CP_BUSY:
+		index = BUSY_INDEX(rscreen, cp);
+		break;
+	case R600_QUERY_GPU_CB_BUSY:
+		index = BUSY_INDEX(rscreen, cb);
+		break;
+	case R600_QUERY_GPU_SDMA_BUSY:
+		index = BUSY_INDEX(rscreen, sdma);
+		break;
+	case R600_QUERY_GPU_PFP_BUSY:
+		index = BUSY_INDEX(rscreen, pfp);
+		break;
+	case R600_QUERY_GPU_MEQ_BUSY:
+		index = BUSY_INDEX(rscreen, meq);
+		break;
+	case R600_QUERY_GPU_ME_BUSY:
+		index = BUSY_INDEX(rscreen, me);
+		break;
+	case R600_QUERY_GPU_SURF_SYNC_BUSY:
+		index = BUSY_INDEX(rscreen, surf_sync);
+		break;
+	case R600_QUERY_GPU_CP_DMA_BUSY:
+		index = BUSY_INDEX(rscreen, cp_dma);
+		break;
+	case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+		index = BUSY_INDEX(rscreen, scratch_ram);
+		break;
+	default:
+		unreachable("invalid query type");
+	}
+
+	return index;
+}
+
+/* Take the starting snapshot for a GPU-load style query of the given type.
+ * Returns the packed counter pair produced by r600_read_mmio_counter(). */
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
+{
+	return r600_read_mmio_counter(rscreen,
+				      busy_index_from_type(rscreen, type));
+}
+
+/* Finish a GPU-load style query: returns the value computed by
+ * r600_end_mmio_counter() for the counter pair of `type`, relative to the
+ * `begin` snapshot returned by r600_begin_counter(). */
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+			  uint64_t begin)
+{
+	return r600_end_mmio_counter(rscreen, begin,
+				     busy_index_from_type(rscreen, type));
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c b/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c
new file mode 100644
index 000000000..f186acb05
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_perfcounter.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle <nicolai.haehnle@amd.com>
+ *
+ */
+
+#include "util/u_memory.h"
+#include "r600_query.h"
+#include "r600_pipe_common.h"
+#include "r600d_common.h"
+
+/* Max counters per HW block */
+#define R600_QUERY_MAX_COUNTERS 16
+
+/* Find the block that owns the flat counter `index`, where every block
+ * contributes num_groups * num_selectors consecutive indices.
+ *
+ * On success, *sub_index receives the index relative to the start of the
+ * returned block and *base_gid the number of groups in all preceding
+ * blocks. Returns NULL when `index` is out of range; *base_gid then holds
+ * the total group count and *sub_index is left untouched. */
+static struct r600_perfcounter_block *
+lookup_counter(struct r600_perfcounters *pc, unsigned index,
+	       unsigned *base_gid, unsigned *sub_index)
+{
+	unsigned bid = 0;
+
+	*base_gid = 0;
+
+	while (bid < pc->num_blocks) {
+		struct r600_perfcounter_block *candidate = &pc->blocks[bid];
+		unsigned span = candidate->num_groups * candidate->num_selectors;
+
+		if (index < span) {
+			*sub_index = index;
+			return candidate;
+		}
+
+		/* Not in this block: skip its counters and groups. */
+		index -= span;
+		*base_gid += candidate->num_groups;
+		++bid;
+	}
+
+	return NULL;
+}
+
+/* Find the block that owns the flat group index *index. On success *index
+ * is rewritten to the group index relative to the returned block. Returns
+ * NULL when the index is out of range (*index has then been reduced by the
+ * group count of every block). */
+static struct r600_perfcounter_block *
+lookup_group(struct r600_perfcounters *pc, unsigned *index)
+{
+	unsigned bid = 0;
+
+	while (bid < pc->num_blocks) {
+		struct r600_perfcounter_block *candidate = &pc->blocks[bid];
+
+		if (*index < candidate->num_groups)
+			return candidate;
+
+		*index -= candidate->num_groups;
+		++bid;
+	}
+
+	return NULL;
+}
+
+/* Per-query state for one selected counter group of a block. Groups of a
+ * query form a singly linked list headed by r600_query_pc::groups. */
+struct r600_pc_group {
+ struct r600_pc_group *next; /* next group of the same query, or NULL */
+ struct r600_perfcounter_block *block; /* block this group belongs to */
+ unsigned sub_gid; /* only used during init */
+ unsigned result_base; /* only used during init */
+ int se; /* -1 unless the block has R600_PC_BLOCK_SE_GROUPS */
+ int instance; /* -1 unless the block has R600_PC_BLOCK_INSTANCE_GROUPS */
+ unsigned num_counters; /* number of valid entries in selectors[] */
+ unsigned selectors[R600_QUERY_MAX_COUNTERS];
+};
+
+/* Where the values of one user-visible counter live in the result buffer,
+ * viewed as an array of uint64: `qwords` values starting at `base`, spaced
+ * `stride` entries apart. */
+struct r600_pc_counter {
+ unsigned base; /* index of the first value */
+ unsigned qwords; /* number of values to accumulate */
+ unsigned stride; /* in uint64s */
+};
+
+/* Marker bit stored in r600_query_pc::shaders while a query is being built;
+ * it is masked out before comparing shader masks and is replaced by
+ * 0xffffffff in r600_create_batch_query() when it is the only bit set. */
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
+/* A batch performance-counter query. `b` must remain the first member:
+ * the code casts between r600_query / r600_query_hw and this struct. */
+struct r600_query_pc {
+ struct r600_query_hw b;
+
+ unsigned shaders; /* passed to emit_shaders() when non-zero */
+ unsigned num_counters; /* number of entries in counters[] */
+ struct r600_pc_counter *counters; /* one entry per user-requested query */
+ struct r600_pc_group *groups; /* linked list of selected groups */
+};
+
+/* Destroy a batch perfcounter query: frees every group in the list and the
+ * counter table, then hands the query to r600_query_hw_destroy(). */
+static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
+				  struct r600_query *rquery)
+{
+	struct r600_query_pc *query = (struct r600_query_pc *)rquery;
+	struct r600_pc_group *cursor = query->groups;
+
+	while (cursor) {
+		struct r600_pc_group *following = cursor->next;
+
+		FREE(cursor);
+		cursor = following;
+	}
+	query->groups = NULL;
+
+	FREE(query->counters);
+
+	r600_query_hw_destroy(rscreen, rquery);
+}
+
+/* prepare_buffer hook for batch perfcounter queries (installed in
+ * batch_query_hw_ops): does nothing and always reports success. */
+static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
+ struct r600_query_hw *hwquery,
+ struct r600_resource *buffer)
+{
+ /* no-op */
+ return true;
+}
+
+/* emit_start hook: program the shader mask (if any), select the counters
+ * of every group and finally start counting.
+ *
+ * emit_instance() is only issued when the (se, instance) pair differs from
+ * the one last emitted; if any pair was emitted, it is reset to (-1, -1)
+ * before emit_start(). */
+static void r600_pc_query_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *hwquery,
+				     struct r600_resource *buffer, uint64_t va)
+{
+	struct r600_perfcounters *pc = ctx->screen->perfcounters;
+	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+	struct r600_pc_group *group = query->groups;
+	int last_se = -1;
+	int last_instance = -1;
+
+	if (query->shaders)
+		pc->emit_shaders(ctx, query->shaders);
+
+	while (group) {
+		bool same_target = group->se == last_se &&
+				   group->instance == last_instance;
+
+		if (!same_target) {
+			last_se = group->se;
+			last_instance = group->instance;
+			pc->emit_instance(ctx, group->se, group->instance);
+		}
+
+		pc->emit_select(ctx, group->block, group->num_counters,
+				group->selectors);
+
+		group = group->next;
+	}
+
+	if (!(last_se == -1 && last_instance == -1))
+		pc->emit_instance(ctx, -1, -1);
+
+	pc->emit_start(ctx, buffer, va);
+}
+
+/* emit_stop hook: stop counting, then read back the counters of every
+ * group into the result buffer starting at `va`.
+ *
+ * For each group, reads are emitted once per (se, instance) pair it covers;
+ * `va` advances by num_counters 64-bit slots after each read, so the
+ * results of one group are laid out SE-major, instance-minor. The instance
+ * selection is reset to (-1, -1) at the end. */
+static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *hwquery,
+ struct r600_resource *buffer, uint64_t va)
+{
+ struct r600_perfcounters *pc = ctx->screen->perfcounters;
+ struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+ struct r600_pc_group *group;
+
+ pc->emit_stop(ctx, buffer, va);
+
+ for (group = query->groups; group; group = group->next) {
+ struct r600_perfcounter_block *block = group->block;
+ /* A specific SE is read alone; se < 0 starts at SE 0. */
+ unsigned se = group->se >= 0 ? group->se : 0;
+ unsigned se_end = se + 1;
+
+ /* Per-SE block without a specific SE: iterate over all SEs. */
+ if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
+ se_end = ctx->screen->info.max_se;
+
+ do {
+ unsigned instance = group->instance >= 0 ? group->instance : 0;
+
+ /* instance < 0: iterate over every instance of the block;
+ * otherwise the body runs once for the selected instance. */
+ do {
+ pc->emit_instance(ctx, se, instance);
+ pc->emit_read(ctx, block,
+ group->num_counters, group->selectors,
+ buffer, va);
+ va += sizeof(uint64_t) * group->num_counters;
+ } while (group->instance < 0 && ++instance < block->num_instances);
+ } while (++se < se_end);
+ }
+
+ pc->emit_instance(ctx, -1, -1);
+}
+
+/* clear_result hook: zero the first num_counters entries of result->batch. */
+static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
+				       union pipe_query_result *result)
+{
+	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+	size_t bytes = sizeof(result->batch[0]) * query->num_counters;
+
+	memset(result, 0, bytes);
+}
+
+/* add_result hook: accumulate one result buffer into result->batch[].
+ *
+ * `buffer` is read as an array of uint64. For counter i, `qwords` entries
+ * starting at `base` and spaced `stride` apart are summed; only the low
+ * 32 bits of each entry are added. */
+static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
+				     struct r600_query_hw *hwquery,
+				     void *buffer,
+				     union pipe_query_result *result)
+{
+	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+	uint64_t *results = buffer;
+	unsigned i;
+
+	for (i = 0; i < query->num_counters; ++i) {
+		struct r600_pc_counter *counter = &query->counters[i];
+		unsigned offset = counter->base;
+		unsigned remaining = counter->qwords;
+
+		while (remaining--) {
+			/* Truncate to the low dword before accumulating. */
+			uint32_t low = results[offset];
+
+			result->batch[i].u64 += low;
+			offset += counter->stride;
+		}
+	}
+}
+
+/* Generic query vtable for batch perfcounter queries: only destroy is
+ * specific to this file; begin/end/get_result use the r600_query_hw
+ * implementations. */
+static struct r600_query_ops batch_query_ops = {
+ .destroy = r600_pc_query_destroy,
+ .begin = r600_query_hw_begin,
+ .end = r600_query_hw_end,
+ .get_result = r600_query_hw_get_result
+};
+
+/* Hardware-query hooks for batch perfcounter queries; installed as
+ * query->b.ops by r600_create_batch_query(). */
+static struct r600_query_hw_ops batch_query_hw_ops = {
+ .prepare_buffer = r600_pc_query_prepare_buffer,
+ .emit_start = r600_pc_query_emit_start,
+ .emit_stop = r600_pc_query_emit_stop,
+ .clear_result = r600_pc_query_clear_result,
+ .add_result = r600_pc_query_add_result,
+};
+
+/* Return the query's group state for (block, sub_gid), creating it and
+ * prepending it to query->groups if it does not exist yet.
+ *
+ * When creating, sub_gid is decomposed in order into a shader-type index
+ * (R600_PC_BLOCK_SHADER), an SE index (R600_PC_BLOCK_SE_GROUPS) and an
+ * instance index (R600_PC_BLOCK_INSTANCE_GROUPS); se/instance are set to -1
+ * when the block lacks the corresponding flag. May update query->shaders.
+ *
+ * Returns NULL on allocation failure or when the group's shader mask
+ * conflicts with the one already recorded in the query. */
+static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
+ struct r600_query_pc *query,
+ struct r600_perfcounter_block *block,
+ unsigned sub_gid)
+{
+ struct r600_pc_group *group = query->groups;
+
+ /* Reuse an existing group; matched on the original, undecomposed
+ * sub_gid stored in the group. */
+ while (group) {
+ if (group->block == block && group->sub_gid == sub_gid)
+ return group;
+ group = group->next;
+ }
+
+ group = CALLOC_STRUCT(r600_pc_group);
+ if (!group)
+ return NULL;
+
+ group->block = block;
+ group->sub_gid = sub_gid;
+
+ if (block->flags & R600_PC_BLOCK_SHADER) {
+ unsigned sub_gids = block->num_instances;
+ unsigned shader_id;
+ unsigned shaders;
+ unsigned query_shaders;
+
+ /* Split off the shader-type index; the remainder of sub_gid
+ * addresses the SE/instance part handled below. */
+ if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+ sub_gids = sub_gids * screen->info.max_se;
+ shader_id = sub_gid / sub_gids;
+ sub_gid = sub_gid % sub_gids;
+
+ shaders = screen->perfcounters->shader_type_bits[shader_id];
+
+ /* All shader-filtered groups of one query must agree on the mask
+ * (the windowing marker bit is ignored for the comparison). */
+ query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ if (query_shaders && query_shaders != shaders) {
+ fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
+ FREE(group);
+ return NULL;
+ }
+ query->shaders = shaders;
+ }
+
+ if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
+ // A non-zero value in query->shaders ensures that the shader
+ // masking is reset unless the user explicitly requests one.
+ query->shaders = R600_PC_SHADERS_WINDOWING;
+ }
+
+ if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+ group->se = sub_gid / block->num_instances;
+ sub_gid = sub_gid % block->num_instances;
+ } else {
+ group->se = -1;
+ }
+
+ if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ group->instance = sub_gid;
+ } else {
+ group->instance = -1;
+ }
+
+ /* Prepend to the query's group list. */
+ group->next = query->groups;
+ query->groups = group;
+
+ return group;
+}
+
+/* Create a batch query made of `num_queries` performance counters.
+ *
+ * `query_types` holds the driver query types of the counters; each must be
+ * >= R600_QUERY_FIRST_PERFCOUNTER and resolve to a block through
+ * lookup_counter(). The function
+ *   1. groups the requested selectors per (block, group),
+ *   2. computes the result-buffer layout and command-stream sizes,
+ *   3. maps every user-supplied counter to its location in the results,
+ *   4. initializes the underlying hardware query.
+ *
+ * Returns the new query, or NULL when perfcounters are unavailable, an
+ * allocation fails, a query type is invalid, a group has more selectors
+ * than the block has counters, or shader groups are incompatible.
+ */
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+					   unsigned num_queries,
+					   unsigned *query_types)
+{
+	struct r600_common_screen *screen =
+		(struct r600_common_screen *)ctx->screen;
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+	struct r600_pc_group *group;
+	struct r600_query_pc *query;
+	unsigned base_gid, sub_gid, sub_index;
+	unsigned i, j;
+
+	if (!pc)
+		return NULL;
+
+	query = CALLOC_STRUCT(r600_query_pc);
+	if (!query)
+		return NULL;
+
+	query->b.b.ops = &batch_query_ops;
+	query->b.ops = &batch_query_hw_ops;
+
+	query->num_counters = num_queries;
+
+	/* Collect selectors per group */
+	for (i = 0; i < num_queries; ++i) {
+		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
+			goto error;
+
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+		if (!block)
+			goto error;
+
+		/* Split the block-relative index into group and selector. */
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		group = get_group_state(screen, query, block, sub_gid);
+		if (!group)
+			goto error;
+
+		if (group->num_counters >= block->num_counters) {
+			fprintf(stderr,
+				"perfcounter group %s: too many selected\n",
+				block->basename);
+			goto error;
+		}
+		group->selectors[group->num_counters] = sub_index;
+		++group->num_counters;
+	}
+
+	/* Compute result bases and CS size per group */
+	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
+	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
+
+	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+
+	i = 0;
+	for (group = query->groups; group; group = group->next) {
+		unsigned select_dw, read_dw;
+		unsigned instances = 1;
+
+		block = group->block;
+
+		/* Number of (se, instance) pairs read back for this group. */
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			instances = screen->info.max_se;
+		if (group->instance < 0)
+			instances *= block->num_instances;
+
+		group->result_base = i;
+		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
+		i += instances * group->num_counters;
+
+		pc->get_size(block, group->num_counters, group->selectors,
+			     &select_dw, &read_dw);
+		query->b.num_cs_dw_begin += select_dw;
+		query->b.num_cs_dw_end += instances * read_dw;
+		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+	}
+
+	if (query->shaders) {
+		if (query->shaders == R600_PC_SHADERS_WINDOWING)
+			query->shaders = 0xffffffff;
+		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
+	}
+
+	/* Map user-supplied query array to result indices */
+	query->counters = CALLOC(num_queries, sizeof(*query->counters));
+	/* Bail out on allocation failure instead of dereferencing NULL in the
+	 * loop below. A zero-sized request may legitimately yield NULL and
+	 * the loop does not run in that case, so it is not treated as an
+	 * error. */
+	if (num_queries && !query->counters)
+		goto error;
+
+	for (i = 0; i < num_queries; ++i) {
+		struct r600_pc_counter *counter = &query->counters[i];
+
+		/* Cannot fail: the same lookup succeeded in the first pass. */
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		group = get_group_state(screen, query, block, sub_gid);
+		assert(group != NULL);
+
+		/* Position of this selector within its group. */
+		for (j = 0; j < group->num_counters; ++j) {
+			if (group->selectors[j] == sub_index)
+				break;
+		}
+
+		counter->base = group->result_base + j;
+		counter->stride = group->num_counters;
+
+		counter->qwords = 1;
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			counter->qwords = screen->info.max_se;
+		if (group->instance < 0)
+			counter->qwords *= block->num_instances;
+	}
+
+	if (!r600_query_hw_init(screen, &query->b))
+		goto error;
+
+	return (struct pipe_query *)query;
+
+error:
+	r600_pc_query_destroy(screen, &query->b.b);
+	return NULL;
+}
+
+/* Build the group-name and selector-name tables of `block`.
+ *
+ * Group names are "<basename>[<shader suffix>][<se>][_][<instance>]",
+ * stored at a fixed stride of block->group_name_stride bytes; selector
+ * names are "<group name>_<3-digit selector>" at a stride of
+ * block->selector_name_stride bytes.
+ *
+ * Returns true on success. On failure returns false and leaves both
+ * block->group_names and block->selector_names NULL, so that a later
+ * retry starts from a clean state and does not leak the first table.
+ */
+static bool r600_init_block_names(struct r600_common_screen *screen,
+				  struct r600_perfcounter_block *block)
+{
+	unsigned i, j, k;
+	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
+	unsigned namelen;
+	char *groupname;
+	char *p;
+
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+		groups_instance = block->num_instances;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+		groups_se = screen->info.max_se;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		groups_shader = screen->perfcounters->num_shader_types;
+
+	/* Size one group-name slot: basename + NUL, plus room for each
+	 * optional component written below. */
+	namelen = strlen(block->basename);
+	block->group_name_stride = namelen + 1;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		block->group_name_stride += 3;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+		/* One decimal digit is reserved for the SE index. */
+		assert(groups_se <= 10);
+		block->group_name_stride += 1;
+
+		/* '_' separator between SE and instance. */
+		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+			block->group_name_stride += 1;
+	}
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+		/* Two decimal digits are reserved for the instance index. */
+		assert(groups_instance <= 100);
+		block->group_name_stride += 2;
+	}
+
+	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
+	if (!block->group_names)
+		return false;
+
+	groupname = block->group_names;
+	for (i = 0; i < groups_shader; ++i) {
+		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+		unsigned shaderlen = strlen(shader_suffix);
+		for (j = 0; j < groups_se; ++j) {
+			for (k = 0; k < groups_instance; ++k) {
+				strcpy(groupname, block->basename);
+				p = groupname + namelen;
+
+				if (block->flags & R600_PC_BLOCK_SHADER) {
+					strcpy(p, shader_suffix);
+					p += shaderlen;
+				}
+
+				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+					p += sprintf(p, "%u", j);
+					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+						*p++ = '_';
+				}
+
+				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+					p += sprintf(p, "%u", k);
+
+				groupname += block->group_name_stride;
+			}
+		}
+	}
+
+	/* "_%03u" adds exactly four characters as long as the selector index
+	 * has at most three digits. */
+	assert(block->num_selectors <= 1000);
+	block->selector_name_stride = block->group_name_stride + 4;
+	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
+				       block->selector_name_stride);
+	if (!block->selector_names) {
+		/* Undo the first allocation: callers retry initialization
+		 * whenever either table is NULL, and a retry would overwrite
+		 * (and leak) a still-set group_names pointer. */
+		FREE(block->group_names);
+		block->group_names = NULL;
+		return false;
+	}
+
+	groupname = block->group_names;
+	p = block->selector_names;
+	for (i = 0; i < block->num_groups; ++i) {
+		for (j = 0; j < block->num_selectors; ++j) {
+			sprintf(p, "%s_%03u", groupname, j);
+			p += block->selector_name_stride;
+		}
+		groupname += block->group_name_stride;
+	}
+
+	return true;
+}
+
+/* Describe performance counter `index`.
+ *
+ * With info == NULL, returns the total number of counters (sum over all
+ * blocks of num_selectors * num_groups). Otherwise fills *info and returns
+ * 1, or returns 0 when perfcounters are unavailable, `index` is out of
+ * range, or the name tables cannot be built. Every counter of a block
+ * except its first and last is flagged DONT_LIST. */
+int r600_get_perfcounter_info(struct r600_common_screen *screen,
+			      unsigned index,
+			      struct pipe_driver_query_info *info)
+{
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+	unsigned base_gid, sub;
+
+	if (!pc)
+		return 0;
+
+	if (!info) {
+		unsigned total = 0;
+		unsigned bid;
+
+		for (bid = 0; bid < pc->num_blocks; ++bid) {
+			const struct r600_perfcounter_block *b = &pc->blocks[bid];
+
+			total += b->num_selectors * b->num_groups;
+		}
+
+		return total;
+	}
+
+	block = lookup_counter(pc, index, &base_gid, &sub);
+	if (!block)
+		return 0;
+
+	/* Name tables are built lazily on first use. */
+	if (!block->selector_names && !r600_init_block_names(screen, block))
+		return 0;
+
+	info->name = block->selector_names + sub * block->selector_name_stride;
+	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
+	info->max_value.u64 = 0;
+	info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
+	info->group_id = base_gid + sub / block->num_selectors;
+	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+	if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
+		info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
+
+	return 1;
+}
+
+/* Describe performance counter group `index`.
+ *
+ * With info == NULL, returns the total number of groups. Otherwise fills
+ * *info and returns 1, or returns 0 when perfcounters are unavailable,
+ * `index` is out of range, or the name tables cannot be built. */
+int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
+				    unsigned index,
+				    struct pipe_driver_query_group_info *info)
+{
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+
+	if (!pc)
+		return 0;
+
+	if (!info)
+		return pc->num_groups;
+
+	/* lookup_group() rewrites `index` to be relative to the block. */
+	block = lookup_group(pc, &index);
+	if (!block)
+		return 0;
+
+	/* Name tables are built lazily on first use. */
+	if (!block->group_names && !r600_init_block_names(screen, block))
+		return 0;
+
+	info->name = block->group_names + index * block->group_name_stride;
+	info->num_queries = block->num_selectors;
+	info->max_active_queries = block->num_counters;
+
+	return 1;
+}
+
+/* Tear down the screen's perfcounter state through its cleanup callback;
+ * does nothing when the screen has no perfcounters. */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
+{
+	if (!rscreen->perfcounters)
+		return;
+
+	rscreen->perfcounters->cleanup(rscreen);
+}
+
+/* Allocate a zeroed table of `num_blocks` blocks in `pc` and read the
+ * RADEON_PC_SEPARATE_SE / RADEON_PC_SEPARATE_INSTANCE debug options (both
+ * default to false). Returns false if the allocation fails. */
+bool r600_perfcounters_init(struct r600_perfcounters *pc,
+			    unsigned num_blocks)
+{
+	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
+	if (pc->blocks == NULL)
+		return false;
+
+	pc->separate_se =
+		debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
+	pc->separate_instance =
+		debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
+
+	return true;
+}
+
+/* Append a block description to pc->blocks and update the block and group
+ * totals.
+ *
+ * `instances` is clamped to at least 1. The SE_GROUPS / INSTANCE_GROUPS
+ * flags are added when the matching pc->separate_* option is set. The
+ * block's group count is the product of the instance count (with
+ * INSTANCE_GROUPS), max_se (with SE_GROUPS) and the number of shader types
+ * (with SHADER).
+ *
+ * NOTE(review): no bound check against the number of blocks allocated by
+ * r600_perfcounters_init(); the caller must not add more than that. */
+void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
+				 struct r600_perfcounters *pc,
+				 const char *name, unsigned flags,
+				 unsigned counters, unsigned selectors,
+				 unsigned instances, void *data)
+{
+	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
+	unsigned groups;
+
+	assert(counters <= R600_QUERY_MAX_COUNTERS);
+
+	block->basename = name;
+	block->flags = flags;
+	block->num_counters = counters;
+	block->num_selectors = selectors;
+	block->num_instances = MAX2(instances, 1);
+	block->data = data;
+
+	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
+		block->flags |= R600_PC_BLOCK_SE_GROUPS;
+	if (pc->separate_instance && block->num_instances > 1)
+		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
+
+	groups = (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) ?
+			block->num_instances : 1;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+		groups *= rscreen->info.max_se;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		groups *= pc->num_shader_types;
+	block->num_groups = groups;
+
+	++pc->num_blocks;
+	pc->num_groups += block->num_groups;
+}
+
+/* Free the name tables of every registered block, then the block array and
+ * `pc` itself. */
+void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
+{
+	unsigned remaining = pc->num_blocks;
+	unsigned idx = 0;
+
+	while (remaining--) {
+		FREE(pc->blocks[idx].group_names);
+		FREE(pc->blocks[idx].selector_names);
+		++idx;
+	}
+
+	FREE(pc->blocks);
+	FREE(pc);
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c
new file mode 100644
index 000000000..acad670d6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.c
@@ -0,0 +1,1433 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/list.h"
+#include "util/u_draw_quad.h"
+#include "util/u_memory.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_upload_mgr.h"
+#include "os/os_time.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "radeon_video.h"
+#include <inttypes.h>
+#include <sys/utsname.h>
+
+#ifndef HAVE_LLVM
+#define HAVE_LLVM 0
+#endif
+
+#if HAVE_LLVM
+#include <llvm-c/TargetMachine.h>
+#endif
+
+#ifndef MESA_LLVM_VERSION_PATCH
+#define MESA_LLVM_VERSION_PATCH 0
+#endif
+
+/* Fence object returned through pipe_context::flush (r600_flush_from_st).
+ * It carries one winsys fence per ring, since both can signal out of order.
+ */
+struct r600_multi_fence {
+ /* Reference count; set to 1 when the fence is created. */
+ struct pipe_reference reference;
+ /* Fence of the gfx ring; may be NULL. */
+ struct pipe_fence_handle *gfx;
+ /* Fence of the DMA ring; may be NULL. */
+ struct pipe_fence_handle *sdma;
+
+ /* If the context wasn't flushed at fence creation, this is non-NULL. */
+ struct {
+ struct r600_common_context *ctx;
+ /* Value of ctx->num_gfx_cs_flushes when the fence was created. */
+ unsigned ib_index;
+ } gfx_unflushed;
+};
+
+/*
+ * shader binary helpers.
+ */
+/** Reset every byte of \p b to zero. */
+void radeon_shader_binary_init(struct ac_shader_binary *b)
+{
+	memset(b, 0, sizeof *b);
+}
+
+/**
+ * Release the buffers owned by \p b. The structure itself is not freed
+ * and its pointers are not reset. A NULL \p b is accepted.
+ */
+void radeon_shader_binary_clean(struct ac_shader_binary *b)
+{
+	if (b != NULL) {
+		FREE(b->code);
+		FREE(b->config);
+		FREE(b->rodata);
+		FREE(b->global_symbol_offsets);
+		FREE(b->relocs);
+		FREE(b->disasm_string);
+		FREE(b->llvm_ir_string);
+	}
+}
+
+/*
+ * pipe_context
+ */
+
+/**
+ * Write an EOP event.
+ *
+ * \param event EVENT_TYPE_*
+ * \param event_flags Optional cache flush flags (TC)
+ * \param data_sel 1 = fence, 3 = timestamp
+ * \param buf Buffer; when non-NULL a write relocation is emitted for it
+ * \param va GPU address
+ * \param new_fence Fence value to write for this event (emitted as the
+ * packet's immediate data).
+ * \param query_type Not referenced by this function.
+ */
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ unsigned op = EVENT_TYPE(event) |
+ EVENT_INDEX(5) |
+ event_flags;
+ unsigned sel = EOP_DATA_SEL(data_sel);
+
+ /* Six dwords: packet header, event dword, address low, address high
+ * (bits 32..47) combined with the data selector, immediate data, and
+ * one unused dword. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, new_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+
+ if (buf)
+ r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
+}
+
+/**
+ * Number of command stream dwords reported for a fence write: 6, plus 2
+ * more when the screen has no virtual memory support.
+ */
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
+{
+	return screen->info.has_virtual_memory ? 6 : 6 + 2;
+}
+
+/**
+ * Emit a WAIT_REG_MEM packet on the gfx command stream that compares the
+ * memory at \p va for equality with \p ref under \p mask.
+ *
+ * \param ctx  context whose gfx CS receives the packet
+ * \param va   GPU address; emitted as two dwords (low, then va >> 32)
+ * \param ref  reference value
+ * \param mask mask dword
+ */
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, ref); /* reference value */
+ radeon_emit(cs, mask); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+}
+
+/**
+ * Draw a rectangle for u_blitter using R600_PRIM_RECTANGLE_LIST.
+ *
+ * Three vertices are uploaded through the stream uploader. Each vertex is
+ * 8 floats: a 4-component position followed by a 4-component attribute
+ * (color or texcoord, depending on \p type).
+ *
+ * \param blitter              blitter whose pipe context is used
+ * \param vertex_elements_cso  vertex elements state to bind
+ * \param get_vs               callback returning the vertex shader to bind
+ * \param x1,y1,x2,y2          rectangle corners written as vertex positions
+ * \param depth                Z value written to every vertex
+ * \param num_instances        instance count passed to the draw
+ * \param type                 selects which member of \p attrib is copied
+ * \param attrib               per-vertex attribute source
+ *
+ * If the vertex upload fails the function returns without drawing.
+ */
+void r600_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib)
+{
+ struct r600_common_context *rctx =
+ (struct r600_common_context*)util_blitter_get_pipe(blitter);
+ struct pipe_viewport_state viewport;
+ struct pipe_resource *buf = NULL;
+ unsigned offset = 0;
+ float *vb;
+
+ rctx->b.bind_vertex_elements_state(&rctx->b, vertex_elements_cso);
+ rctx->b.bind_vs_state(&rctx->b, get_vs(blitter));
+
+ /* Some operations (like color resolve on r6xx) don't work
+ * with the conventional primitive types.
+ * One that works is PT_RECTLIST, which we use here. */
+
+ /* setup viewport */
+ viewport.scale[0] = 1.0f;
+ viewport.scale[1] = 1.0f;
+ viewport.scale[2] = 1.0f;
+ viewport.translate[0] = 0.0f;
+ viewport.translate[1] = 0.0f;
+ viewport.translate[2] = 0.0f;
+ rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport);
+
+ /* Upload vertices. The hw rectangle has only 3 vertices,
+ * The 4th one is derived from the first 3.
+ * The vertex specification should match u_blitter's vertex element state. */
+ u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24,
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, &buf, (void**)&vb);
+ if (!buf)
+ return;
+
+ /* Vertex 0: (x1, y1) */
+ vb[0] = x1;
+ vb[1] = y1;
+ vb[2] = depth;
+ vb[3] = 1;
+
+ /* Vertex 1: (x1, y2) */
+ vb[8] = x1;
+ vb[9] = y2;
+ vb[10] = depth;
+ vb[11] = 1;
+
+ /* Vertex 2: (x2, y1) */
+ vb[16] = x2;
+ vb[17] = y1;
+ vb[18] = depth;
+ vb[19] = 1;
+
+ /* Fill the attribute half (floats 4..7) of each vertex. */
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ memcpy(vb+4, attrib->color, sizeof(float)*4);
+ memcpy(vb+12, attrib->color, sizeof(float)*4);
+ memcpy(vb+20, attrib->color, sizeof(float)*4);
+ break;
+ /* NOTE(review): both texcoord types share this path, so z/w are copied
+ * for TEXCOORD_XY as well, and the "fall through" remark below is not
+ * followed by a case label. Confirm this is intentional. */
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+ vb[6] = vb[14] = vb[22] = attrib->texcoord.z;
+ vb[7] = vb[15] = vb[23] = attrib->texcoord.w;
+ /* fall through */
+ vb[4] = attrib->texcoord.x1;
+ vb[5] = attrib->texcoord.y1;
+ vb[12] = attrib->texcoord.x1;
+ vb[13] = attrib->texcoord.y2;
+ vb[20] = attrib->texcoord.x2;
+ vb[21] = attrib->texcoord.y1;
+ break;
+ default:; /* Nothing to do. */
+ }
+
+ /* draw */
+ struct pipe_vertex_buffer vbuffer = {};
+ vbuffer.buffer.resource = buf;
+ vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */
+ vbuffer.buffer_offset = offset;
+
+ rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, &vbuffer);
+ util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
+ 0, num_instances);
+ /* Drop the local reference to the upload buffer. */
+ pipe_resource_reference(&buf, NULL);
+}
+
+/**
+ * Emit a NOP dword on the DMA command stream for Evergreen and newer
+ * chips. Nothing is emitted for older chip classes.
+ */
+static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
+{
+	if (rctx->chip_class < EVERGREEN) {
+		/* TODO: R600-R700 should use the FENCE packet.
+		 * CS checker support is required. */
+		return;
+	}
+
+	radeon_emit(rctx->dma.cs, 0xf0000000); /* NOP */
+}
+
+/**
+ * Prepare the DMA command stream for a packet of \p num_dw dwords that
+ * writes \p dst and/or reads \p src.
+ *
+ * Flushes the gfx IB when it references either buffer, flushes the DMA IB
+ * when space or memory limits would be exceeded, emits a wait-for-idle when
+ * either buffer is already used by the DMA IB, and optionally adds the
+ * buffers to the DMA buffer list.
+ *
+ * \param ctx     context
+ * \param num_dw  dwords the caller is about to emit
+ * \param dst     destination buffer or NULL
+ * \param src     source buffer or NULL
+ */
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src)
+{
+ /* Memory usage of the DMA IB once dst and src are included. */
+ uint64_t vram = ctx->dma.cs->used_vram;
+ uint64_t gtt = ctx->dma.cs->used_gart;
+
+ if (dst) {
+ vram += dst->vram_usage;
+ gtt += dst->gart_usage;
+ }
+ if (src) {
+ vram += src->vram_usage;
+ gtt += src->gart_usage;
+ }
+
+ /* Flush the GFX IB if DMA depends on it. */
+ if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
+ RADEON_USAGE_WRITE))))
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
+ /* Flush if there's not enough space, or if the memory usage per IB
+ * is too large.
+ *
+ * IBs using too little memory are limited by the IB submission overhead.
+ * IBs using too much memory are limited by the kernel/TTM overhead.
+ * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+ *
+ * This heuristic makes sure that DMA requests are executed
+ * very soon after the call is made and lowers memory usage.
+ * It improves texture upload performance by keeping the DMA
+ * engine busy while uploads are being submitted.
+ */
+ num_dw++; /* for emit_wait_idle below */
+ if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
+ ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
+ !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
+ }
+
+ /* Wait for idle if either buffer has been used in the IB before to
+ * prevent read-after-write hazards.
+ */
+ if ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
+ RADEON_USAGE_WRITE)))
+ r600_dma_emit_wait_idle(ctx);
+
+ /* If GPUVM is not supported, the CS checker needs 2 entries
+ * in the buffer list per packet, which has to be done manually.
+ */
+ /* NOTE(review): the comment above describes the no-GPUVM case, while
+ * the condition below adds the buffers when has_virtual_memory is set.
+ * Confirm against the DMA copy paths. */
+ if (ctx->screen->info.has_virtual_memory) {
+ if (dst)
+ radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
+ RADEON_USAGE_WRITE,
+ RADEON_PRIO_SDMA_BUFFER);
+ if (src)
+ radeon_add_to_buffer_list(ctx, &ctx->dma, src,
+ RADEON_USAGE_READ,
+ RADEON_PRIO_SDMA_BUFFER);
+ }
+
+ /* this function is called before all DMA calls, so increment this. */
+ ctx->num_dma_calls++;
+}
+
+/* pipe_context::memory_barrier hook (installed in
+ * r600_common_context_init). The body is empty: both arguments are ignored.
+ */
+static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
+{
+}
+
+/**
+ * Suspend active queries and an in-progress streamout.
+ * streamout.suspended records whether streamout was ended here, so that
+ * r600_postflush_resume_features can restore it.
+ */
+void r600_preflush_suspend_features(struct r600_common_context *ctx)
+{
+ /* suspend queries */
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_suspend_queries(ctx);
+
+ /* Cleared first; set again only if streamout is actually ended. */
+ ctx->streamout.suspended = false;
+ if (ctx->streamout.begin_emitted) {
+ r600_emit_streamout_end(ctx);
+ ctx->streamout.suspended = true;
+ }
+}
+
+/**
+ * Counterpart of r600_preflush_suspend_features: if streamout was
+ * suspended, mark its buffers dirty with all enabled targets in the append
+ * bitmask, then resume active queries.
+ */
+void r600_postflush_resume_features(struct r600_common_context *ctx)
+{
+ if (ctx->streamout.suspended) {
+ ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
+ r600_streamout_buffers_dirty(ctx);
+ }
+
+ /* resume queries */
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ r600_resume_queries(ctx);
+}
+
+/**
+ * Register \p fence as a dependency of the DMA command stream (when one
+ * exists) and of the gfx command stream.
+ */
+static void r600_add_fence_dependency(struct r600_common_context *rctx,
+				      struct pipe_fence_handle *fence)
+{
+	struct radeon_winsys *winsys = rctx->ws;
+	struct radeon_winsys_cs *dma_cs = rctx->dma.cs;
+
+	if (dma_cs)
+		winsys->cs_add_fence_dependency(dma_cs, fence);
+	winsys->cs_add_fence_dependency(rctx->gfx.cs, fence);
+}
+
+/**
+ * pipe_context::fence_server_sync hook: make the context's command streams
+ * depend on the winsys fences wrapped by \p fence.
+ */
+static void r600_fence_server_sync(struct pipe_context *ctx,
+				   struct pipe_fence_handle *fence)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_multi_fence *mf = (struct r600_multi_fence *)fence;
+
+	/* Two cases need no work:
+	 *
+	 * - drm_major == 2 (radeon): only amdgpu needs to handle fence
+	 *   dependencies (for fence imports). radeon synchronizes all rings
+	 *   by default and will not implement fence imports.
+	 *
+	 * - unflushed fences: only imported fences need to be handled here,
+	 *   because the winsys handles synchronization automatically for BOs
+	 *   within the process, and it drops no-op dependencies (i.e.
+	 *   dependencies within the same ring).
+	 */
+	if (rctx->screen->info.drm_major == 2 || mf->gfx_unflushed.ctx)
+		return;
+
+	/* All unflushed commands will not start execution before
+	 * this fence dependency is signalled.
+	 *
+	 * Should we flush the context to allow more GPU parallelism?
+	 */
+	if (mf->sdma)
+		r600_add_fence_dependency(rctx, mf->sdma);
+	if (mf->gfx)
+		r600_add_fence_dependency(rctx, mf->gfx);
+}
+
+/**
+ * pipe_context::flush hook.
+ *
+ * Flushes the DMA IB (if any) and then either flushes the gfx IB or, when
+ * PIPE_FLUSH_DEFERRED is set and a fence is requested, only obtains the
+ * next gfx fence without flushing.
+ *
+ * \param ctx    context
+ * \param fence  if non-NULL, receives a new r600_multi_fence wrapping the
+ *               gfx and sdma fences; the previous *fence is unreferenced.
+ *               On allocation failure *fence is left untouched.
+ * \param flags  PIPE_FLUSH_* bits
+ */
+static void r600_flush_from_st(struct pipe_context *ctx,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct pipe_screen *screen = ctx->screen;
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct radeon_winsys *ws = rctx->ws;
+ struct pipe_fence_handle *gfx_fence = NULL;
+ struct pipe_fence_handle *sdma_fence = NULL;
+ bool deferred_fence = false;
+ unsigned rflags = RADEON_FLUSH_ASYNC;
+
+ if (flags & PIPE_FLUSH_END_OF_FRAME)
+ rflags |= RADEON_FLUSH_END_OF_FRAME;
+
+ /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
+ if (rctx->dma.cs)
+ rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
+
+ if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
+ /* Nothing new in the gfx IB: reuse the last gfx fence. */
+ if (fence)
+ ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
+ if (!(flags & PIPE_FLUSH_DEFERRED))
+ ws->cs_sync_flush(rctx->gfx.cs);
+ } else {
+ /* Instead of flushing, create a deferred fence. Constraints:
+ * - The state tracker must allow a deferred flush.
+ * - The state tracker must request a fence.
+ * Thread safety in fence_finish must be ensured by the state tracker.
+ */
+ if (flags & PIPE_FLUSH_DEFERRED && fence) {
+ gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+ deferred_fence = true;
+ } else {
+ rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+ }
+ }
+
+ /* Both engines can signal out of order, so we need to keep both fences. */
+ if (fence) {
+ struct r600_multi_fence *multi_fence =
+ CALLOC_STRUCT(r600_multi_fence);
+ if (!multi_fence) {
+ /* Drop the references taken above before bailing out. */
+ ws->fence_reference(&sdma_fence, NULL);
+ ws->fence_reference(&gfx_fence, NULL);
+ goto finish;
+ }
+
+ multi_fence->reference.count = 1;
+ /* If both fences are NULL, fence_finish will always return true. */
+ multi_fence->gfx = gfx_fence;
+ multi_fence->sdma = sdma_fence;
+
+ if (deferred_fence) {
+ /* Remember which context and IB the fence belongs to. */
+ multi_fence->gfx_unflushed.ctx = rctx;
+ multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
+ }
+
+ screen->fence_reference(screen, fence, NULL);
+ *fence = (struct pipe_fence_handle*)multi_fence;
+ }
+finish:
+ /* For a non-deferred flush, sync both command streams. */
+ if (!(flags & PIPE_FLUSH_DEFERRED)) {
+ if (rctx->dma.cs)
+ ws->cs_sync_flush(rctx->dma.cs);
+ ws->cs_sync_flush(rctx->gfx.cs);
+ }
+}
+
+/**
+ * Flush callback of the DMA command stream.
+ *
+ * \param ctx    the r600_common_context, passed as void *
+ * \param flags  RADEON_FLUSH_* bits forwarded to the winsys
+ * \param fence  if non-NULL, receives a reference to the last SDMA fence
+ *
+ * When the DBG_CHECK_VM debug flag is set and the context provides
+ * check_vm_faults, the IB is saved before the flush, the fence is waited
+ * on, and check_vm_faults is called with the saved IB.
+ */
+static void r600_flush_dma_ring(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct radeon_winsys_cs *cs = rctx->dma.cs;
+ struct radeon_saved_cs saved;
+ bool check_vm =
+ (rctx->screen->debug_flags & DBG_CHECK_VM) &&
+ rctx->check_vm_faults;
+
+ /* Empty IB: hand out the previous fence and skip the flush. */
+ if (!radeon_emitted(cs, 0)) {
+ if (fence)
+ rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
+ return;
+ }
+
+ if (check_vm)
+ radeon_save_cs(rctx->ws, cs, &saved, true);
+
+ rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
+ if (fence)
+ rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
+
+ if (check_vm) {
+ /* Use conservative timeout 800ms, after which we won't wait any
+ * longer and assume the GPU is hung.
+ */
+ rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
+
+ rctx->check_vm_faults(rctx, &saved, RING_DMA);
+ radeon_clear_saved_cs(&saved);
+ }
+}
+
+/**
+ * Store a linearized copy of all chunks of \p cs together with the buffer
+ * list in \p saved.
+ *
+ * \param ws               winsys used to query the buffer list
+ * \param cs               command stream to copy
+ * \param saved            destination; on allocation failure it is zeroed
+ *                         and a message is printed to stderr
+ * \param get_buffer_list  when false only the IB is saved and the
+ *                         bo_list / bo_count fields are not written
+ */
+void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+ struct radeon_saved_cs *saved, bool get_buffer_list)
+{
+ uint32_t *buf;
+ unsigned i;
+
+ /* Save the IB chunks. */
+ saved->num_dw = cs->prev_dw + cs->current.cdw;
+ saved->ib = MALLOC(4 * saved->num_dw);
+ if (!saved->ib)
+ goto oom;
+
+ /* Copy the previous chunks in order, then the current chunk. */
+ buf = saved->ib;
+ for (i = 0; i < cs->num_prev; ++i) {
+ memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
+ buf += cs->prev[i].cdw;
+ }
+ memcpy(buf, cs->current.buf, cs->current.cdw * 4);
+
+ if (!get_buffer_list)
+ return;
+
+ /* Save the buffer list. */
+ /* First call (NULL list) only returns the count. */
+ saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
+ saved->bo_list = CALLOC(saved->bo_count,
+ sizeof(saved->bo_list[0]));
+ if (!saved->bo_list) {
+ FREE(saved->ib);
+ goto oom;
+ }
+ ws->cs_get_buffer_list(cs, saved->bo_list);
+
+ return;
+
+oom:
+ fprintf(stderr, "%s: out of memory\n", __func__);
+ memset(saved, 0, sizeof(*saved));
+}
+
+/**
+ * Free the IB copy and the buffer list held by \p saved, then zero the
+ * whole structure.
+ */
+void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
+{
+	FREE(saved->ib);
+	FREE(saved->bo_list);
+	memset(saved, 0, sizeof *saved);
+}
+
+/**
+ * pipe_context::get_device_reset_status hook.
+ *
+ * Compares the winsys RADEON_GPU_RESET_COUNTER with the value cached in
+ * the context. A change is reported as PIPE_UNKNOWN_CONTEXT_RESET and the
+ * cached value is updated; otherwise PIPE_NO_RESET is returned.
+ */
+static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	unsigned counter = rctx->ws->query_value(rctx->ws,
+						 RADEON_GPU_RESET_COUNTER);
+
+	if (counter != rctx->gpu_reset_counter) {
+		rctx->gpu_reset_counter = counter;
+		return PIPE_UNKNOWN_CONTEXT_RESET;
+	}
+
+	return PIPE_NO_RESET;
+}
+
+/**
+ * pipe_context::set_debug_callback hook: copy \p cb into the context, or
+ * zero the stored callback when \p cb is NULL.
+ */
+static void r600_set_debug_callback(struct pipe_context *ctx,
+				    const struct pipe_debug_callback *cb)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+
+	if (!cb) {
+		memset(&rctx->debug, 0, sizeof(rctx->debug));
+		return;
+	}
+
+	rctx->debug = *cb;
+}
+
+/**
+ * pipe_context::set_device_reset_callback hook: copy \p cb into the
+ * context, or zero the stored callback when \p cb is NULL.
+ */
+static void r600_set_device_reset_callback(struct pipe_context *ctx,
+					   const struct pipe_device_reset_callback *cb)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+
+	if (!cb) {
+		memset(&rctx->device_reset_callback, 0,
+		       sizeof(rctx->device_reset_callback));
+		return;
+	}
+
+	rctx->device_reset_callback = *cb;
+}
+
+/**
+ * Query the device reset status and, if a reset happened, invoke the
+ * registered device reset callback with that status.
+ *
+ * \return true when the callback was invoked; false when no callback or
+ *         no status query is installed, or when no reset was reported.
+ */
+bool r600_check_device_reset(struct r600_common_context *rctx)
+{
+	enum pipe_reset_status status;
+
+	if (!rctx->device_reset_callback.reset ||
+	    !rctx->b.get_device_reset_status)
+		return false;
+
+	status = rctx->b.get_device_reset_status(&rctx->b);
+	if (status != PIPE_NO_RESET) {
+		rctx->device_reset_callback.reset(rctx->device_reset_callback.data,
+						  status);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * Default dma_clear_buffer implementation: forward the request to the
+ * context's clear_buffer hook with R600_COHERENCY_NONE.
+ */
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+					   struct pipe_resource *dst,
+					   uint64_t offset, uint64_t size,
+					   unsigned value)
+{
+	struct r600_common_context *common =
+		(struct r600_common_context *)ctx;
+
+	common->clear_buffer(ctx, dst, offset, size, value,
+			     R600_COHERENCY_NONE);
+}
+
+/**
+ * pipe_context::resource_commit hook for buffers.
+ *
+ * \param pctx      context
+ * \param resource  buffer whose commitment changes (asserted to be a
+ *                  PIPE_BUFFER)
+ * \param level     not referenced by this function
+ * \param box       box->x and box->width give the range to change
+ * \param commit    passed through to the winsys buffer_commit call
+ * \return the result of the winsys buffer_commit call
+ */
+static bool r600_resource_commit(struct pipe_context *pctx,
+				 struct pipe_resource *resource,
+				 unsigned level, struct pipe_box *box,
+				 bool commit)
+{
+	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
+	struct r600_resource *res = r600_resource(resource);
+
+	/*
+	 * Since buffer commitment changes cannot be pipelined, we need to
+	 * (a) flush any pending commands that refer to the buffer we're about
+	 *     to change, and
+	 * (b) wait for threaded submit to finish, including those that were
+	 *     triggered by some other, earlier operation.
+	 */
+	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+					     res->buf, RADEON_USAGE_READWRITE)) {
+		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+	}
+
+	/* The DMA command stream only exists when async DMA is available
+	 * (see r600_common_context_init), so every use of it is guarded the
+	 * same way r600_flush_from_st guards it. */
+	if (ctx->dma.cs) {
+		if (radeon_emitted(ctx->dma.cs, 0) &&
+		    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+						     res->buf, RADEON_USAGE_READWRITE)) {
+			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		}
+
+		ctx->ws->cs_sync_flush(ctx->dma.cs);
+	}
+	ctx->ws->cs_sync_flush(ctx->gfx.cs);
+
+	assert(resource->target == PIPE_BUFFER);
+
+	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
+}
+
+/**
+ * Initialize the state shared by the r600 contexts: install the common
+ * pipe_context hooks, create the suballocator and uploaders, create the
+ * winsys context and, when SDMA is usable, the DMA command stream.
+ *
+ * \param rctx           context to initialize
+ * \param rscreen        owning screen
+ * \param context_flags  PIPE_CONTEXT_* flags; only
+ *                       PIPE_CONTEXT_COMPUTE_ONLY is examined here
+ * \return false if one of the allocations / creations fails. Objects
+ *         created before the failure are not released here.
+ */
+bool r600_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags)
+{
+ slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
+ slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
+
+ rctx->screen = rscreen;
+ rctx->ws = rscreen->ws;
+ rctx->family = rscreen->family;
+ rctx->chip_class = rscreen->chip_class;
+
+ /* Common pipe_context entry points. */
+ rctx->b.invalidate_resource = r600_invalidate_resource;
+ rctx->b.resource_commit = r600_resource_commit;
+ rctx->b.transfer_map = u_transfer_map_vtbl;
+ rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
+ rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
+ rctx->b.texture_subdata = u_default_texture_subdata;
+ rctx->b.memory_barrier = r600_memory_barrier;
+ rctx->b.flush = r600_flush_from_st;
+ rctx->b.set_debug_callback = r600_set_debug_callback;
+ rctx->b.fence_server_sync = r600_fence_server_sync;
+ rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
+
+ /* evergreen_compute.c has a special codepath for global buffers.
+ * Everything else can use the direct path.
+ */
+ if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
+ (context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
+ rctx->b.buffer_subdata = u_default_buffer_subdata;
+ else
+ rctx->b.buffer_subdata = r600_buffer_subdata;
+
+ /* The reset status query is only installed for DRM 2.43 and later;
+ * the current counter is cached as the baseline. */
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
+ rctx->b.get_device_reset_status = r600_get_reset_status;
+ rctx->gpu_reset_counter =
+ rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+ }
+
+ rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
+
+ r600_init_context_texture_functions(rctx);
+ r600_init_viewport_functions(rctx);
+ r600_streamout_init(rctx);
+ r600_query_init(rctx);
+ cayman_init_msaa(&rctx->b);
+
+ rctx->allocator_zeroed_memory =
+ u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
+ 0, PIPE_USAGE_DEFAULT, 0, true);
+ if (!rctx->allocator_zeroed_memory)
+ return false;
+
+ rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
+ 0, PIPE_USAGE_STREAM);
+ if (!rctx->b.stream_uploader)
+ return false;
+
+ rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
+ 0, PIPE_USAGE_DEFAULT);
+ if (!rctx->b.const_uploader)
+ return false;
+
+ rctx->ctx = rctx->ws->ctx_create(rctx->ws);
+ if (!rctx->ctx)
+ return false;
+
+ /* The DMA command stream is optional: dma.cs stays unset when there
+ * are no SDMA rings or async DMA is disabled by the debug flag. */
+ if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
+ rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
+ r600_flush_dma_ring,
+ rctx);
+ rctx->dma.flush = r600_flush_dma_ring;
+ }
+
+ return true;
+}
+
+/**
+ * Release the objects held by the common context. Most members are checked
+ * for NULL before being destroyed; the two slab child pools are destroyed
+ * unconditionally.
+ */
+void r600_common_context_cleanup(struct r600_common_context *rctx)
+{
+ if (rctx->query_result_shader)
+ rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
+
+ /* Command streams are destroyed before the winsys context. */
+ if (rctx->gfx.cs)
+ rctx->ws->cs_destroy(rctx->gfx.cs);
+ if (rctx->dma.cs)
+ rctx->ws->cs_destroy(rctx->dma.cs);
+ if (rctx->ctx)
+ rctx->ws->ctx_destroy(rctx->ctx);
+
+ if (rctx->b.stream_uploader)
+ u_upload_destroy(rctx->b.stream_uploader);
+ if (rctx->b.const_uploader)
+ u_upload_destroy(rctx->b.const_uploader);
+
+ slab_destroy_child(&rctx->pool_transfers);
+ slab_destroy_child(&rctx->pool_transfers_unsync);
+
+ if (rctx->allocator_zeroed_memory) {
+ u_suballocator_destroy(rctx->allocator_zeroed_memory);
+ }
+ /* Drop the remaining fence and scratch buffer references. */
+ rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
+ rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
+ r600_resource_reference(&rctx->eop_bug_scratch, NULL);
+}
+
+/*
+ * pipe_screen
+ */
+
+/* Table of named debug flags: option name, DBG_* flag, description.
+ * The list is terminated by DEBUG_NAMED_VALUE_END.
+ * NOTE(review): the consumer of this table is outside this part of the
+ * file; presumably it is handed to a debug option parser. Confirm.
+ */
+static const struct debug_named_value common_debug_options[] = {
+ /* logging */
+ { "tex", DBG_TEX, "Print texture info" },
+ { "nir", DBG_NIR, "Enable experimental NIR shaders" },
+ { "compute", DBG_COMPUTE, "Print compute info" },
+ { "vm", DBG_VM, "Print virtual addresses when creating resources" },
+ { "info", DBG_INFO, "Print driver information" },
+
+ /* shaders */
+ { "fs", DBG_FS, "Print fetch shaders" },
+ { "vs", DBG_VS, "Print vertex shaders" },
+ { "gs", DBG_GS, "Print geometry shaders" },
+ { "ps", DBG_PS, "Print pixel shaders" },
+ { "cs", DBG_CS, "Print compute shaders" },
+ { "tcs", DBG_TCS, "Print tessellation control shaders" },
+ { "tes", DBG_TES, "Print tessellation evaluation shaders" },
+ { "noir", DBG_NO_IR, "Don't print the LLVM IR"},
+ { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
+ { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+ { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
+ { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" },
+ { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." },
+
+ /* self tests */
+ { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." },
+ { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." },
+ { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." },
+ { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." },
+
+ /* features */
+ { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
+ { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" },
+ /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
+ { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
+ { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
+ { "notiling", DBG_NO_TILING, "Disable tiling" },
+ { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
+ { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
+ { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
+ { "nowc", DBG_NO_WC, "Disable GTT write combining" },
+ { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
+ { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
+
+ DEBUG_NAMED_VALUE_END /* must be last */
+};
+
+/** Return the constant vendor string "X.Org"; \p pscreen is ignored. */
+static const char *r600_get_vendor(struct pipe_screen *pscreen)
+{
+	return "X.Org";
+}
+
+/** Return the constant device vendor string "AMD"; \p pscreen is ignored. */
+static const char *r600_get_device_vendor(struct pipe_screen *pscreen)
+{
+	return "AMD";
+}
+
+/**
+ * Return the chip name reported by the winsys, or NULL when the winsys
+ * does not provide a get_chip_name hook.
+ */
+static const char *r600_get_marketing_name(struct radeon_winsys *ws)
+{
+	return ws->get_chip_name ? ws->get_chip_name(ws) : NULL;
+}
+
+/**
+ * Map rscreen->info.family to a constant "AMD <FAMILY>" string.
+ * Families not listed here yield "AMD unknown".
+ */
+static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
+{
+ switch (rscreen->info.family) {
+ case CHIP_R600: return "AMD R600";
+ case CHIP_RV610: return "AMD RV610";
+ case CHIP_RV630: return "AMD RV630";
+ case CHIP_RV670: return "AMD RV670";
+ case CHIP_RV620: return "AMD RV620";
+ case CHIP_RV635: return "AMD RV635";
+ case CHIP_RS780: return "AMD RS780";
+ case CHIP_RS880: return "AMD RS880";
+ case CHIP_RV770: return "AMD RV770";
+ case CHIP_RV730: return "AMD RV730";
+ case CHIP_RV710: return "AMD RV710";
+ case CHIP_RV740: return "AMD RV740";
+ case CHIP_CEDAR: return "AMD CEDAR";
+ case CHIP_REDWOOD: return "AMD REDWOOD";
+ case CHIP_JUNIPER: return "AMD JUNIPER";
+ case CHIP_CYPRESS: return "AMD CYPRESS";
+ case CHIP_HEMLOCK: return "AMD HEMLOCK";
+ case CHIP_PALM: return "AMD PALM";
+ case CHIP_SUMO: return "AMD SUMO";
+ case CHIP_SUMO2: return "AMD SUMO2";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
+ case CHIP_CAYMAN: return "AMD CAYMAN";
+ case CHIP_ARUBA: return "AMD ARUBA";
+ default: return "AMD unknown";
+ }
+}
+
+/**
+ * Create the on-disk shader cache for \p rscreen.
+ *
+ * The cache is created from the family name, a timestamp string obtained
+ * through disk_cache_get_function_timestamp, and the subset of debug flags
+ * that affect shader compilation. rscreen->disk_shader_cache is left
+ * untouched when shader dumping is enabled, when no timestamp is
+ * available, or when formatting the timestamp fails.
+ */
+static void r600_disk_cache_create(struct r600_common_screen *rscreen)
+{
+ /* Don't use the cache if shader dumping is enabled. */
+ if (rscreen->debug_flags & DBG_ALL_SHADERS)
+ return;
+
+ uint32_t mesa_timestamp;
+ if (disk_cache_get_function_timestamp(r600_disk_cache_create,
+ &mesa_timestamp)) {
+ char *timestamp_str;
+ int res = -1;
+
+ /* asprintf allocates timestamp_str; it is freed below. */
+ res = asprintf(&timestamp_str, "%u",mesa_timestamp);
+ if (res != -1) {
+ /* These flags affect shader compilation. */
+ uint64_t shader_debug_flags =
+ rscreen->debug_flags &
+ (DBG_FS_CORRECT_DERIVS_AFTER_KILL |
+ DBG_UNSAFE_MATH);
+
+ rscreen->disk_shader_cache =
+ disk_cache_create(r600_get_family_name(rscreen),
+ timestamp_str,
+ shader_debug_flags);
+ free(timestamp_str);
+ }
+ }
+}
+
+/** Return the disk shader cache pointer stored in the screen. */
+static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
+{
+	return ((struct r600_common_screen *)pscreen)->disk_shader_cache;
+}
+
+/** Return the renderer string stored in the screen. */
+static const char *r600_get_name(struct pipe_screen *pscreen)
+{
+	return ((struct r600_common_screen *)pscreen)->renderer_string;
+}
+
+/**
+ * pipe_screen::get_paramf hook.
+ *
+ * Line and point width limits are 16384 for families from CHIP_CEDAR
+ * upwards and 8192 below; anisotropy and LOD bias limits are 16; guard
+ * bands and anything not listed are 0.
+ */
+static float r600_get_paramf(struct pipe_screen* pscreen,
+			     enum pipe_capf param)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
+
+	switch (param) {
+	case PIPE_CAPF_MAX_LINE_WIDTH:
+	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+	case PIPE_CAPF_MAX_POINT_WIDTH:
+	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+		return rscreen->family >= CHIP_CEDAR ? 16384.0f : 8192.0f;
+
+	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+		return 16.0f;
+
+	case PIPE_CAPF_GUARD_BAND_LEFT:
+	case PIPE_CAPF_GUARD_BAND_TOP:
+	case PIPE_CAPF_GUARD_BAND_RIGHT:
+	case PIPE_CAPF_GUARD_BAND_BOTTOM:
+		return 0.0f;
+	}
+
+	return 0.0f;
+}
+
+/**
+ * Video capability query answered from the vl_* helpers and fixed values.
+ *
+ * \param screen      screen passed on to the vl_* helpers
+ * \param profile     video profile being queried
+ * \param entrypoint  entrypoint; only used for PIPE_VIDEO_CAP_SUPPORTED
+ * \param param       capability to query
+ * \return the capability value; 0 for capabilities not listed
+ */
+static int r600_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
+{
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ return vl_profile_supported(screen, profile, entrypoint);
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return vl_video_buffer_max_size(screen);
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ return PIPE_FORMAT_NV12;
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return true;
+ case PIPE_VIDEO_CAP_MAX_LEVEL:
+ return vl_level_supported(screen, profile);
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Map a radeon family to a lowercase processor name string. Several
+ * families share one name. Families not listed yield the empty string.
+ */
+const char *r600_get_llvm_processor_name(enum radeon_family family)
+{
+ switch (family) {
+ case CHIP_R600:
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV670:
+ return "r600";
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ return "rs880";
+ case CHIP_RV710:
+ return "rv710";
+ case CHIP_RV730:
+ return "rv730";
+ case CHIP_RV740:
+ case CHIP_RV770:
+ return "rv770";
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return "cedar";
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ return "sumo";
+ case CHIP_REDWOOD:
+ return "redwood";
+ case CHIP_JUNIPER:
+ return "juniper";
+ case CHIP_HEMLOCK:
+ case CHIP_CYPRESS:
+ return "cypress";
+ case CHIP_BARTS:
+ return "barts";
+ case CHIP_TURKS:
+ return "turks";
+ case CHIP_CAICOS:
+ return "caicos";
+ case CHIP_CAYMAN:
+ case CHIP_ARUBA:
+ return "cayman";
+
+ default:
+ return "";
+ }
+}
+
+/**
+ * Maximum number of threads per compute block. The value is a constant
+ * 256; neither argument is examined.
+ */
+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+					  enum pipe_shader_ir ir_type)
+{
+	return 256;
+}
+
+/**
+ * Compute capability query.
+ *
+ * \param screen   screen
+ * \param ir_type  shader IR type, forwarded to get_max_threads_per_block
+ * \param param    capability to query
+ * \param ret      if non-NULL, receives the value; its layout depends on
+ *                 \p param (string, uint32_t, or one or more uint64_t)
+ * \return size in bytes of the value for \p param; 0 after printing a
+ *         message to stderr when the switch does not return a value
+ */
+static int r600_get_compute_param(struct pipe_screen *screen,
+ enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param,
+ void *ret)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+
+ //TODO: select these params by asic
+ switch (param) {
+ case PIPE_COMPUTE_CAP_IR_TARGET: {
+ const char *gpu;
+ const char *triple = "r600--";
+ gpu = r600_get_llvm_processor_name(rscreen->family);
+ /* The string written is "<gpu>-<triple>". The caller must supply
+ * a buffer of at least the size returned below. */
+ if (ret) {
+ sprintf(ret, "%s-%s", gpu, triple);
+ }
+ /* +2 for dash and terminating NIL byte */
+ return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
+ }
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ if (ret) {
+ uint64_t *grid_dimension = ret;
+ grid_dimension[0] = 3;
+ }
+ return 1 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ if (ret) {
+ uint64_t *grid_size = ret;
+ grid_size[0] = 65535;
+ grid_size[1] = 65535;
+ grid_size[2] = 65535;
+ }
+ return 3 * sizeof(uint64_t) ;
+
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ if (ret) {
+ uint64_t *block_size = ret;
+ unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ block_size[0] = threads_per_block;
+ block_size[1] = threads_per_block;
+ block_size[2] = threads_per_block;
+ }
+ return 3 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ if (ret) {
+ uint64_t *max_threads_per_block = ret;
+ *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ }
+ return sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+ if (ret) {
+ uint32_t *address_bits = ret;
+ address_bits[0] = 32;
+ }
+ return 1 * sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ if (ret) {
+ uint64_t *max_global_size = ret;
+ uint64_t max_mem_alloc_size;
+
+ /* Recursive query for the allocation limit. */
+ r600_get_compute_param(screen, ir_type,
+ PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+ &max_mem_alloc_size);
+
+ /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
+ * 1/4 of the MAX_GLOBAL_SIZE. Since the
+ * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
+ * make sure we never report more than
+ * 4 * MAX_MEM_ALLOC_SIZE.
+ */
+ *max_global_size = MIN2(4 * max_mem_alloc_size,
+ MAX2(rscreen->info.gart_size,
+ rscreen->info.vram_size));
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ if (ret) {
+ uint64_t *max_local_size = ret;
+ /* Value reported by the closed source driver. */
+ *max_local_size = 32768;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+ if (ret) {
+ uint64_t *max_input_size = ret;
+ /* Value reported by the closed source driver. */
+ *max_input_size = 1024;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ if (ret) {
+ uint64_t *max_mem_alloc_size = ret;
+
+ *max_mem_alloc_size = rscreen->info.max_alloc_size;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ if (ret) {
+ uint32_t *max_clock_frequency = ret;
+ *max_clock_frequency = rscreen->info.max_shader_clock;
+ }
+ return sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ if (ret) {
+ uint32_t *max_compute_units = ret;
+ *max_compute_units = rscreen->info.num_good_compute_units;
+ }
+ return sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ if (ret) {
+ uint32_t *images_supported = ret;
+ *images_supported = 0;
+ }
+ return sizeof(uint32_t);
+ /* This case breaks out of the switch, so it reaches the
+ * "unknown PIPE_COMPUTE_CAP" message below and returns 0. */
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+ break; /* unused */
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ if (ret) {
+ uint32_t *subgroup_size = ret;
+ *subgroup_size = r600_wavefront_size(rscreen->family);
+ }
+ return sizeof(uint32_t);
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ if (ret) {
+ uint64_t *max_variable_threads_per_block = ret;
+ *max_variable_threads_per_block = 0;
+ }
+ return sizeof(uint64_t);
+ }
+
+ fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
+ return 0;
+}
+
+/* Return the winsys RADEON_TIMESTAMP value multiplied by 1000000 and then
+ * divided by info.clock_crystal_freq (multiplication happens first).
+ * NOTE(review): presumably this yields nanoseconds with clock_crystal_freq
+ * expressed in kHz - confirm against the winsys. */
+static uint64_t r600_get_timestamp(struct pipe_screen *screen)
+{
+	struct r600_common_screen *common = (struct r600_common_screen*)screen;
+	struct radeon_winsys *winsys = common->ws;
+
+	return 1000000 * winsys->query_value(winsys, RADEON_TIMESTAMP) /
+	       common->info.clock_crystal_freq;
+}
+
+/* Make *dst refer to src.
+ *
+ * pipe_reference() updates the reference counts of the previous and the new
+ * fence; when it returns true, the previous fence's gfx and sdma winsys
+ * fences are released and the previous fence object itself is freed. */
+static void r600_fence_reference(struct pipe_screen *screen,
+				 struct pipe_fence_handle **dst,
+				 struct pipe_fence_handle *src)
+{
+	struct r600_common_screen *common = (struct r600_common_screen*)screen;
+	struct radeon_winsys *winsys = common->ws;
+	struct r600_multi_fence **slot = (struct r600_multi_fence **)dst;
+	struct r600_multi_fence *incoming = (struct r600_multi_fence *)src;
+	struct r600_multi_fence *outgoing = *slot;
+
+	if (pipe_reference(&outgoing->reference, &incoming->reference)) {
+		winsys->fence_reference(&outgoing->gfx, NULL);
+		winsys->fence_reference(&outgoing->sdma, NULL);
+		FREE(outgoing);
+	}
+	*slot = incoming;
+}
+
+/* Wait for a multi-fence, bounded by timeout.
+ *
+ * The SDMA fence (if present) is waited on first, then the gfx fence. After
+ * each step the remaining timeout is recomputed from the absolute deadline,
+ * unless the timeout is 0 or PIPE_TIMEOUT_INFINITE.
+ *
+ * If the fence still refers to an unflushed gfx IB of the calling context
+ * (gfx_unflushed.ctx / ib_index match), that IB is flushed here; with a zero
+ * timeout the flush is requested with RADEON_FLUSH_ASYNC and the function
+ * returns false immediately.
+ *
+ * Returns false if the SDMA wait fails, true if there is no gfx fence,
+ * otherwise the result of the winsys fence_wait() on the gfx fence.
+ */
+static boolean r600_fence_finish(struct pipe_screen *screen,
+ struct pipe_context *ctx,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
+{
+ struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+ struct r600_common_context *rctx;
+ /* Absolute deadline, captured once so later waits share one budget. */
+ int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+
+ ctx = threaded_context_unwrap_sync(ctx);
+ rctx = ctx ? (struct r600_common_context*)ctx : NULL;
+
+ if (rfence->sdma) {
+ if (!rws->fence_wait(rws, rfence->sdma, timeout))
+ return false;
+
+ /* Recompute the timeout after waiting. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
+ if (!rfence->gfx)
+ return true;
+
+ /* Flush the gfx IB if it hasn't been flushed yet. */
+ if (rctx &&
+ rfence->gfx_unflushed.ctx == rctx &&
+ rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
+ rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+ rfence->gfx_unflushed.ctx = NULL;
+
+ /* A zero timeout is only a poll: the flush was just issued. */
+ if (!timeout)
+ return false;
+
+ /* Recompute the timeout after all that. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
+ return rws->fence_wait(rws, rfence->gfx, timeout);
+}
+
+/* Fill *info with memory statistics.
+ *
+ * Totals come from radeon_info (vram_size / gart_size), usage and eviction
+ * figures from winsys query_value(); every size is divided by 1024 before
+ * being stored. Available memory is clamped to 0 when the requested amount
+ * exceeds the total. */
+static void r600_query_memory_info(struct pipe_screen *screen,
+				   struct pipe_memory_info *info)
+{
+	struct r600_common_screen *common = (struct r600_common_screen*)screen;
+	struct radeon_winsys *winsys = common->ws;
+	unsigned vram_requested, gtt_requested;
+
+	info->total_device_memory = common->info.vram_size / 1024;
+	info->total_staging_memory = common->info.gart_size / 1024;
+
+	/* The real TTM memory usage is somewhat random, because:
+	 *
+	 * 1) TTM delays freeing memory, because it can only free it after
+	 *    fences expire.
+	 *
+	 * 2) The memory usage can be really low if big VRAM evictions are
+	 *    taking place, but the real usage is well above the size of VRAM.
+	 *
+	 * Instead, return statistics of this process.
+	 */
+	vram_requested = winsys->query_value(winsys, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+	gtt_requested = winsys->query_value(winsys, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+	if (vram_requested <= info->total_device_memory)
+		info->avail_device_memory = info->total_device_memory - vram_requested;
+	else
+		info->avail_device_memory = 0;
+
+	if (gtt_requested <= info->total_staging_memory)
+		info->avail_staging_memory = info->total_staging_memory - gtt_requested;
+	else
+		info->avail_staging_memory = 0;
+
+	info->device_memory_evicted =
+		winsys->query_value(winsys, RADEON_NUM_BYTES_MOVED) / 1024;
+
+	if (common->info.drm_major == 3 && common->info.drm_minor >= 4) {
+		info->nr_device_memory_evictions =
+			winsys->query_value(winsys, RADEON_NUM_EVICTIONS);
+	} else {
+		/* Just return the number of evicted 64KB pages. */
+		info->nr_device_memory_evictions = info->device_memory_evicted / 64;
+	}
+}
+
+/* Create a resource from a template: PIPE_BUFFER targets are created with
+ * r600_buffer_create() using an alignment of 256, every other target with
+ * r600_texture_create(). */
+struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
+						  const struct pipe_resource *templ)
+{
+	if (templ->target != PIPE_BUFFER)
+		return r600_texture_create(screen, templ);
+
+	return r600_buffer_create(screen, templ, 256);
+}
+
+/* Initialize the common part of a screen on top of the given winsys.
+ *
+ * Queries radeon_info from the winsys, builds renderer_string, installs the
+ * pipe_screen callbacks defined in this file, copies family/chip_class,
+ * reads the R600_DEBUG and R600_TEX_ANISO options, creates the disk cache,
+ * the transfer slab parent pool and the two mutexes, and dumps the device
+ * info to stdout when DBG_INFO is set.
+ *
+ * Every path visible here returns true.
+ */
+bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws)
+{
+ char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
+ struct utsname uname_data;
+ const char *chip_name;
+
+ ws->query_info(ws, &rscreen->info);
+ rscreen->ws = ws;
+
+ /* With a marketing name, the family name becomes a secondary label.
+ * NOTE(review): the "+ 4" skips the first four characters of the family
+ * name, presumably a vendor prefix such as "AMD " - confirm. */
+ if ((chip_name = r600_get_marketing_name(ws)))
+ snprintf(family_name, sizeof(family_name), "%s / ",
+ r600_get_family_name(rscreen) + 4);
+ else
+ chip_name = r600_get_family_name(rscreen);
+
+ if (uname(&uname_data) == 0)
+ snprintf(kernel_version, sizeof(kernel_version),
+ " / %s", uname_data.release);
+
+ if (HAVE_LLVM > 0) {
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+ }
+
+ snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
+ "%s (%sDRM %i.%i.%i%s%s)",
+ chip_name, family_name, rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
+ kernel_version, llvm_string);
+
+ /* Install the pipe_screen entry points. */
+ rscreen->b.get_name = r600_get_name;
+ rscreen->b.get_vendor = r600_get_vendor;
+ rscreen->b.get_device_vendor = r600_get_device_vendor;
+ rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
+ rscreen->b.get_compute_param = r600_get_compute_param;
+ rscreen->b.get_paramf = r600_get_paramf;
+ rscreen->b.get_timestamp = r600_get_timestamp;
+ rscreen->b.fence_finish = r600_fence_finish;
+ rscreen->b.fence_reference = r600_fence_reference;
+ rscreen->b.resource_destroy = u_resource_destroy_vtbl;
+ rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.query_memory_info = r600_query_memory_info;
+
+ if (rscreen->info.has_hw_decode) {
+ rscreen->b.get_video_param = rvid_get_video_param;
+ rscreen->b.is_video_format_supported = rvid_is_format_supported;
+ } else {
+ rscreen->b.get_video_param = r600_get_video_param;
+ rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
+ }
+
+ r600_init_screen_texture_functions(rscreen);
+ r600_init_screen_query_functions(rscreen);
+
+ rscreen->family = rscreen->info.family;
+ rscreen->chip_class = rscreen->info.chip_class;
+ /* OR-ed in, so flags already present in debug_flags are kept. */
+ rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+
+ r600_disk_cache_create(rscreen);
+
+ slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
+
+ /* Capped at 16; the default of -1 leaves forcing disabled. */
+ rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
+ if (rscreen->force_aniso >= 0) {
+ printf("radeon: Forcing anisotropy filter to %ix\n",
+ /* round down to a power of two */
+ 1 << util_logbase2(rscreen->force_aniso));
+ }
+
+ (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
+ (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
+
+ /* Dump the queried device info to stdout. */
+ if (rscreen->debug_flags & DBG_INFO) {
+ printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
+ rscreen->info.pci_domain, rscreen->info.pci_bus,
+ rscreen->info.pci_dev, rscreen->info.pci_func);
+ printf("pci_id = 0x%x\n", rscreen->info.pci_id);
+ printf("family = %i (%s)\n", rscreen->info.family,
+ r600_get_family_name(rscreen));
+ printf("chip_class = %i\n", rscreen->info.chip_class);
+ printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
+ printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
+ printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
+ printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
+ printf("max_alloc_size = %i MB\n",
+ (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
+ printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
+ printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
+ printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
+ printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
+ printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
+ printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
+ printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
+ printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
+ printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
+ printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
+ printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
+ printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
+ printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
+ printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
+ printf("has_userptr = %i\n", rscreen->info.has_userptr);
+ printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
+
+ printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
+ printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
+ printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
+ printf("max_se = %i\n", rscreen->info.max_se);
+ printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
+
+ printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
+ printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
+ printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
+ printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
+ printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
+ printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
+ printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
+ }
+ return true;
+}
+
+/* Tear down a common screen and free it.
+ *
+ * Destroys the perf counters, stops the GPU load thread, destroys both
+ * mutexes, the auxiliary context, the transfer slab parent pool, the disk
+ * shader cache and the winsys, then frees rscreen itself; the pointer must
+ * not be used afterwards.
+ *
+ * NOTE(review): aux_context is dereferenced unconditionally, so callers are
+ * presumably expected to have created it before destroying the screen.
+ */
+void r600_destroy_common_screen(struct r600_common_screen *rscreen)
+{
+ r600_perfcounters_destroy(rscreen);
+ r600_gpu_load_kill_thread(rscreen);
+
+ mtx_destroy(&rscreen->gpu_load_mutex);
+ mtx_destroy(&rscreen->aux_context_lock);
+ rscreen->aux_context->destroy(rscreen->aux_context);
+
+ slab_destroy_parent(&rscreen->pool_transfers);
+
+ disk_cache_destroy(rscreen->disk_shader_cache);
+ /* The winsys goes last among the sub-objects. */
+ rscreen->ws->destroy(rscreen->ws);
+ FREE(rscreen);
+}
+
+/* Return true when the debug flag bit numbered `processor` is set in
+ * rscreen->debug_flags (the DBG_VS..DBG_CS bits are defined as
+ * 1 << PIPE_SHADER_*). */
+bool r600_can_dump_shader(struct r600_common_screen *rscreen,
+			  unsigned processor)
+{
+	const uint64_t shader_bit = 1 << processor;
+
+	return (rscreen->debug_flags & shader_bit) != 0;
+}
+
+/* Return true when DBG_CHECK_IR is set or when shaders of the given
+ * processor type can be dumped (see r600_can_dump_shader). */
+bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
+{
+	if (rscreen->debug_flags & DBG_CHECK_IR)
+		return true;
+
+	return r600_can_dump_shader(rscreen, processor);
+}
+
+/* Clear `size` bytes of dst starting at `offset` with `value`, using the
+ * screen's auxiliary context: the DMA clear and the following flush are both
+ * issued while aux_context_lock is held. */
+void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+			      uint64_t offset, uint64_t size, unsigned value)
+{
+	struct pipe_context *aux = rscreen->aux_context;
+	struct r600_common_context *aux_rctx = (struct r600_common_context*)aux;
+
+	mtx_lock(&rscreen->aux_context_lock);
+	aux_rctx->dma_clear_buffer(&aux_rctx->b, dst, offset, size, value);
+	aux->flush(aux, NULL, 0);
+	mtx_unlock(&rscreen->aux_context_lock);
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h
new file mode 100644
index 000000000..a6406cfdb
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe_common.h
@@ -0,0 +1,932 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+/**
+ * This file contains common screen and context structures and functions
+ * for r600g and radeonsi.
+ */
+
+#ifndef R600_PIPE_COMMON_H
+#define R600_PIPE_COMMON_H
+
+#include <stdio.h>
+
+#include "amd/common/ac_binary.h"
+
+#include "radeon/radeon_winsys.h"
+
+#include "util/disk_cache.h"
+#include "util/u_blitter.h"
+#include "util/list.h"
+#include "util/u_range.h"
+#include "util/slab.h"
+#include "util/u_suballoc.h"
+#include "util/u_transfer.h"
+#include "util/u_threaded_context.h"
+
+struct u_log_context;
+
+#define ATI_VENDOR_ID 0x1002
+
+#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
+#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
+
+#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
+/* Pipeline & streamout query controls. */
+#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
+#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
+
+/* special primitive types */
+#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
+
+#define R600_NOT_QUERY 0xffffffff
+
+/* Debug flags. */
+#define DBG_VS (1 << PIPE_SHADER_VERTEX)
+#define DBG_PS (1 << PIPE_SHADER_FRAGMENT)
+#define DBG_GS (1 << PIPE_SHADER_GEOMETRY)
+#define DBG_TCS (1 << PIPE_SHADER_TESS_CTRL)
+#define DBG_TES (1 << PIPE_SHADER_TESS_EVAL)
+#define DBG_CS (1 << PIPE_SHADER_COMPUTE)
+#define DBG_ALL_SHADERS (DBG_FS - 1)
+#define DBG_FS (1 << 6) /* fetch shader */
+#define DBG_TEX (1 << 7)
+#define DBG_NIR (1 << 8)
+#define DBG_COMPUTE (1 << 9)
+/* gap */
+#define DBG_VM (1 << 11)
+#define DBG_NO_IR (1 << 12)
+#define DBG_NO_TGSI (1 << 13)
+#define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR (1 << 15)
+#define DBG_CHECK_IR (1 << 16)
+#define DBG_NO_OPT_VARIANT (1 << 17)
+#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
+/* gaps */
+#define DBG_TEST_DMA (1 << 20)
+/* Bits 21-31 are reserved for the r600g driver. */
+/* features */
+#define DBG_NO_ASYNC_DMA (1ull << 32)
+#define DBG_NO_HYPERZ (1ull << 33)
+#define DBG_NO_DISCARD_RANGE (1ull << 34)
+#define DBG_NO_2D_TILING (1ull << 35)
+#define DBG_NO_TILING (1ull << 36)
+#define DBG_SWITCH_ON_EOP (1ull << 37)
+#define DBG_FORCE_DMA (1ull << 38)
+#define DBG_PRECOMPILE (1ull << 39)
+#define DBG_INFO (1ull << 40)
+#define DBG_NO_WC (1ull << 41)
+#define DBG_CHECK_VM (1ull << 42)
+/* gap */
+#define DBG_UNSAFE_MATH (1ull << 49)
+#define DBG_TEST_VMFAULT_CP (1ull << 51)
+#define DBG_TEST_VMFAULT_SDMA (1ull << 52)
+#define DBG_TEST_VMFAULT_SHADER (1ull << 53)
+
+#define R600_MAP_BUFFER_ALIGNMENT 64
+#define R600_MAX_VIEWPORTS 16
+
+#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+
+/* Coherency mode taken by r600_common_context::clear_buffer. */
+enum r600_coherency {
+ R600_COHERENCY_NONE, /* no cache flushes needed */
+ R600_COHERENCY_SHADER,
+ R600_COHERENCY_CB_META,
+};
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define R600_BIG_ENDIAN 1
+#else
+#define R600_BIG_ENDIAN 0
+#endif
+
+struct r600_common_context;
+struct r600_perfcounters;
+struct tgsi_shader_info;
+struct r600_qbo_state;
+
+void radeon_shader_binary_init(struct ac_shader_binary *b);
+void radeon_shader_binary_clean(struct ac_shader_binary *b);
+
+/* Driver resource: a threaded_resource (first member) plus the winsys
+ * buffer that backs it and its placement properties.
+ *
+ * Only 32-bit buffer allocations are supported, gallium doesn't support more
+ * at the moment.
+ */
+struct r600_resource {
+ struct threaded_resource b;
+
+ /* Winsys objects. */
+ struct pb_buffer *buf;
+ uint64_t gpu_address;
+ /* Memory usage if the buffer placement is optimal. */
+ uint64_t vram_usage;
+ uint64_t gart_usage;
+
+ /* Resource properties. */
+ uint64_t bo_size;
+ unsigned bo_alignment;
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags;
+ unsigned bind_history;
+
+ /* The buffer range which is initialized (with a write transfer,
+ * streamout, DMA, or as a random access target). The rest of
+ * the buffer is considered invalid and can be mapped unsynchronized.
+ *
+ * This allows unsynchronized mapping of a buffer range which hasn't
+ * been used yet. It's for applications which forget to use
+ * the unsynchronized map flag and expect the driver to figure it out.
+ */
+ struct util_range valid_buffer_range;
+
+ /* Whether the resource has been exported via resource_get_handle. */
+ unsigned external_usage; /* PIPE_HANDLE_USAGE_* */
+
+ /* Whether this resource is referenced by bindless handles. */
+ bool texture_handle_allocated;
+ bool image_handle_allocated;
+};
+
+/* Transfer object: a threaded_transfer (first member) plus an optional
+ * staging resource and an offset. Objects of this size are what the
+ * screen's pool_transfers slab hands out (see r600_common_screen_init).
+ * NOTE(review): offset is presumably relative to the staging buffer - confirm. */
+struct r600_transfer {
+ struct threaded_transfer b;
+ struct r600_resource *staging;
+ unsigned offset;
+};
+
+/* FMASK layout parameters; stored per texture in r600_texture::fmask. */
+struct r600_fmask_info {
+ uint64_t offset;
+ uint64_t size;
+ unsigned alignment;
+ unsigned pitch_in_pixels;
+ unsigned bank_height;
+ unsigned slice_tile_max;
+ unsigned tile_mode_index;
+ unsigned tile_swizzle;
+};
+
+/* CMASK layout parameters; stored per texture in r600_texture::cmask. */
+struct r600_cmask_info {
+ uint64_t offset;
+ uint64_t size;
+ unsigned alignment;
+ unsigned slice_tile_max;
+ uint64_t base_address_reg;
+};
+
+/* Texture object: an r600_resource (first member) extended with the surface
+ * layout, depth/stencil properties and the color/depth compression and
+ * fast-clear state. */
+struct r600_texture {
+ struct r600_resource resource;
+
+ uint64_t size;
+ unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
+ bool is_depth;
+ bool db_compatible;
+ bool can_sample_z;
+ bool can_sample_s;
+ unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
+ unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
+ struct r600_texture *flushed_depth_texture;
+ struct radeon_surf surface;
+
+ /* Colorbuffer compression and fast clear. */
+ struct r600_fmask_info fmask;
+ struct r600_cmask_info cmask;
+ struct r600_resource *cmask_buffer;
+ unsigned cb_color_info; /* fast clear enable bit */
+ unsigned color_clear_value[2];
+ unsigned last_msaa_resolve_target_micro_mode;
+
+ /* Depth buffer compression and fast clear. */
+ uint64_t htile_offset;
+ bool depth_cleared; /* if it was cleared at least once */
+ float depth_clear_value;
+ bool stencil_cleared; /* if it was cleared at least once */
+ uint8_t stencil_clear_value;
+
+ bool non_disp_tiling; /* R600-Cayman only */
+
+ /* Counter that should be non-zero if the texture is bound to a
+ * framebuffer. Implemented in radeonsi only.
+ */
+ uint32_t framebuffers_bound;
+};
+
+/* Surface object: a pipe_surface (first member) plus the color-buffer (CB)
+ * and depth-buffer (DB) register values kept for it. The *_initialized flags
+ * presumably record whether the corresponding register group has been
+ * computed yet - TODO confirm against the users of this struct. */
+struct r600_surface {
+ struct pipe_surface base;
+
+ /* These can vary with block-compressed textures. */
+ unsigned width0;
+ unsigned height0;
+
+ bool color_initialized;
+ bool depth_initialized;
+
+ /* Misc. color flags. */
+ bool alphatest_bypass;
+ bool export_16bpc;
+ bool color_is_int8;
+ bool color_is_int10;
+
+ /* Color registers. */
+ unsigned cb_color_info;
+ unsigned cb_color_base;
+ unsigned cb_color_view;
+ unsigned cb_color_size; /* R600 only */
+ unsigned cb_color_dim; /* EG only */
+ unsigned cb_color_pitch; /* EG and later */
+ unsigned cb_color_slice; /* EG and later */
+ unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
+ unsigned cb_color_fmask_slice; /* EG and later */
+ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
+ unsigned cb_color_mask; /* R600 only */
+ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
+ struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
+
+ /* DB registers. */
+ uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
+ uint64_t db_stencil_base; /* EG and later */
+ uint64_t db_htile_data_base;
+ unsigned db_depth_info; /* R600 only, then SI and later */
+ unsigned db_z_info; /* EG and later */
+ unsigned db_depth_view;
+ unsigned db_depth_size;
+ unsigned db_depth_slice; /* EG and later */
+ unsigned db_stencil_info; /* EG and later */
+ unsigned db_prefetch_limit; /* R600 only */
+ unsigned db_htile_surface;
+ unsigned db_preload_control; /* EG and later */
+};
+
+/* A busy/idle pair of counters for one hardware block; the members of
+ * union r600_mmio_counters are all of this type. */
+struct r600_mmio_counter {
+ unsigned busy;
+ unsigned idle;
+};
+
+/* All busy/idle counters, grouped by the status register they come from.
+ *
+ * The union lets the same storage be addressed either by name (named.*) or
+ * as a flat array of unsigned values (array[]); since every counter is a
+ * busy/idle pair, consecutive array elements alternate between the two.
+ * array is declared with length 0 (a zero-length array, which is a compiler
+ * extension), so its usable extent is the size of `named`.
+ */
+union r600_mmio_counters {
+ struct {
+ /* For global GPU load including SDMA. */
+ struct r600_mmio_counter gpu;
+
+ /* GRBM_STATUS */
+ struct r600_mmio_counter spi;
+ struct r600_mmio_counter gui;
+ struct r600_mmio_counter ta;
+ struct r600_mmio_counter gds;
+ struct r600_mmio_counter vgt;
+ struct r600_mmio_counter ia;
+ struct r600_mmio_counter sx;
+ struct r600_mmio_counter wd;
+ struct r600_mmio_counter bci;
+ struct r600_mmio_counter sc;
+ struct r600_mmio_counter pa;
+ struct r600_mmio_counter db;
+ struct r600_mmio_counter cp;
+ struct r600_mmio_counter cb;
+
+ /* SRBM_STATUS2 */
+ struct r600_mmio_counter sdma;
+
+ /* CP_STAT */
+ struct r600_mmio_counter pfp;
+ struct r600_mmio_counter meq;
+ struct r600_mmio_counter me;
+ struct r600_mmio_counter surf_sync;
+ struct r600_mmio_counter cp_dma;
+ struct r600_mmio_counter scratch_ram;
+ } named;
+ unsigned array[0];
+};
+
+/* Memory object: a pipe_memory_object (first member) plus the winsys buffer
+ * it refers to and a stride/offset pair.
+ * NOTE(review): units of stride and offset are not visible here - confirm. */
+struct r600_memory_object {
+ struct pipe_memory_object b;
+ struct pb_buffer *buf;
+ uint32_t stride;
+ uint32_t offset;
+};
+
+/* Screen state shared by the drivers built on this common code.
+ *
+ * pipe_screen is the first member, so a pipe_screen pointer can be cast to
+ * this type (the screen callbacks do exactly that).
+ * r600_common_screen_init() fills in ws, info, family, chip_class,
+ * debug_flags, force_aniso, renderer_string, the transfer pool and the two
+ * mutexes; r600_destroy_common_screen() releases them and frees the struct.
+ */
+struct r600_common_screen {
+ struct pipe_screen b;
+ struct radeon_winsys *ws;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ struct radeon_info info;
+ uint64_t debug_flags; /* DBG_* bits */
+ bool has_cp_dma;
+ bool has_streamout;
+
+ struct disk_cache *disk_shader_cache;
+
+ struct slab_parent_pool pool_transfers;
+
+ /* Texture filter settings. */
+ int force_aniso; /* -1 = disabled */
+
+ /* Auxiliary context. Mainly used to initialize resources.
+ * It must be locked prior to using and flushed before unlocking. */
+ struct pipe_context *aux_context;
+ mtx_t aux_context_lock;
+
+ /* This must be in the screen, because UE4 uses one context for
+ * compilation and another one for rendering.
+ */
+ unsigned num_compilations;
+ /* Along with ST_DEBUG=precompile, this should show if applications
+ * are loading shaders on demand. This is a monotonic counter.
+ */
+ unsigned num_shaders_created;
+ unsigned num_shader_cache_hits;
+
+ /* GPU load thread. */
+ mtx_t gpu_load_mutex;
+ thrd_t gpu_load_thread;
+ union r600_mmio_counters mmio_counters;
+ volatile unsigned gpu_load_stop_thread; /* bool */
+
+ char renderer_string[100];
+
+ /* Performance counters. */
+ struct r600_perfcounters *perfcounters;
+
+ /* If pipe_screen wants to recompute and re-emit the framebuffer,
+ * sampler, and image states of all contexts, it should atomically
+ * increment this.
+ *
+ * Each context will compare this with its own last known value of
+ * the counter before drawing and re-emit the states accordingly.
+ */
+ unsigned dirty_tex_counter;
+
+ /* Atomically increment this counter when an existing texture's
+ * metadata is enabled or disabled in a way that requires changing
+ * contexts' compressed texture binding masks.
+ */
+ unsigned compressed_colortex_counter;
+
+ struct {
+ /* Context flags to set so that all writes from earlier jobs
+ * in the CP are seen by L2 clients.
+ */
+ unsigned cp_to_L2;
+
+ /* Context flags to set so that all writes from earlier jobs
+ * that end in L2 are seen by CP.
+ */
+ unsigned L2_to_cp;
+
+ /* Context flags to set so that all writes from earlier
+ * compute jobs are seen by L2 clients.
+ */
+ unsigned compute_to_L2;
+ } barrier_flags;
+
+ void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct radeon_bo_metadata *md);
+
+ void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct radeon_bo_metadata *md);
+};
+
+/* This encapsulates a state or an operation which can be emitted into the GPU
+ * command stream. */
+struct r600_atom {
+ /* Callback that emits this atom for the given context. */
+ void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
+ unsigned num_dw; /* NOTE(review): presumably the dword count emit() writes - confirm */
+ unsigned short id;
+};
+
+/* Stream-output target: a pipe_stream_output_target (first member) plus the
+ * location where BUFFER_FILLED_SIZE is kept for it. */
+struct r600_so_target {
+ struct pipe_stream_output_target b;
+
+ /* The buffer where BUFFER_FILLED_SIZE is stored. */
+ struct r600_resource *buf_filled_size;
+ unsigned buf_filled_size_offset;
+ bool buf_filled_size_valid;
+
+ unsigned stride_in_dw;
+};
+
+/* Per-context streamout state: the begin/enable atoms, the bound targets
+ * (up to PIPE_MAX_SO_BUFFERS) and the masks describing which buffers are
+ * enabled. Embedded in r600_common_context as `streamout`. */
+struct r600_streamout {
+ struct r600_atom begin_atom;
+ bool begin_emitted;
+ unsigned num_dw_for_end;
+
+ unsigned enabled_mask;
+ unsigned num_targets;
+ struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
+
+ unsigned append_bitmask;
+ bool suspended;
+
+ /* External state which comes from the vertex shader,
+ * it must be set explicitly when binding a shader. */
+ uint16_t *stride_in_dw;
+ unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+ unsigned hw_enabled_mask;
+
+ /* The state of VGT_STRMOUT_(CONFIG|EN). */
+ struct r600_atom enable_atom;
+ bool streamout_enabled;
+ bool prims_gen_query_enabled;
+ int num_prims_gen_queries;
+};
+
+/* Scissor rectangle with signed bounds; r600_viewports keeps one per
+ * viewport in as_scissor[]. */
+struct r600_signed_scissor {
+ int minx;
+ int miny;
+ int maxx;
+ int maxy;
+};
+
+/* Scissor state for all R600_MAX_VIEWPORTS viewports plus its atom.
+ * NOTE(review): dirty_mask presumably has one bit per entry of states[] -
+ * confirm against the code that sets it. */
+struct r600_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[R600_MAX_VIEWPORTS];
+};
+
+/* Viewport state for all R600_MAX_VIEWPORTS viewports plus its atom.
+ * as_scissor[] holds a signed scissor rectangle for each viewport.
+ * NOTE(review): the two dirty masks presumably carry one bit per viewport -
+ * confirm against the code that sets them. */
+struct r600_viewports {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ unsigned depth_range_dirty_mask;
+ struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
+ struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS];
+};
+
+/* A command stream together with the callback used to flush it.
+ * r600_common_context has one for gfx and one for dma. */
+struct r600_ring {
+ struct radeon_winsys_cs *cs;
+ /* ctx is passed as void *; r600_fence_finish() passes the
+ * r600_common_context, flush flags and a NULL fence pointer. */
+ void (*flush)(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence);
+};
+
+/* Saved CS data for debugging features.
+ * NOTE(review): ib/num_dw and bo_list/bo_count look like pointer+length
+ * pairs (dwords and list entries respectively) - confirm with the code that
+ * fills this struct. */
+struct radeon_saved_cs {
+ uint32_t *ib;
+ unsigned num_dw;
+
+ struct radeon_bo_list_item *bo_list;
+ unsigned bo_count;
+};
+
+/* Context state shared by the drivers built on this common code.
+ *
+ * pipe_context is the first member, so a pipe_context pointer can be cast
+ * to this type (r600_fence_finish and r600_screen_clear_buffer do so).
+ * Besides rings, fences, statistics and state atoms, the struct carries a
+ * set of function pointers (dma_copy, clear_buffer, ...) through which the
+ * common code calls back into the context.
+ */
+struct r600_common_context {
+ struct pipe_context b; /* base class */
+
+ struct r600_common_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_ctx *ctx;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ struct r600_ring gfx;
+ struct r600_ring dma;
+ struct pipe_fence_handle *last_gfx_fence;
+ struct pipe_fence_handle *last_sdma_fence;
+ struct r600_resource *eop_bug_scratch;
+ /* Compared against a fence's gfx_unflushed.ib_index in r600_fence_finish. */
+ unsigned num_gfx_cs_flushes;
+ unsigned initial_gfx_cs_size;
+ unsigned gpu_reset_counter;
+ unsigned last_dirty_tex_counter;
+ unsigned last_compressed_colortex_counter;
+ unsigned last_num_draw_calls;
+
+ struct threaded_context *tc;
+ struct u_suballocator *allocator_zeroed_memory;
+ struct slab_child_pool pool_transfers;
+ struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
+
+ /* Current unaccounted memory usage. */
+ uint64_t vram;
+ uint64_t gtt;
+
+ /* States. */
+ struct r600_streamout streamout;
+ struct r600_scissors scissors;
+ struct r600_viewports viewports;
+ bool scissor_enabled;
+ bool clip_halfz;
+ bool vs_writes_viewport_index;
+ bool vs_disables_clipping_viewport;
+
+ /* Additional context states. */
+ unsigned flags; /* flush flags */
+
+ /* Queries. */
+ /* Maintain the list of active queries for pausing between IBs. */
+ int num_occlusion_queries;
+ int num_perfect_occlusion_queries;
+ struct list_head active_queries;
+ unsigned num_cs_dw_queries_suspend;
+ /* Misc stats. */
+ unsigned num_draw_calls;
+ unsigned num_decompress_calls;
+ unsigned num_mrt_draw_calls;
+ unsigned num_prim_restart_calls;
+ unsigned num_spill_draw_calls;
+ unsigned num_compute_calls;
+ unsigned num_spill_compute_calls;
+ unsigned num_dma_calls;
+ unsigned num_cp_dma_calls;
+ unsigned num_vs_flushes;
+ unsigned num_ps_flushes;
+ unsigned num_cs_flushes;
+ unsigned num_cb_cache_flushes;
+ unsigned num_db_cache_flushes;
+ unsigned num_L2_invalidates;
+ unsigned num_L2_writebacks;
+ unsigned num_resident_handles;
+ uint64_t num_alloc_tex_transfer_bytes;
+
+ /* Render condition. */
+ struct r600_atom render_cond_atom;
+ struct pipe_query *render_cond;
+ unsigned render_cond_mode;
+ bool render_cond_invert;
+ bool render_cond_force_off; /* for u_blitter */
+
+ /* MSAA sample locations.
+ * The first index is the sample index.
+ * The second index is the coordinate: X, Y. */
+ float sample_locations_1x[1][2];
+ float sample_locations_2x[2][2];
+ float sample_locations_4x[4][2];
+ float sample_locations_8x[8][2];
+ float sample_locations_16x[16][2];
+
+ struct pipe_debug_callback debug;
+ struct pipe_device_reset_callback device_reset_callback;
+ struct u_log_context *log;
+
+ void *query_result_shader;
+
+ /* Copy one resource to another using async DMA. */
+ void (*dma_copy)(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dst_x, unsigned dst_y, unsigned dst_z,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+
+ /* Called by r600_screen_clear_buffer() on the auxiliary context. */
+ void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+
+ void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value,
+ enum r600_coherency coher);
+
+ void (*blit_decompress_depth)(struct pipe_context *ctx,
+ struct r600_texture *texture,
+ struct r600_texture *staging,
+ unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned last_layer,
+ unsigned first_sample, unsigned last_sample);
+
+ /* Reallocate the buffer and update all resource bindings where
+ * the buffer is bound, including all resource descriptors. */
+ void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
+
+ /* Update all resource bindings where the buffer is bound, including
+ * all resource descriptors. This is invalidate_buffer without
+ * the invalidation. */
+ void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
+ uint64_t old_gpu_address);
+
+ void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);
+
+ /* This ensures there is enough space in the command stream. */
+ void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
+ bool include_draw_vbo);
+
+ void (*set_atom_dirty)(struct r600_common_context *ctx,
+ struct r600_atom *atom, bool dirty);
+
+ void (*check_vm_faults)(struct r600_common_context *ctx,
+ struct radeon_saved_cs *saved,
+ enum ring_type ring);
+};
+
+/* r600_buffer_common.c */
+bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage);
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage);
+void r600_buffer_subdata(struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned usage, unsigned offset,
+ unsigned size, const void *data);
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+ struct r600_resource *res,
+ uint64_t size, unsigned alignment);
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+ struct r600_resource *res);
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ unsigned alignment);
+struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned flags,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment);
+struct pipe_resource *
+r600_buffer_from_user_memory(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ void *user_memory);
+void
+r600_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource);
+void r600_replace_buffer_storage(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src);
+
+/* r600_common_pipe.c */
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type);
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask);
+void r600_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib);
+bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws);
+void r600_destroy_common_screen(struct r600_common_screen *rscreen);
+void r600_preflush_suspend_features(struct r600_common_context *ctx);
+void r600_postflush_resume_features(struct r600_common_context *ctx);
+bool r600_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags);
+void r600_common_context_cleanup(struct r600_common_context *rctx);
+bool r600_can_dump_shader(struct r600_common_screen *rscreen,
+ unsigned processor);
+bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
+ unsigned processor);
+void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+const char *r600_get_llvm_processor_name(enum radeon_family family);
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src);
+void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+ struct radeon_saved_cs *saved, bool get_buffer_list);
+void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
+bool r600_check_device_reset(struct r600_common_context *rctx);
+
+/* r600_gpu_load.c */
+void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin);
+
+/* r600_perfcounters.c */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
+
+/* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
+void r600_query_init(struct r600_common_context *rctx);
+void r600_suspend_queries(struct r600_common_context *ctx);
+void r600_resume_queries(struct r600_common_context *ctx);
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
+
+/* r600_streamout.c */
+void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
+void r600_set_streamout_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offset);
+void r600_emit_streamout_end(struct r600_common_context *rctx);
+void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
+ unsigned type, int diff);
+void r600_streamout_init(struct r600_common_context *rctx);
+
+/* r600_test_dma.c */
+void r600_test_dma(struct r600_common_screen *rscreen);
+
+/* r600_texture.c */
+bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned nr_samples,
+ struct r600_fmask_info *out);
+void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct r600_cmask_info *out);
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct r600_texture **staging);
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log);
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
+ unsigned width, unsigned height);
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+ struct pipe_framebuffer_state *fb,
+ struct r600_atom *fb_state,
+ unsigned *buffers, ubyte *dirty_cbufs,
+ const union pipe_color_union *color);
+void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
+void r600_init_context_texture_functions(struct r600_common_context *rctx);
+
+/* r600_viewport.c */
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+ struct pipe_scissor_state *scissor);
+void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
+ bool scissor_enable, bool clip_halfz);
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+ struct tgsi_shader_info *info);
+void r600_init_viewport_functions(struct r600_common_context *rctx);
+
+/* cayman_msaa.c */
+extern const uint32_t eg_sample_locs_2x[4];
+extern const unsigned eg_max_dist_2x;
+extern const uint32_t eg_sample_locs_4x[4];
+extern const unsigned eg_max_dist_4x;
+void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+ unsigned sample_index, float *out_value);
+void cayman_init_msaa(struct pipe_context *ctx);
+void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
+void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
+ int ps_iter_samples, int overrast_samples,
+ unsigned sc_mode_cntl_1);
+
+
+/* Inline helpers. */
+
+/* Reinterpret a generic pipe_resource pointer as the driver's r600_resource.
+ * This is a plain pointer cast; no checking is performed. */
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
+{
+	struct r600_resource *res = (struct r600_resource *)r;
+
+	return res;
+}
+
+/* Typed wrapper around pipe_resource_reference() for r600_resource
+ * pointers: both arguments are cast to their pipe_resource equivalents and
+ * forwarded unchanged. */
+static inline void
+r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
+{
+	struct pipe_resource **dst = (struct pipe_resource **)ptr;
+	struct pipe_resource *src = (struct pipe_resource *)res;
+
+	pipe_resource_reference(dst, src);
+}
+
+/* Texture flavour of the reference helper: forwards to
+ * pipe_resource_reference() using the pipe_resource embedded in the texture.
+ * NOTE(review): the address of res->resource.b.b is formed even when res is
+ * NULL; presumably that member sits at offset 0 so the result is still
+ * NULL -- confirm against the struct layout. */
+static inline void
+r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
+{
+ pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
+}
+
+/* Add the VRAM and GART usage recorded on a resource to the context's
+ * running vram/gtt totals. A NULL resource is ignored. */
+static inline void
+r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_resource *res = (struct r600_resource *)r;
+
+	if (!res)
+		return;
+
+	/* Add memory usage for need_gfx_cs_space */
+	rctx->vram += res->vram_usage;
+	rctx->gtt += res->gart_usage;
+}
+
+/* True when either the streamout_enabled or the prims_gen_query_enabled
+ * flag of the context's streamout state is set. */
+static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
+{
+	if (rctx->streamout.streamout_enabled)
+		return true;
+
+	return rctx->streamout.prims_gen_query_enabled ? true : false;
+}
+
+#define SQ_TEX_XY_FILTER_POINT 0x00
+#define SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
+
+/* Select an SQ_TEX_XY_FILTER_* value: BILINEAR variants for
+ * PIPE_TEX_FILTER_LINEAR, POINT variants for anything else, and the ANISO
+ * flavour of either whenever max_aniso is greater than 1. */
+static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
+{
+	const bool aniso = max_aniso > 1;
+
+	if (filter != PIPE_TEX_FILTER_LINEAR)
+		return aniso ? SQ_TEX_XY_FILTER_ANISO_POINT
+			     : SQ_TEX_XY_FILTER_POINT;
+
+	return aniso ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
+		     : SQ_TEX_XY_FILTER_BILINEAR;
+}
+
+/* Bucket an anisotropy value into 0..4:
+ * <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, everything else -> 4. */
+static inline unsigned r600_tex_aniso_filter(unsigned filter)
+{
+	unsigned level = 0;
+
+	/* Thresholds are 2, 4, 8, 16; stop at the first one not reached. */
+	while (level < 4 && filter >= (2u << level))
+		level++;
+
+	return level;
+}
+
+/* Return the value this driver uses as the wavefront size for a chip
+ * family: 16 or 32 for the families listed below, 64 for all others. */
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+	unsigned size = 64;
+
+	switch (family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RV620:
+	case CHIP_RS880:
+		size = 16;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_PALM:
+	case CHIP_CEDAR:
+		size = 32;
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+/* Choose the buffer-object priority for a sampler view: buffers get
+ * SAMPLER_BUFFER, resources with more than one sample get
+ * SAMPLER_TEXTURE_MSAA, everything else SAMPLER_TEXTURE. */
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+	const struct pipe_resource *pres = &res->b.b;
+
+	if (pres->target == PIPE_BUFFER)
+		return RADEON_PRIO_SAMPLER_BUFFER;
+
+	return pres->nr_samples > 1 ? RADEON_PRIO_SAMPLER_TEXTURE_MSAA
+				    : RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
+/* Report the texture's can_sample_s flag when a stencil sampler is
+ * requested, and its can_sample_z flag otherwise. */
+static inline bool
+r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
+{
+	if (stencil_sampler)
+		return tex->can_sample_s ? true : false;
+
+	return tex->can_sample_z ? true : false;
+}
+
+/* HTILE counts as enabled only for mip level 0 of a texture whose
+ * htile_offset is non-zero. */
+static inline bool
+r600_htile_enabled(struct r600_texture *tex, unsigned level)
+{
+	if (level != 0)
+		return false;
+
+	return tex->htile_offset != 0;
+}
+
+/* Print a printf-style message to stderr when DBG_COMPUTE is set in the
+ * screen's debug flags.
+ *
+ * The expansion deliberately has no trailing semicolon: the caller's own
+ * ';' completes the statement, which keeps the macro usable as the body of
+ * an if/else without braces. The screen argument is parenthesized so any
+ * pointer expression may be passed. */
+#define COMPUTE_DBG(rscreen, fmt, args...) \
+	do { \
+		if (((rscreen)->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
+	} while (0)
+
+/* Print a printf-style error message to stderr, prefixed with "EE" and the
+ * file, line and function of the call site. */
+#define R600_ERR(fmt, args...) \
+ fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
+
+/* For MSAA sample positions.
+ *
+ * Packs four (x, y) pairs into one 32-bit value, 4 bits per coordinate,
+ * starting with s0x in bits 0-3 and ending with s3y in bits 28-31. Every
+ * coordinate is converted to unsigned before masking so negative inputs are
+ * handled identically in all eight positions. */
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+	(((unsigned)(s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
+	 (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
+	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
+	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+
+/* Scale a float by 2^frac_bits and convert the product to int
+ * (the conversion truncates toward zero). */
+static inline int S_FIXED(float value, unsigned frac_bits)
+{
+	const int scale = 1 << frac_bits;
+
+	return (int)(value * scale);
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_query.c b/lib/mesa/src/gallium/drivers/r600/r600_query.c
new file mode 100644
index 000000000..aa3e36f56
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_query.c
@@ -0,0 +1,2126 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r600_query.h"
+#include "r600_pipe.h"
+#include "r600_cs.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "os/os_time.h"
+#include "tgsi/tgsi_text.h"
+
+#define R600_MAX_STREAMS 4
+
+/* Layout parameters for a hardware query result.
+ * NOTE(review): this struct is not referenced in the part of the file
+ * visible here; the field notes below are inferred from the names only and
+ * should be confirmed against the users of the struct. */
+struct r600_hw_query_params {
+ unsigned start_offset; /* presumably offset of the begin value */
+ unsigned end_offset; /* presumably offset of the end value */
+ unsigned fence_offset; /* presumably offset of the fence */
+ unsigned pair_stride; /* presumably distance between begin/end pairs */
+ unsigned pair_count; /* presumably number of begin/end pairs */
+};
+
+/* Queries without buffer handling or suspend/resume.
+ *
+ * The begin/end hooks snapshot a counter into begin_result/end_result and
+ * the result is normally their difference. Some query types additionally
+ * record a second quantity in begin_time/end_time that is used as the
+ * denominator of the result (a timestamp, or a gfx IB count). */
+struct r600_query_sw {
+ struct r600_query b;
+
+ uint64_t begin_result;
+ uint64_t end_result;
+
+ uint64_t begin_time;
+ uint64_t end_time;
+
+ /* Fence for GPU_FINISHED. */
+ struct pipe_fence_handle *fence;
+};
+
+/* Destroy a software query: drop the fence reference it may hold, then
+ * free the query object itself. */
+static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
+				  struct r600_query *rquery)
+{
+	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
+	struct pipe_screen *pscreen = &rscreen->b;
+
+	rscreen->b.fence_reference(pscreen, &sw->fence, NULL);
+	FREE(sw);
+}
+
+/* Map an R600_QUERY_* software query type to the radeon_value_id that is
+ * passed to the winsys query_value() hook. Types without a mapping reach
+ * unreachable(). */
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+ switch (type) {
+ case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+ case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+ case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
+ case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
+ case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+ case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
+ case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
+ case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
+ case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
+ case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+ case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
+ case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
+ case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
+ case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+ case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+ case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+ case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
+ default: unreachable("query type does not correspond to winsys id");
+ }
+}
+
+/* Begin hook for software queries.
+ *
+ * Snapshots the starting value of the counter selected by the query type
+ * into query->begin_result (and, for the ratio-style queries, a second
+ * starting value into query->begin_time). Query types whose result does not
+ * depend on a starting value do nothing here. Always returns true; an
+ * unknown type reaches unreachable().
+ *
+ * A single function-scope ws_id is used for every winsys lookup so that no
+ * case block shadows it. */
+static bool r600_query_sw_begin(struct r600_common_context *rctx,
+				struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+	enum radeon_value_id ws_id;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+	case PIPE_QUERY_GPU_FINISHED:
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->begin_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_DECOMPRESS_CALLS:
+		query->begin_result = rctx->num_decompress_calls;
+		break;
+	case R600_QUERY_MRT_DRAW_CALLS:
+		query->begin_result = rctx->num_mrt_draw_calls;
+		break;
+	case R600_QUERY_PRIM_RESTART_CALLS:
+		query->begin_result = rctx->num_prim_restart_calls;
+		break;
+	case R600_QUERY_SPILL_DRAW_CALLS:
+		query->begin_result = rctx->num_spill_draw_calls;
+		break;
+	case R600_QUERY_COMPUTE_CALLS:
+		query->begin_result = rctx->num_compute_calls;
+		break;
+	case R600_QUERY_SPILL_COMPUTE_CALLS:
+		query->begin_result = rctx->num_spill_compute_calls;
+		break;
+	case R600_QUERY_DMA_CALLS:
+		query->begin_result = rctx->num_dma_calls;
+		break;
+	case R600_QUERY_CP_DMA_CALLS:
+		query->begin_result = rctx->num_cp_dma_calls;
+		break;
+	case R600_QUERY_NUM_VS_FLUSHES:
+		query->begin_result = rctx->num_vs_flushes;
+		break;
+	case R600_QUERY_NUM_PS_FLUSHES:
+		query->begin_result = rctx->num_ps_flushes;
+		break;
+	case R600_QUERY_NUM_CS_FLUSHES:
+		query->begin_result = rctx->num_cs_flushes;
+		break;
+	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+		query->begin_result = rctx->num_cb_cache_flushes;
+		break;
+	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+		query->begin_result = rctx->num_db_cache_flushes;
+		break;
+	case R600_QUERY_NUM_L2_INVALIDATES:
+		query->begin_result = rctx->num_L2_invalidates;
+		break;
+	case R600_QUERY_NUM_L2_WRITEBACKS:
+		query->begin_result = rctx->num_L2_writebacks;
+		break;
+	case R600_QUERY_NUM_RESIDENT_HANDLES:
+		query->begin_result = rctx->num_resident_handles;
+		break;
+	case R600_QUERY_TC_OFFLOADED_SLOTS:
+		query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
+		break;
+	case R600_QUERY_TC_DIRECT_SLOTS:
+		query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
+		break;
+	case R600_QUERY_TC_NUM_SYNCS:
+		query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0;
+		break;
+	/* Instantaneous values: the result is just the end value. */
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_MAPPED_VRAM:
+	case R600_QUERY_MAPPED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_VRAM_VIS_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_NUM_MAPPED_BUFFERS:
+		query->begin_result = 0;
+		break;
+	/* Winsys counters reported as an end - begin difference. */
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_GFX_IBS:
+	case R600_QUERY_NUM_SDMA_IBS:
+	case R600_QUERY_NUM_BYTES_MOVED:
+	case R600_QUERY_NUM_EVICTIONS:
+	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	case R600_QUERY_GFX_BO_LIST_SIZE:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		/* begin_time holds the gfx IB count here, not a timestamp. */
+		query->begin_time = rctx->ws->query_value(rctx->ws,
+							  RADEON_NUM_GFX_IBS);
+		break;
+	case R600_QUERY_CS_THREAD_BUSY:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		query->begin_time = os_time_get_nano();
+		break;
+	case R600_QUERY_GALLIUM_THREAD_BUSY:
+		query->begin_result =
+			rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
+		query->begin_time = os_time_get_nano();
+		break;
+	case R600_QUERY_GPU_LOAD:
+	case R600_QUERY_GPU_SHADERS_BUSY:
+	case R600_QUERY_GPU_TA_BUSY:
+	case R600_QUERY_GPU_GDS_BUSY:
+	case R600_QUERY_GPU_VGT_BUSY:
+	case R600_QUERY_GPU_IA_BUSY:
+	case R600_QUERY_GPU_SX_BUSY:
+	case R600_QUERY_GPU_WD_BUSY:
+	case R600_QUERY_GPU_BCI_BUSY:
+	case R600_QUERY_GPU_SC_BUSY:
+	case R600_QUERY_GPU_PA_BUSY:
+	case R600_QUERY_GPU_DB_BUSY:
+	case R600_QUERY_GPU_CP_BUSY:
+	case R600_QUERY_GPU_CB_BUSY:
+	case R600_QUERY_GPU_SDMA_BUSY:
+	case R600_QUERY_GPU_PFP_BUSY:
+	case R600_QUERY_GPU_MEQ_BUSY:
+	case R600_QUERY_GPU_ME_BUSY:
+	case R600_QUERY_GPU_SURF_SYNC_BUSY:
+	case R600_QUERY_GPU_CP_DMA_BUSY:
+	case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+		query->begin_result = r600_begin_counter(rctx->screen,
+							 query->b.type);
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	case R600_QUERY_NUM_SHADER_CACHE_HITS:
+		query->begin_result =
+			p_atomic_read(&rctx->screen->num_shader_cache_hits);
+		break;
+	case R600_QUERY_GPIN_ASIC_ID:
+	case R600_QUERY_GPIN_NUM_SIMD:
+	case R600_QUERY_GPIN_NUM_RB:
+	case R600_QUERY_GPIN_NUM_SPI:
+	case R600_QUERY_GPIN_NUM_SE:
+		break;
+	default:
+		unreachable("r600_query_sw_begin: bad query type");
+	}
+
+	return true;
+}
+
+/* End hook for software queries.
+ *
+ * Snapshots the final value of the counter selected by the query type into
+ * query->end_result (and, for the ratio-style queries, a second final value
+ * into query->end_time). PIPE_QUERY_GPU_FINISHED instead issues a deferred
+ * flush that stores a fence in the query. Always returns true; an unknown
+ * type reaches unreachable().
+ *
+ * A single function-scope ws_id is used for every winsys lookup so that no
+ * case block shadows it. */
+static bool r600_query_sw_end(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+	enum radeon_value_id ws_id;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		break;
+	case PIPE_QUERY_GPU_FINISHED:
+		rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->end_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_DECOMPRESS_CALLS:
+		query->end_result = rctx->num_decompress_calls;
+		break;
+	case R600_QUERY_MRT_DRAW_CALLS:
+		query->end_result = rctx->num_mrt_draw_calls;
+		break;
+	case R600_QUERY_PRIM_RESTART_CALLS:
+		query->end_result = rctx->num_prim_restart_calls;
+		break;
+	case R600_QUERY_SPILL_DRAW_CALLS:
+		query->end_result = rctx->num_spill_draw_calls;
+		break;
+	case R600_QUERY_COMPUTE_CALLS:
+		query->end_result = rctx->num_compute_calls;
+		break;
+	case R600_QUERY_SPILL_COMPUTE_CALLS:
+		query->end_result = rctx->num_spill_compute_calls;
+		break;
+	case R600_QUERY_DMA_CALLS:
+		query->end_result = rctx->num_dma_calls;
+		break;
+	case R600_QUERY_CP_DMA_CALLS:
+		query->end_result = rctx->num_cp_dma_calls;
+		break;
+	case R600_QUERY_NUM_VS_FLUSHES:
+		query->end_result = rctx->num_vs_flushes;
+		break;
+	case R600_QUERY_NUM_PS_FLUSHES:
+		query->end_result = rctx->num_ps_flushes;
+		break;
+	case R600_QUERY_NUM_CS_FLUSHES:
+		query->end_result = rctx->num_cs_flushes;
+		break;
+	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+		query->end_result = rctx->num_cb_cache_flushes;
+		break;
+	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+		query->end_result = rctx->num_db_cache_flushes;
+		break;
+	case R600_QUERY_NUM_L2_INVALIDATES:
+		query->end_result = rctx->num_L2_invalidates;
+		break;
+	case R600_QUERY_NUM_L2_WRITEBACKS:
+		query->end_result = rctx->num_L2_writebacks;
+		break;
+	case R600_QUERY_NUM_RESIDENT_HANDLES:
+		query->end_result = rctx->num_resident_handles;
+		break;
+	case R600_QUERY_TC_OFFLOADED_SLOTS:
+		query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
+		break;
+	case R600_QUERY_TC_DIRECT_SLOTS:
+		query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
+		break;
+	case R600_QUERY_TC_NUM_SYNCS:
+		query->end_result = rctx->tc ? rctx->tc->num_syncs : 0;
+		break;
+	/* Everything that is a plain winsys value read. */
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_MAPPED_VRAM:
+	case R600_QUERY_MAPPED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_VRAM_VIS_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_MAPPED_BUFFERS:
+	case R600_QUERY_NUM_GFX_IBS:
+	case R600_QUERY_NUM_SDMA_IBS:
+	case R600_QUERY_NUM_BYTES_MOVED:
+	case R600_QUERY_NUM_EVICTIONS:
+	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	case R600_QUERY_GFX_BO_LIST_SIZE:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		/* end_time holds the gfx IB count here, not a timestamp. */
+		query->end_time = rctx->ws->query_value(rctx->ws,
+							RADEON_NUM_GFX_IBS);
+		break;
+	case R600_QUERY_CS_THREAD_BUSY:
+		ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		query->end_time = os_time_get_nano();
+		break;
+	case R600_QUERY_GALLIUM_THREAD_BUSY:
+		query->end_result =
+			rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
+		query->end_time = os_time_get_nano();
+		break;
+	case R600_QUERY_GPU_LOAD:
+	case R600_QUERY_GPU_SHADERS_BUSY:
+	case R600_QUERY_GPU_TA_BUSY:
+	case R600_QUERY_GPU_GDS_BUSY:
+	case R600_QUERY_GPU_VGT_BUSY:
+	case R600_QUERY_GPU_IA_BUSY:
+	case R600_QUERY_GPU_SX_BUSY:
+	case R600_QUERY_GPU_WD_BUSY:
+	case R600_QUERY_GPU_BCI_BUSY:
+	case R600_QUERY_GPU_SC_BUSY:
+	case R600_QUERY_GPU_PA_BUSY:
+	case R600_QUERY_GPU_DB_BUSY:
+	case R600_QUERY_GPU_CP_BUSY:
+	case R600_QUERY_GPU_CB_BUSY:
+	case R600_QUERY_GPU_SDMA_BUSY:
+	case R600_QUERY_GPU_PFP_BUSY:
+	case R600_QUERY_GPU_MEQ_BUSY:
+	case R600_QUERY_GPU_ME_BUSY:
+	case R600_QUERY_GPU_SURF_SYNC_BUSY:
+	case R600_QUERY_GPU_CP_DMA_BUSY:
+	case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+		query->end_result = r600_end_counter(rctx->screen,
+						     query->b.type,
+						     query->begin_result);
+		/* r600_end_counter() was given the begin value, so clear it
+		 * to make the later end - begin subtraction a no-op. */
+		query->begin_result = 0;
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	case R600_QUERY_NUM_SHADER_CACHE_HITS:
+		query->end_result =
+			p_atomic_read(&rctx->screen->num_shader_cache_hits);
+		break;
+	case R600_QUERY_GPIN_ASIC_ID:
+	case R600_QUERY_GPIN_NUM_SIMD:
+	case R600_QUERY_GPIN_NUM_RB:
+	case R600_QUERY_GPIN_NUM_SPI:
+	case R600_QUERY_GPIN_NUM_SE:
+		break;
+	default:
+		unreachable("r600_query_sw_end: bad query type");
+	}
+
+	return true;
+}
+
+/* Produce the result of a software query.
+ *
+ * rctx:   context the query belongs to.
+ * rquery: the query (an r600_query_sw).
+ * wait:   only used by PIPE_QUERY_GPU_FINISHED, where it selects an infinite
+ *         or zero fence timeout.
+ * result: receives the value.
+ *
+ * Returns true when *result is valid; for PIPE_QUERY_GPU_FINISHED the return
+ * value is the fence status.
+ *
+ * The ratio-style queries divide by (end_time - begin_time). That interval
+ * can be zero (for GFX_BO_LIST_SIZE it is the number of gfx IBs counted
+ * between begin and end), so a zero interval yields a result of 0 instead
+ * of dividing by zero. */
+static bool r600_query_sw_get_result(struct r600_common_context *rctx,
+				     struct r600_query *rquery,
+				     bool wait,
+				     union pipe_query_result *result)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = false;
+		return true;
+	case PIPE_QUERY_GPU_FINISHED: {
+		struct pipe_screen *screen = rctx->b.screen;
+		struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b;
+
+		result->b = screen->fence_finish(screen, ctx, query->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0);
+		return result->b;
+	}
+
+	case R600_QUERY_GFX_BO_LIST_SIZE: {
+		/* begin_time/end_time hold gfx IB counts for this query. */
+		uint64_t num_ibs = query->end_time - query->begin_time;
+
+		result->u64 = num_ibs ?
+			(query->end_result - query->begin_result) / num_ibs : 0;
+		return true;
+	}
+	case R600_QUERY_CS_THREAD_BUSY:
+	case R600_QUERY_GALLIUM_THREAD_BUSY: {
+		uint64_t elapsed = query->end_time - query->begin_time;
+
+		/* Busy time as a percentage of the elapsed time. */
+		result->u64 = elapsed ?
+			(query->end_result - query->begin_result) * 100 / elapsed : 0;
+		return true;
+	}
+	case R600_QUERY_GPIN_ASIC_ID:
+		result->u32 = 0;
+		return true;
+	case R600_QUERY_GPIN_NUM_SIMD:
+		result->u32 = rctx->screen->info.num_good_compute_units;
+		return true;
+	case R600_QUERY_GPIN_NUM_RB:
+		result->u32 = rctx->screen->info.num_render_backends;
+		return true;
+	case R600_QUERY_GPIN_NUM_SPI:
+		result->u32 = 1; /* all supported chips have one SPI per SE */
+		return true;
+	case R600_QUERY_GPIN_NUM_SE:
+		result->u32 = rctx->screen->info.max_se;
+		return true;
+	}
+
+	/* Every remaining type is a plain end - begin difference ... */
+	result->u64 = query->end_result - query->begin_result;
+
+	/* ... optionally rescaled. */
+	switch (query->b.type) {
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_GPU_TEMPERATURE:
+		result->u64 /= 1000;
+		break;
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		result->u64 *= 1000000;
+		break;
+	}
+
+	return true;
+}
+
+
+/* Operation table installed on every software query by
+ * r600_query_sw_create(). Software queries provide no get_result_resource
+ * hook. */
+static struct r600_query_ops sw_query_ops = {
+ .destroy = r600_query_sw_destroy,
+ .begin = r600_query_sw_begin,
+ .end = r600_query_sw_end,
+ .get_result = r600_query_sw_get_result,
+ .get_result_resource = NULL
+};
+
+/* Allocate a zero-initialized software query of the given type and attach
+ * the software operation table. Returns NULL if the allocation fails. */
+static struct pipe_query *r600_query_sw_create(unsigned query_type)
+{
+	struct r600_query_sw *sw = CALLOC_STRUCT(r600_query_sw);
+
+	if (sw) {
+		sw->b.type = query_type;
+		sw->b.ops = &sw_query_ops;
+	}
+
+	return (struct pipe_query *)sw;
+}
+
+/* Destroy a hardware query: walk the chain of previous result buffers,
+ * dropping each buffer reference and freeing each chain node, then release
+ * the current buffer and the workaround buffer and free the query. */
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
+			   struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *node, *older;
+
+	/* Release all query buffers. */
+	for (node = query->buffer.previous; node; node = older) {
+		/* Fetch the link before the node is freed. */
+		older = node->previous;
+		r600_resource_reference(&node->buf, NULL);
+		FREE(node);
+	}
+
+	r600_resource_reference(&query->buffer.buf, NULL);
+	r600_resource_reference(&query->workaround_buf, NULL);
+	FREE(rquery);
+}
+
+/* Create a result buffer for a hardware query and let the query's
+ * prepare_buffer hook initialize it. The buffer is at least
+ * rscreen->info.min_alloc_size bytes. Returns NULL if either the allocation
+ * or the preparation fails. */
+static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
+						   struct r600_query_hw *query)
+{
+	unsigned buf_size = MAX2(query->result_size,
+				 rscreen->info.min_alloc_size);
+	struct r600_resource *buf;
+
+	/* Queries are normally read by the CPU after
+	 * being written by the gpu, hence staging is probably a good
+	 * usage pattern.
+	 */
+	buf = (struct r600_resource *)
+		pipe_buffer_create(&rscreen->b, 0,
+				   PIPE_USAGE_STAGING, buf_size);
+	if (!buf)
+		return NULL;
+
+	if (query->ops->prepare_buffer(rscreen, query, buf))
+		return buf;
+
+	/* Preparation failed: drop the freshly created buffer. */
+	r600_resource_reference(&buf, NULL);
+	return NULL;
+}
+
+/* Default prepare_buffer hook: zero the whole result buffer and, for
+ * occlusion queries, pre-mark the entries of disabled render backends.
+ *
+ * The buffer is treated as an array of result slots, each
+ * query->result_size bytes. For occlusion queries a slot starts with one
+ * 16-byte entry (4 dwords) per render backend; the remaining bytes of the
+ * slot belong to the fence. Bit 31 of dwords 1 and 3 of an entry is set for
+ * every backend that is not in enabled_rb_mask.
+ *
+ * The walk advances by the full slot size. Advancing only by the backend
+ * entries (4 * max_rbs dwords) would skip the fence part of the slot and
+ * leave every slot after the first marked at the wrong offsets.
+ *
+ * Returns false if the buffer cannot be mapped. */
+static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
+					 struct r600_query_hw *query,
+					 struct r600_resource *buffer)
+{
+	/* Callers ensure that the buffer is currently unused by the GPU. */
+	uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
+						   PIPE_TRANSFER_WRITE |
+						   PIPE_TRANSFER_UNSYNCHRONIZED);
+	if (!results)
+		return false;
+
+	memset(results, 0, buffer->b.b.width0);
+
+	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+		unsigned max_rbs = rscreen->info.num_render_backends;
+		unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
+		unsigned dwords_per_result = query->result_size / sizeof(*results);
+		unsigned num_results;
+		unsigned i, j;
+
+		/* Set top bits for unused backends. */
+		num_results = buffer->b.b.width0 / query->result_size;
+		for (j = 0; j < num_results; j++) {
+			for (i = 0; i < max_rbs; i++) {
+				if (!(enabled_rb_mask & (1u << i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
+			}
+			results += dwords_per_result;
+		}
+	}
+
+	return true;
+}
+
+static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
+
+/* Generic query operation table installed on hardware queries by
+ * r600_query_hw_create(). */
+static struct r600_query_ops query_hw_ops = {
+ .destroy = r600_query_hw_destroy,
+ .begin = r600_query_hw_begin,
+ .end = r600_query_hw_end,
+ .get_result = r600_query_hw_get_result,
+ .get_result_resource = r600_query_hw_get_result_resource,
+};
+
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+ union pipe_query_result *);
+
+/* Default hardware-specific hooks (buffer preparation, start/stop emission,
+ * result accumulation) installed on hardware queries by
+ * r600_query_hw_create(). */
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+ .prepare_buffer = r600_query_hw_prepare_buffer,
+ .emit_start = r600_query_hw_do_emit_start,
+ .emit_stop = r600_query_hw_do_emit_stop,
+ .clear_result = r600_query_hw_clear_result,
+ .add_result = r600_query_hw_add_result,
+};
+
+/* Give a hardware query its first result buffer. The query's ops and
+ * result_size must already be set, since buffer creation uses both.
+ * Returns false if no buffer could be created. */
+bool r600_query_hw_init(struct r600_common_screen *rscreen,
+			struct r600_query_hw *query)
+{
+	query->buffer.buf = r600_new_query_buffer(rscreen, query);
+
+	return query->buffer.buf != NULL;
+}
+
+/* Create a hardware query of the given type.
+ *
+ * Fills in the per-type result size and the number of command-stream dwords
+ * needed to begin and end the query, then allocates the first result
+ * buffer. `index` is stored as the stream for the per-stream streamout
+ * query types and ignored otherwise.
+ *
+ * Returns NULL on allocation failure or for an unsupported query type. */
+static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
+					       unsigned query_type,
+					       unsigned index)
+{
+	struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+
+	if (!query)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &query_hw_ops;
+	query->ops = &query_hw_default_hw_ops;
+
+	switch (query_type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		/* 16 bytes per render backend, plus 16 for the fence + alignment */
+		query->result_size = 16 * rscreen->info.num_render_backends + 16;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		query->result_size = 24;
+		query->num_cs_dw_begin = 8;
+		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		query->result_size = 16;
+		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+		query->flags = R600_QUERY_HW_FLAG_NO_START;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		query->result_size = 32;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
+		query->stream = index;
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		query->result_size = 32 * R600_MAX_STREAMS;
+		query->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
+		query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+		/* 11 values on EG, 8 on R600, plus 8 for the fence + alignment */
+		query->result_size =
+			(rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16 + 8;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+		break;
+	default:
+		assert(0);
+		goto fail;
+	}
+
+	if (r600_query_hw_init(rscreen, query))
+		return (struct pipe_query *)query;
+
+fail:
+	FREE(query);
+	return NULL;
+}
+
+/* Track the number of active occlusion queries.
+ *
+ * diff is added to the occlusion-query counter (and to the "perfect"
+ * counter for OCCLUSION_COUNTER queries). When either counter moves between
+ * zero and non-zero, the db_misc_state atom is marked dirty.
+ * Non-occlusion query types are ignored.
+ */
+static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
+					      unsigned type, int diff)
+{
+	bool was_enabled, was_perfect;
+	bool now_enabled, now_perfect;
+
+	if (type != PIPE_QUERY_OCCLUSION_COUNTER &&
+	    type != PIPE_QUERY_OCCLUSION_PREDICATE)
+		return;
+
+	was_enabled = rctx->num_occlusion_queries != 0;
+	was_perfect = rctx->num_perfect_occlusion_queries != 0;
+
+	rctx->num_occlusion_queries += diff;
+	assert(rctx->num_occlusion_queries >= 0);
+
+	if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
+		rctx->num_perfect_occlusion_queries += diff;
+		assert(rctx->num_perfect_occlusion_queries >= 0);
+	}
+
+	now_enabled = rctx->num_occlusion_queries != 0;
+	now_perfect = rctx->num_perfect_occlusion_queries != 0;
+
+	if (now_enabled != was_enabled || now_perfect != was_perfect) {
+		struct r600_context *r600 = (struct r600_context*)rctx;
+
+		r600_mark_atom_dirty(r600, &r600->db_misc_state.atom);
+	}
+}
+
+/* Map a streamout stream index to its SAMPLE_STREAMOUTSTATS event type.
+ * Stream 0 and any index outside 1..3 select the stream-0 event.
+ */
+static unsigned event_type_for_stream(unsigned stream)
+{
+	switch (stream) {
+	case 1:
+		return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
+	case 2:
+		return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+	case 3:
+		return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+	case 0:
+	default:
+		return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+	}
+}
+
+/* Emit an EVENT_WRITE packet that samples the streamout statistics of the
+ * given stream to GPU address va.
+ */
+static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
+				  unsigned stream)
+{
+	unsigned event = event_type_for_stream(stream);
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+	radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(3));
+	radeon_emit(cs, va);		/* low dword of the address */
+	radeon_emit(cs, va >> 32);	/* high dword of the address */
+}
+
+/* Default emit_start callback: write the begin-of-query sample for the
+ * query's type at GPU address va, then call r600_emit_reloc() for the
+ * query's current buffer with write usage.
+ *
+ * The buffer parameter is not used here; the reloc is emitted for
+ * query->buffer.buf.
+ */
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+					struct r600_query_hw *query,
+					struct r600_resource *buffer,
+					uint64_t va)
+{
+	struct radeon_winsys_cs *gfx_cs = ctx->gfx.cs;
+	unsigned s;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIME_ELAPSED:
+		/* Write the timestamp after the last draw is done.
+		 * (bottom-of-pipe)
+		 */
+		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+					 0, EOP_DATA_SEL_TIMESTAMP,
+					 NULL, va, 0, query->b.type);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		/* One 32-byte slot per stream. */
+		for (s = 0; s < R600_MAX_STREAMS; ++s)
+			emit_sample_streamout(gfx_cs, va + 32 * s, s);
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		emit_sample_streamout(gfx_cs, va, query->stream);
+		break;
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		radeon_emit(gfx_cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(gfx_cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+		radeon_emit(gfx_cs, va);
+		radeon_emit(gfx_cs, va >> 32);
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+		radeon_emit(gfx_cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(gfx_cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+		radeon_emit(gfx_cs, va);
+		radeon_emit(gfx_cs, va >> 32);
+		break;
+	default:
+		assert(0);
+	}
+
+	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+			RADEON_PRIO_QUERY);
+}
+
+/* Start (or resume) sampling for a hardware query.
+ *
+ * Updates the occlusion / primitives-generated bookkeeping, reserves
+ * command-stream space for both the begin and the end packets, switches to
+ * a fresh result buffer when the current one cannot hold another result,
+ * and finally invokes the query's emit_start callback.
+ *
+ * Does nothing when the query has no buffer (earlier allocation failure).
+ * If chaining to a new buffer fails, query->buffer.buf is left NULL, which
+ * callers in this file test to detect the failure.
+ */
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *query)
+{
+	uint64_t va;
+
+	if (!query->buffer.buf)
+		return; // previous buffer allocation failure
+
+	r600_update_occlusion_query_state(ctx, query->b.type, 1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+
+	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+			       true);
+
+	/* Get a new query buffer if needed. */
+	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
+		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+
+		if (!qbuf) {
+			/* Out of memory: the full buffer cannot be chained.
+			 * Drop it so the query ends up in the same
+			 * "no buffer" state as a failed buffer allocation,
+			 * rather than writing through a NULL pointer.
+			 */
+			r600_resource_reference(&query->buffer.buf, NULL);
+			return;
+		}
+
+		/* Move the full buffer onto the "previous" chain. */
+		*qbuf = query->buffer;
+		query->buffer.results_end = 0;
+		query->buffer.previous = qbuf;
+		query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
+		if (!query->buffer.buf)
+			return;
+	}
+
+	/* emit begin query */
+	va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+	query->ops->emit_start(ctx, query, query->buffer.buf, va);
+
+	/* The end packets still have to fit if the queries get suspended. */
+	ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
+}
+
+/* Default emit_stop callback: write the end-of-query sample for the query's
+ * type, call r600_emit_reloc() for the query's current buffer, and, for the
+ * types that use a separate fence dword, write 0x80000000 to it with a
+ * bottom-of-pipe event.
+ *
+ * va is the address of the result slot; the end sample is written at a
+ * type-specific offset from it. The buffer parameter is not used here.
+ */
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+				       struct r600_query_hw *query,
+				       struct r600_resource *buffer,
+				       uint64_t va)
+{
+	struct radeon_winsys_cs *gfx_cs = ctx->gfx.cs;
+	uint64_t fence_va = 0;	/* 0 means "no separate fence write" */
+	unsigned s;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		va += 8;
+		radeon_emit(gfx_cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(gfx_cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+		radeon_emit(gfx_cs, va);
+		radeon_emit(gfx_cs, va >> 32);
+
+		fence_va = va + ctx->screen->info.num_render_backends * 16 - 8;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		va += 16;
+		emit_sample_streamout(gfx_cs, va, query->stream);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		va += 16;
+		for (s = 0; s < R600_MAX_STREAMS; ++s)
+			emit_sample_streamout(gfx_cs, va + 32 * s, s);
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		/* The end timestamp follows the begin timestamp. */
+		va += 8;
+		/* fall through */
+	case PIPE_QUERY_TIMESTAMP:
+		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+					 0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
+					 0, query->b.type);
+		fence_va = va + 8;
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS: {
+		/* The slot holds two equally sized samples and 8 more bytes. */
+		unsigned half = (query->result_size - 8) / 2;
+
+		va += half;
+		radeon_emit(gfx_cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+		radeon_emit(gfx_cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+		radeon_emit(gfx_cs, va);
+		radeon_emit(gfx_cs, va >> 32);
+
+		fence_va = va + half;
+		break;
+	}
+	default:
+		assert(0);
+	}
+
+	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+			RADEON_PRIO_QUERY);
+
+	if (fence_va != 0)
+		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+					 EOP_DATA_SEL_VALUE_32BIT,
+					 query->buffer.buf, fence_va, 0x80000000,
+					 query->b.type);
+}
+
+/* Stop (or suspend) sampling for a hardware query.
+ *
+ * Invokes the query's emit_stop callback on the current result slot,
+ * advances results_end past that slot and undoes the bookkeeping done by
+ * r600_query_hw_emit_start(). Does nothing when the query has no buffer.
+ */
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+				    struct r600_query_hw *query)
+{
+	uint64_t slot_va;
+
+	if (query->buffer.buf == NULL)
+		return; // previous buffer allocation failure
+
+	/* Queries with a begin already reserved the space in begin_query;
+	 * only begin-less queries have to do it here.
+	 */
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START)
+		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
+
+	/* emit end query */
+	slot_va = query->buffer.buf->gpu_address + query->buffer.results_end;
+	query->ops->emit_stop(ctx, query, query->buffer.buf, slot_va);
+
+	query->buffer.results_end += query->result_size;
+
+	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+		ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
+
+	r600_update_occlusion_query_state(ctx, query->b.type, -1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, -1);
+}
+
+/* Emit one SET_PREDICATION packet reading from address va inside buf, and
+ * call r600_emit_reloc() for buf with read usage.
+ *
+ * op supplies the predication bits; the low 8 bits of the second dword
+ * carry bits 32..39 of va.
+ */
+static void emit_set_predicate(struct r600_common_context *ctx,
+			       struct r600_resource *buf, uint64_t va,
+			       uint32_t op)
+{
+	struct radeon_winsys_cs *gfx_cs = ctx->gfx.cs;
+	uint32_t va_hi = (va >> 32) & 0xFF;
+
+	radeon_emit(gfx_cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+	radeon_emit(gfx_cs, va);
+	radeon_emit(gfx_cs, op | va_hi);
+	r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
+			RADEON_PRIO_QUERY);
+}
+
+/* Emit the predication packets for the context's current render condition.
+ *
+ * Does nothing when no render-condition query is set. When the query has a
+ * workaround buffer, a single BOOL64 predicate reading that buffer is
+ * emitted. Otherwise one predicate packet is emitted per result slot (per
+ * stream for SO_OVERFLOW_ANY_PREDICATE) across the whole buffer chain, with
+ * the CONTINUE bit set on every packet after the first.
+ *
+ * The atom parameter is not used.
+ */
+static void r600_emit_query_predication(struct r600_common_context *ctx,
+					struct r600_atom *atom)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
+	struct r600_query_buffer *qbuf;
+	bool invert, wait;
+	uint32_t op;
+
+	if (query == NULL)
+		return;
+
+	invert = ctx->render_cond_invert;
+
+	/* Use the value written by compute shader as a workaround. Note that
+	 * the wait flag does not apply in this predication mode.
+	 *
+	 * The shader outputs the result value to L2. Workarounds only affect VI
+	 * and later, where the CP reads data from L2, so we don't need an
+	 * additional flush.
+	 */
+	if (query->workaround_buf) {
+		uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
+
+		op = PRED_OP(PREDICATION_OP_BOOL64);
+		op |= invert ? PREDICATION_DRAW_NOT_VISIBLE : PREDICATION_DRAW_VISIBLE;
+		emit_set_predicate(ctx, query->workaround_buf, va, op);
+		return;
+	}
+
+	switch (query->b.type) {
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+		invert = !invert;
+		break;
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		op = PRED_OP(PREDICATION_OP_ZPASS);
+		break;
+	default:
+		assert(0);
+		return;
+	}
+
+	/* if true then invert, see GL_ARB_conditional_render_inverted */
+	if (invert)
+		op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
+	else
+		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
+
+	wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+	       ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+	op |= wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+
+	/* emit predicate packets for all data blocks */
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+		uint64_t buf_va = qbuf->buf->gpu_address;
+		unsigned slot;
+
+		for (slot = 0; slot < qbuf->results_end; slot += query->result_size) {
+			uint64_t va = buf_va + slot;
+
+			if (query->b.type != PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
+				emit_set_predicate(ctx, qbuf->buf, va, op);
+				op |= PREDICATION_CONTINUE;
+				continue;
+			}
+
+			for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+				emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
+
+				/* set CONTINUE bit for all packets except the first */
+				op |= PREDICATION_CONTINUE;
+			}
+		}
+	}
+}
+
+/* pipe_context::create_query entry point.
+ *
+ * TIMESTAMP_DISJOINT, GPU_FINISHED and all driver-specific query types are
+ * created as software queries; everything else becomes a hardware query.
+ */
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+	bool is_sw_query = query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+			   query_type == PIPE_QUERY_GPU_FINISHED ||
+			   query_type >= PIPE_QUERY_DRIVER_SPECIFIC;
+
+	if (is_sw_query)
+		return r600_query_sw_create(query_type);
+
+	return r600_query_hw_create((struct r600_common_screen *)ctx->screen,
+				    query_type, index);
+}
+
+/* pipe_context::destroy_query entry point: forwards to the query's own
+ * destroy callback, passing the context's screen.
+ */
+static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_query *q = (struct r600_query *)query;
+	struct r600_common_context *common = (struct r600_common_context *)ctx;
+
+	q->ops->destroy(common->screen, q);
+}
+
+/* pipe_context::begin_query entry point: forwards to the query's own begin
+ * callback and returns its result.
+ */
+static boolean r600_begin_query(struct pipe_context *ctx,
+                                struct pipe_query *query)
+{
+	struct r600_query *q = (struct r600_query *)query;
+	struct r600_common_context *common = (struct r600_common_context *)ctx;
+
+	return q->ops->begin(common, q);
+}
+
+/* Reset a hardware query's buffer chain so results accumulate from scratch.
+ *
+ * All "previous" buffers are released and results_end is rewound. The
+ * current buffer is reused (after prepare_buffer) when it can be mapped
+ * without stalling; otherwise it is replaced by a new one. If the query has
+ * no current buffer because an earlier allocation or prepare failed, a new
+ * allocation is attempted.
+ *
+ * On return query->buffer.buf may be NULL if allocation or preparation
+ * failed; callers in this file check for that.
+ */
+void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+				 struct r600_query_hw *query)
+{
+	struct r600_query_buffer *prev = query->buffer.previous;
+
+	/* Discard the old query buffers. */
+	while (prev) {
+		struct r600_query_buffer *qbuf = prev;
+		prev = prev->previous;
+		r600_resource_reference(&qbuf->buf, NULL);
+		FREE(qbuf);
+	}
+
+	query->buffer.results_end = 0;
+	query->buffer.previous = NULL;
+
+	/* A previous failure (below, or while chaining buffers) can leave the
+	 * query without a current buffer. Retry the allocation instead of
+	 * dereferencing the NULL pointer in the checks that follow.
+	 */
+	if (!query->buffer.buf) {
+		query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
+		return;
+	}
+
+	/* Obtain a new buffer if the current one can't be mapped without a stall. */
+	if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
+	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+		r600_resource_reference(&query->buffer.buf, NULL);
+		query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
+	} else {
+		if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf))
+			r600_resource_reference(&query->buffer.buf, NULL);
+	}
+}
+
+/* Begin callback for hardware queries.
+ *
+ * Rejects queries flagged NO_START. Unless the query is flagged
+ * BEGIN_RESUMES, its buffers are reset first. Any workaround buffer is
+ * released, the start packets are emitted, and on success the query is
+ * appended to the context's list of active queries.
+ *
+ * Returns false if the query cannot be begun or has no result buffer.
+ */
+bool r600_query_hw_begin(struct r600_common_context *rctx,
+			 struct r600_query *rquery)
+{
+	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
+
+	if (hwq->flags & R600_QUERY_HW_FLAG_NO_START) {
+		assert(0);
+		return false;
+	}
+
+	if ((hwq->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES) == 0)
+		r600_query_hw_reset_buffers(rctx, hwq);
+
+	r600_resource_reference(&hwq->workaround_buf, NULL);
+
+	r600_query_hw_emit_start(rctx, hwq);
+	if (hwq->buffer.buf == NULL)
+		return false;
+
+	LIST_ADDTAIL(&hwq->list, &rctx->active_queries);
+	return true;
+}
+
+/* pipe_context::end_query entry point: forwards to the query's own end
+ * callback and returns its result.
+ */
+static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_query *q = (struct r600_query *)query;
+	struct r600_common_context *common = (struct r600_common_context *)ctx;
+
+	return q->ops->end(common, q);
+}
+
+/* End callback for hardware queries.
+ *
+ * Queries flagged NO_START have no begin, so their buffers are reset here
+ * before the stop packets are emitted. Queries that did have a begin are
+ * removed from the active list afterwards.
+ *
+ * Returns false if the query has no result buffer.
+ */
+bool r600_query_hw_end(struct r600_common_context *rctx,
+		       struct r600_query *rquery)
+{
+	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
+
+	if (hwq->flags & R600_QUERY_HW_FLAG_NO_START)
+		r600_query_hw_reset_buffers(rctx, hwq);
+
+	r600_query_hw_emit_stop(rctx, hwq);
+
+	if ((hwq->flags & R600_QUERY_HW_FLAG_NO_START) == 0)
+		LIST_DELINIT(&hwq->list);
+
+	return hwq->buffer.buf != NULL;
+}
+
+/* Describe where, inside one result slot, the values of a query live.
+ *
+ * Fills params with the byte offsets of the start value, the end value and
+ * the fence dword, plus the number of start/end pairs per slot and the
+ * stride between them. index selects the sub-value for SO_STATISTICS and
+ * PIPELINE_STATISTICS queries.
+ */
+static void r600_get_hw_query_params(struct r600_common_context *rctx,
+				     struct r600_query_hw *rquery, int index,
+				     struct r600_hw_query_params *params)
+{
+	const unsigned num_rbs = rctx->screen->info.num_render_backends;
+
+	/* Default: a single start/end pair per slot. */
+	params->pair_count = 1;
+	params->pair_stride = 0;
+
+	switch (rquery->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		/* One 16-byte start/end pair per render backend. */
+		params->pair_count = num_rbs;
+		params->pair_stride = 16;
+		params->start_offset = 0;
+		params->end_offset = 8;
+		params->fence_offset = num_rbs * 16;
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		params->start_offset = 0;
+		params->end_offset = 8;
+		params->fence_offset = 16;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		params->start_offset = 0;
+		params->end_offset = 0;
+		params->fence_offset = 8;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+		params->start_offset = 8;
+		params->end_offset = 24;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+		params->start_offset = 0;
+		params->end_offset = 16;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_SO_STATISTICS:
+		params->start_offset = 8 - index * 8;
+		params->end_offset = 24 - index * 8;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		/* One 32-byte pair per stream; offsets as for a single stream. */
+		params->pair_stride = 32;
+		params->pair_count = R600_MAX_STREAMS;
+		/* fall through */
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		params->start_offset = 0;
+		params->end_offset = 16;
+
+		/* We can re-use the high dword of the last 64-bit value as a
+		 * fence: it is initialized as 0, and the high bit is set by
+		 * the write of the streamout stats event.
+		 */
+		params->fence_offset = rquery->result_size - 4;
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+	{
+		/* Offsets apply to EG+ */
+		static const unsigned stat_offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
+
+		params->start_offset = stat_offsets[index];
+		params->end_offset = 88 + stat_offsets[index];
+		params->fence_offset = 2 * 88;
+		break;
+	}
+	default:
+		unreachable("r600_get_hw_query_params unsupported");
+	}
+}
+
+/* Read one start/end pair of 64-bit counters and return their difference.
+ *
+ * map             - result memory, accessed as an array of 32-bit dwords
+ * start_index     - dword index of the low half of the start value
+ * end_index       - dword index of the low half of the end value
+ * test_status_bit - when true, the difference is only returned if bit 63
+ *                   is set in both values; otherwise 0 is returned
+ *
+ * The full 64-bit difference is returned. The callers accumulate into
+ * 64-bit fields, so returning a 32-bit value would silently drop the upper
+ * half of large differences.
+ */
+static uint64_t r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
+				       bool test_status_bit)
+{
+	const uint64_t status_bit = 0x8000000000000000ULL;
+	const uint32_t *dwords = (const uint32_t *)map;
+	uint64_t start, end;
+
+	/* Assemble each 64-bit value from its low and high dwords. */
+	start = (uint64_t)dwords[start_index] |
+		(uint64_t)dwords[start_index + 1] << 32;
+	end = (uint64_t)dwords[end_index] |
+	      (uint64_t)dwords[end_index + 1] << 32;
+
+	if (test_status_bit &&
+	    !((start & status_bit) && (end & status_bit)))
+		return 0;
+
+	return end - start;
+}
+
+/* Default add_result callback: fold one result slot into *result.
+ *
+ * buffer points at the start of the slot (CPU mapping). Depending on the
+ * query type the slot's start/end pairs are read with
+ * r600_query_read_result() and accumulated into the matching member of
+ * result; TIMESTAMP overwrites result->u64 with the slot's first 64-bit
+ * value instead of accumulating.
+ *
+ * Byte offsets into the slot are computed through a char pointer, since
+ * arithmetic on void pointers is not standard C.
+ */
+static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
+				     struct r600_query_hw *query,
+				     void *buffer,
+				     union pipe_query_result *result)
+{
+	unsigned max_rbs = rscreen->info.num_render_backends;
+	char *bytes = (char *)buffer;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER: {
+		/* One 16-byte start/end pair per render backend. */
+		for (unsigned i = 0; i < max_rbs; ++i) {
+			unsigned results_base = i * 16;
+			result->u64 +=
+				r600_query_read_result(bytes + results_base, 0, 2, true);
+		}
+		break;
+	}
+	case PIPE_QUERY_OCCLUSION_PREDICATE: {
+		for (unsigned i = 0; i < max_rbs; ++i) {
+			unsigned results_base = i * 16;
+			result->b = result->b ||
+				r600_query_read_result(bytes + results_base, 0, 2, true) != 0;
+		}
+		break;
+	}
+	case PIPE_QUERY_TIME_ELAPSED:
+		result->u64 += r600_query_read_result(buffer, 0, 2, false);
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		result->u64 = *(uint64_t*)buffer;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+		/* SAMPLE_STREAMOUTSTATS stores this structure:
+		 * {
+		 *    u64 NumPrimitivesWritten;
+		 *    u64 PrimitiveStorageNeeded;
+		 * }
+		 * We only need NumPrimitivesWritten here. */
+		result->u64 += r600_query_read_result(buffer, 2, 6, true);
+		break;
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+		/* Here we read PrimitiveStorageNeeded. */
+		result->u64 += r600_query_read_result(buffer, 0, 4, true);
+		break;
+	case PIPE_QUERY_SO_STATISTICS:
+		result->so_statistics.num_primitives_written +=
+			r600_query_read_result(buffer, 2, 6, true);
+		result->so_statistics.primitives_storage_needed +=
+			r600_query_read_result(buffer, 0, 4, true);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		/* Overflow: the two counter differences disagree. */
+		result->b = result->b ||
+			r600_query_read_result(buffer, 2, 6, true) !=
+			r600_query_read_result(buffer, 0, 4, true);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		/* Same test as above, once per 32-byte stream slot. */
+		for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+			char *stream_base = bytes + 32 * stream;
+
+			result->b = result->b ||
+				r600_query_read_result(stream_base, 2, 6, true) !=
+				r600_query_read_result(stream_base, 0, 4, true);
+		}
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+		if (rscreen->chip_class >= EVERGREEN) {
+			/* 11 values per sample: the end sample starts at dword 22. */
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 22, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 24, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 26, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 28, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 30, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 32, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 34, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 36, false);
+			result->pipeline_statistics.hs_invocations +=
+				r600_query_read_result(buffer, 16, 38, false);
+			result->pipeline_statistics.ds_invocations +=
+				r600_query_read_result(buffer, 18, 40, false);
+			result->pipeline_statistics.cs_invocations +=
+				r600_query_read_result(buffer, 20, 42, false);
+		} else {
+			/* 8 values per sample: the end sample starts at dword 16. */
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 16, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 18, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 20, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 22, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 24, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 26, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 28, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 30, false);
+		}
+#if 0 /* for testing */
+		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
+		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
+		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
+		       result->pipeline_statistics.ia_vertices,
+		       result->pipeline_statistics.ia_primitives,
+		       result->pipeline_statistics.vs_invocations,
+		       result->pipeline_statistics.hs_invocations,
+		       result->pipeline_statistics.ds_invocations,
+		       result->pipeline_statistics.gs_invocations,
+		       result->pipeline_statistics.gs_primitives,
+		       result->pipeline_statistics.c_invocations,
+		       result->pipeline_statistics.c_primitives,
+		       result->pipeline_statistics.ps_invocations,
+		       result->pipeline_statistics.cs_invocations);
+#endif
+		break;
+	default:
+		assert(0);
+	}
+}
+
+/* pipe_context::get_query_result entry point: forwards to the query's own
+ * get_result callback and returns its result.
+ */
+static boolean r600_get_query_result(struct pipe_context *ctx,
+				     struct pipe_query *query, boolean wait,
+				     union pipe_query_result *result)
+{
+	struct r600_query *q = (struct r600_query *)query;
+	struct r600_common_context *common = (struct r600_common_context *)ctx;
+
+	return q->ops->get_result(common, q, wait, result);
+}
+
+/* pipe_context::get_query_result_resource entry point: forwards every
+ * argument unchanged to the query's own get_result_resource callback.
+ */
+static void r600_get_query_result_resource(struct pipe_context *ctx,
+					   struct pipe_query *query,
+					   boolean wait,
+					   enum pipe_query_value_type result_type,
+					   int index,
+					   struct pipe_resource *resource,
+					   unsigned offset)
+{
+	struct r600_query *q = (struct r600_query *)query;
+	struct r600_common_context *common = (struct r600_common_context *)ctx;
+
+	q->ops->get_result_resource(common, q, wait, result_type, index,
+				    resource, offset);
+}
+
+/* Default clear_result callback: reset *result via
+ * util_query_clear_result() for the query's type.
+ */
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+				       union pipe_query_result *result)
+{
+	unsigned type = query->b.type;
+
+	util_query_clear_result(result, type);
+}
+
+/* get_result callback for hardware queries (CPU readback).
+ *
+ * Clears *result, then maps every buffer in the query's chain and feeds
+ * each result slot to the add_result callback. When wait is false the
+ * buffers are mapped with PIPE_TRANSFER_DONTBLOCK.
+ *
+ * For TIME_ELAPSED and TIMESTAMP queries the accumulated tick count is
+ * finally scaled by 1000000 / clock_crystal_freq.
+ *
+ * Returns false if a buffer could not be mapped, true otherwise.
+ */
+bool r600_query_hw_get_result(struct r600_common_context *rctx,
+			      struct r600_query *rquery,
+			      bool wait, union pipe_query_result *result)
+{
+	struct r600_common_screen *rscreen = rctx->screen;
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *qbuf;
+
+	query->ops->clear_result(query, result);
+
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+		unsigned usage = PIPE_TRANSFER_READ |
+				 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
+		unsigned results_base = 0;
+		/* char pointer: byte offsets are added to the mapping below,
+		 * and arithmetic on void pointers is not standard C.
+		 */
+		char *map;
+
+		if (rquery->b.flushed)
+			map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
+		else
+			map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
+
+		if (!map)
+			return false;
+
+		while (results_base != qbuf->results_end) {
+			query->ops->add_result(rscreen, query, map + results_base,
+					       result);
+			results_base += query->result_size;
+		}
+	}
+
+	/* Convert the time to expected units. */
+	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
+	    rquery->type == PIPE_QUERY_TIMESTAMP) {
+		uint64_t freq = rscreen->info.clock_crystal_freq;
+		uint64_t ticks = result->u64;
+
+		/* Same value as (1000000 * ticks) / freq, but split into
+		 * quotient and remainder so the multiplication cannot
+		 * overflow 64 bits for large tick counts.
+		 */
+		result->u64 = (ticks / freq) * 1000000 +
+			      ((ticks % freq) * 1000000) / freq;
+	}
+	return true;
+}
+
+/* Create the compute shader that is used to collect the results.
+ *
+ * One compute grid with a single thread is launched for every query result
+ * buffer. The thread (optionally) reads a previous summary buffer, then
+ * accumulates data from the query result buffer, and writes the result either
+ * to a summary buffer to be consumed by the next grid invocation or to the
+ * user-supplied buffer.
+ *
+ * Data layout:
+ *
+ * CONST
+ * 0.x = end_offset
+ * 0.y = result_stride
+ * 0.z = result_count
+ * 0.w = bit field:
+ * 1: read previously accumulated values
+ * 2: write accumulated values for chaining
+ * 4: write result available
+ * 8: convert result to boolean (0/1)
+ * 16: only read one dword and use that as result
+ * 32: apply timestamp conversion
+ * 64: store full 64 bits result
+ * 128: store signed 32 bits result
+ * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
+ * 1.x = fence_offset
+ * 1.y = pair_stride
+ * 1.z = pair_count
+ *
+ * BUFFER[0] = query result buffer
+ * BUFFER[1] = previous summary buffer
+ * BUFFER[2] = next summary buffer or user-supplied buffer
+ */
+/* Build the compute shader that accumulates query results on the GPU and
+ * store it in rctx->query_result_shader.
+ *
+ * The TGSI text below is a template with one %u placeholder, which is
+ * filled with the screen's clock_crystal_freq. If translating the text
+ * fails, the function returns without creating a shader.
+ */
+static void r600_create_query_result_shader(struct r600_common_context *rctx)
+{
+	/* TEMP[0].xy = accumulated result so far
+	 * TEMP[0].z = result not available
+	 *
+	 * TEMP[1].x = current result index
+	 * TEMP[1].y = current pair index
+	 */
+	static const char text_tmpl[] =
+		"COMP\n"
+		"PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
+		"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
+		"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+		"DCL BUFFER[0]\n"
+		"DCL BUFFER[1]\n"
+		"DCL BUFFER[2]\n"
+		"DCL CONST[0][0..1]\n"
+		"DCL TEMP[0..5]\n"
+		"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
+		"IMM[1] UINT32 {1, 2, 4, 8}\n"
+		"IMM[2] UINT32 {16, 32, 64, 128}\n"
+		"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
+		"IMM[4] UINT32 {256, 0, 0, 0}\n"
+
+		"AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
+		"UIF TEMP[5]\n"
+			/* Check result availability. */
+			"LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
+			"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
+			"MOV TEMP[1], TEMP[0].zzzz\n"
+			"NOT TEMP[0].z, TEMP[0].zzzz\n"
+
+			/* Load result if available. */
+			"UIF TEMP[1]\n"
+				"LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
+			"ENDIF\n"
+		"ELSE\n"
+			/* Load previously accumulated result if requested. */
+			"MOV TEMP[0], IMM[0].xxxx\n"
+			"AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
+			"UIF TEMP[4]\n"
+				"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
+			"ENDIF\n"
+
+			"MOV TEMP[1].x, IMM[0].xxxx\n"
+			"BGNLOOP\n"
+				/* Break if accumulated result so far is not available. */
+				"UIF TEMP[0].zzzz\n"
+					"BRK\n"
+				"ENDIF\n"
+
+				/* Break if result_index >= result_count. */
+				"USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
+				"UIF TEMP[5]\n"
+					"BRK\n"
+				"ENDIF\n"
+
+				/* Load fence and check result availability */
+				"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
+				"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
+				"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
+				"NOT TEMP[0].z, TEMP[0].zzzz\n"
+				"UIF TEMP[0].zzzz\n"
+					"BRK\n"
+				"ENDIF\n"
+
+				"MOV TEMP[1].y, IMM[0].xxxx\n"
+				"BGNLOOP\n"
+					/* Load start and end. */
+					"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
+					"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
+					"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
+
+					"UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
+					"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+					"U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
+
+					"AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
+					"UIF TEMP[5].zzzz\n"
+						/* Load second start/end half-pair and
+						 * take the difference
+						 */
+						"UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
+						"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
+						"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+						"U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
+						"U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
+					"ENDIF\n"
+
+					"U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
+
+					/* Increment pair index */
+					"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
+					"USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
+					"UIF TEMP[5]\n"
+						"BRK\n"
+					"ENDIF\n"
+				"ENDLOOP\n"
+
+				/* Increment result index */
+				"UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
+			"ENDLOOP\n"
+		"ENDIF\n"
+
+		"AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
+		"UIF TEMP[4]\n"
+			/* Store accumulated data for chaining. */
+			"STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
+		"ELSE\n"
+			"AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
+			"UIF TEMP[4]\n"
+				/* Store result availability. */
+				"NOT TEMP[0].z, TEMP[0]\n"
+				"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
+				"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
+
+				"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
+				"UIF TEMP[4]\n"
+					"STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
+				"ENDIF\n"
+			"ELSE\n"
+				/* Store result if it is available. */
+				"NOT TEMP[4], TEMP[0].zzzz\n"
+				"UIF TEMP[4]\n"
+					/* Apply timestamp conversion */
+					"AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
+					"UIF TEMP[4]\n"
+						"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
+						"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
+					"ENDIF\n"
+
+					/* Convert to boolean */
+					"AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
+					"UIF TEMP[4]\n"
+						"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
+						"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
+						"MOV TEMP[0].y, IMM[0].xxxx\n"
+					"ENDIF\n"
+
+					"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
+					"UIF TEMP[4]\n"
+						"STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
+					"ELSE\n"
+						/* Clamping */
+						"UIF TEMP[0].yyyy\n"
+							"MOV TEMP[0].x, IMM[0].wwww\n"
+						"ENDIF\n"
+
+						"AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
+						"UIF TEMP[4]\n"
+							"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
+						"ENDIF\n"
+
+						"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
+					"ENDIF\n"
+				"ENDIF\n"
+			"ENDIF\n"
+		"ENDIF\n"
+
+		"END\n";
+
+	/* The template plus slack for the expanded %u. */
+	char shader_text[sizeof(text_tmpl) + 32];
+	struct tgsi_token tgsi_tokens[1024];
+	struct pipe_compute_state cso = {
+		.ir_type = PIPE_SHADER_IR_TGSI,
+		.prog = tgsi_tokens,
+	};
+
+	/* Hard code the frequency into the shader so that the backend can
+	 * use the full range of optimizations for divide-by-constant.
+	 */
+	snprintf(shader_text, sizeof(shader_text), text_tmpl,
+		 rctx->screen->info.clock_crystal_freq);
+
+	if (!tgsi_text_translate(shader_text, tgsi_tokens, ARRAY_SIZE(tgsi_tokens))) {
+		assert(false);
+		return;
+	}
+
+	rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &cso);
+}
+
+/* Re-bind the compute state, constant buffer 0 and the first three compute
+ * shader buffers saved in st, dropping st's references to the saved
+ * resources afterwards.
+ */
+static void r600_restore_qbo_state(struct r600_common_context *rctx,
+				   struct r600_qbo_state *st)
+{
+	struct pipe_context *pipe = &rctx->b;
+	unsigned slot;
+
+	pipe->bind_compute_state(pipe, st->saved_compute);
+
+	pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
+	pipe_resource_reference(&st->saved_const0.buffer, NULL);
+
+	pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
+	for (slot = 0; slot < 3; slot++)
+		pipe_resource_reference(&st->saved_ssbo[slot].buffer, NULL);
+}
+
+/* get_result_resource callback for hardware queries: write the query result
+ * (or, for index < 0, its availability) into a buffer resource using the
+ * query-result compute shader.
+ *
+ * One single-thread grid is launched per buffer in the query's chain,
+ * starting with the current buffer. Intermediate sums go through a 16-byte
+ * temporary; the launch for the last buffer in the chain writes to
+ * resource at the given offset. The compute state, constant buffer 0 and
+ * shader buffers 0..2 are saved first and restored at the end.
+ *
+ * Returns silently if the shader or the temporary cannot be created.
+ */
+static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
+                                              struct r600_query *rquery,
+                                              bool wait,
+                                              enum pipe_query_value_type result_type,
+                                              int index,
+                                              struct pipe_resource *resource,
+                                              unsigned offset)
+{
+	/* Bits of consts.config, as listed in the shader's data layout. */
+	enum {
+		CFG_READ_PREVIOUS = 1,
+		CFG_WRITE_CHAIN = 2,
+		CFG_WRITE_AVAILABLE = 4,
+		CFG_TO_BOOLEAN = 8,
+		CFG_SINGLE_DWORD = 16,
+		CFG_TIMESTAMP_CONVERT = 32,
+		CFG_STORE_64BIT = 64,
+		CFG_STORE_SIGNED_32BIT = 128,
+		CFG_SO_OVERFLOW = 256,
+	};
+
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *qbuf;
+	struct r600_query_buffer *next_qbuf;
+	struct pipe_resource *summary_buf = NULL;
+	unsigned summary_offset = 0;
+	struct r600_qbo_state saved_state = {};
+	struct pipe_grid_info grid = {};
+	struct pipe_constant_buffer constant_buffer = {};
+	struct pipe_shader_buffer ssbo[3];
+	struct r600_hw_query_params params;
+	struct {
+		uint32_t end_offset;
+		uint32_t result_stride;
+		uint32_t result_count;
+		uint32_t config;
+		uint32_t fence_offset;
+		uint32_t pair_stride;
+		uint32_t pair_count;
+	} consts;
+
+	/* The shader is created lazily on first use. */
+	if (!rctx->query_result_shader) {
+		r600_create_query_result_shader(rctx);
+		if (!rctx->query_result_shader)
+			return;
+	}
+
+	/* A summary buffer is only needed when results span several buffers. */
+	if (query->buffer.previous) {
+		u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
+				     &summary_offset, &summary_buf);
+		if (!summary_buf)
+			return;
+	}
+
+	rctx->save_qbo_state(&rctx->b, &saved_state);
+
+	/* Offsets handed to the shader are relative to start_offset. */
+	r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
+	consts.end_offset = params.end_offset - params.start_offset;
+	consts.fence_offset = params.fence_offset - params.start_offset;
+	consts.result_stride = query->result_size;
+	consts.pair_stride = params.pair_stride;
+	consts.pair_count = params.pair_count;
+
+	constant_buffer.buffer_size = sizeof(consts);
+	constant_buffer.user_buffer = &consts;
+
+	/* Input and output summary both start out as the temporary. */
+	ssbo[1].buffer = summary_buf;
+	ssbo[1].buffer_offset = summary_offset;
+	ssbo[1].buffer_size = 16;
+
+	ssbo[2] = ssbo[1];
+
+	rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);
+
+	/* A single thread in a single block. */
+	grid.block[0] = grid.block[1] = grid.block[2] = 1;
+	grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
+
+	consts.config = 0;
+	if (index < 0)
+		consts.config |= CFG_WRITE_AVAILABLE;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		consts.config |= CFG_TO_BOOLEAN;
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		consts.config |= CFG_TO_BOOLEAN | CFG_SO_OVERFLOW;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+	case PIPE_QUERY_TIME_ELAPSED:
+		consts.config |= CFG_TIMESTAMP_CONVERT;
+		break;
+	default:
+		break;
+	}
+
+	switch (result_type) {
+	case PIPE_QUERY_TYPE_U64:
+	case PIPE_QUERY_TYPE_I64:
+		consts.config |= CFG_STORE_64BIT;
+		break;
+	case PIPE_QUERY_TYPE_I32:
+		consts.config |= CFG_STORE_SIGNED_32BIT;
+		break;
+	case PIPE_QUERY_TYPE_U32:
+		break;
+	}
+
+	rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;
+
+	for (qbuf = &query->buffer; qbuf; qbuf = next_qbuf) {
+		if (query->b.type == PIPE_QUERY_TIMESTAMP) {
+			/* Only read the last timestamp. */
+			next_qbuf = NULL;
+			consts.result_count = 0;
+			consts.config |= CFG_SINGLE_DWORD;
+			params.start_offset += qbuf->results_end - query->result_size;
+		} else {
+			next_qbuf = qbuf->previous;
+			consts.result_count = qbuf->results_end / query->result_size;
+			consts.config &= ~(CFG_READ_PREVIOUS | CFG_WRITE_CHAIN);
+			if (qbuf != &query->buffer)
+				consts.config |= CFG_READ_PREVIOUS;
+			if (qbuf->previous)
+				consts.config |= CFG_WRITE_CHAIN;
+		}
+
+		rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+
+		ssbo[0].buffer = &qbuf->buf->b.b;
+		ssbo[0].buffer_offset = params.start_offset;
+		ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
+
+		/* The last launch writes to the caller's resource. */
+		if (qbuf->previous == NULL) {
+			ssbo[2].buffer = resource;
+			ssbo[2].buffer_offset = offset;
+			ssbo[2].buffer_size = 8;
+		}
+
+		rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
+
+		if (wait && qbuf == &query->buffer) {
+			uint64_t fence_va;
+
+			/* Wait for result availability. Wait only for readiness
+			 * of the last entry, since the fence writes should be
+			 * serialized in the CP.
+			 */
+			fence_va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
+			fence_va += params.fence_offset;
+
+			r600_gfx_wait_fence(rctx, fence_va, 0x80000000, 0x80000000);
+		}
+
+		rctx->b.launch_grid(&rctx->b, &grid);
+		rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
+	}
+
+	r600_restore_qbo_state(rctx, &saved_state);
+	pipe_resource_reference(&summary_buf, NULL);
+}
+
+/*
+ * pipe_context::render_condition hook.
+ *
+ * Stores the predicating query (NULL turns predication off), the condition
+ * value and the mode on the context, sizes the render-condition atom for the
+ * SET_PREDICATION packets, and marks the atom dirty only when a query is
+ * bound.
+ */
+static void r600_render_condition(struct pipe_context *ctx,
+				  struct pipe_query *query,
+				  boolean condition,
+				  enum pipe_render_cond_flag mode)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_atom *atom = &rctx->render_cond_atom;
+	const bool have_query = query != NULL;
+
+	/* Size of the SET_PREDICATION packets: 5 dwords per stored result,
+	 * summed over the whole chain of query buffers. */
+	atom->num_dw = 0;
+	if (have_query) {
+		struct r600_query_hw *hw_query = (struct r600_query_hw *)query;
+		struct r600_query_buffer *cur = &hw_query->buffer;
+
+		while (cur) {
+			unsigned num_results = cur->results_end / hw_query->result_size;
+
+			atom->num_dw += num_results * 5;
+			cur = cur->previous;
+		}
+
+		/* The "any stream" overflow predicate is scaled by the
+		 * number of streams. */
+		if (hw_query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+			atom->num_dw *= R600_MAX_STREAMS;
+	}
+
+	rctx->render_cond = query;
+	rctx->render_cond_invert = condition;
+	rctx->render_cond_mode = mode;
+
+	rctx->set_atom_dirty(rctx, atom, have_query);
+}
+
+/*
+ * Emit the stop sequence for every query on ctx->active_queries.
+ * r600_resume_queries() walks the same list to start them again.
+ */
+void r600_suspend_queries(struct r600_common_context *ctx)
+{
+	struct r600_query_hw *active;
+
+	LIST_FOR_EACH_ENTRY(active, &ctx->active_queries, list)
+		r600_query_hw_emit_stop(ctx, active);
+
+	/* Every stop must have been accounted for by now. */
+	assert(ctx->num_cs_dw_queries_suspend == 0);
+}
+
+/*
+ * Return the number of command-stream dwords to reserve before resuming
+ * every query on query_list.
+ */
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
+						    struct list_head *query_list)
+{
+	struct r600_query_hw *q;
+	/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
+	unsigned total = 13;
+
+	/* primitives generated query */
+	total += ctx->streamout.enable_atom.num_dw;
+
+	LIST_FOR_EACH_ENTRY(q, query_list, list) {
+		/* One begin and one end per query, plus a second "end":
+		 * num_cs_dw_nontimer_queries_suspend is incremented for
+		 * every resumed query, which raises the bar in
+		 * need_cs_space for the queries still to be resumed. */
+		total += q->num_cs_dw_begin + 2 * q->num_cs_dw_end;
+	}
+
+	return total;
+}
+
+/*
+ * Emit the start sequence for every query on ctx->active_queries.
+ *
+ * The command-stream space for all of them is requested up front so that
+ * resuming is not interrupted by a flush.
+ */
+void r600_resume_queries(struct r600_common_context *ctx)
+{
+	struct list_head *active = &ctx->active_queries;
+	struct r600_query_hw *q;
+	unsigned needed_dw;
+
+	needed_dw = r600_queries_num_cs_dw_for_resuming(ctx, active);
+
+	assert(ctx->num_cs_dw_queries_suspend == 0);
+
+	/* Check CS space here. Resuming must not be interrupted by flushes. */
+	ctx->need_gfx_cs_space(&ctx->b, needed_dw, true);
+
+	LIST_FOR_EACH_ENTRY(q, active, list)
+		r600_query_hw_emit_start(ctx, q);
+}
+
+/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI.
+ *
+ * Two strategies are tried in order:
+ *  1. If the kernel reported a valid backend map, one backend index is
+ *     decoded per tile pipe and the mask is built from those indices.
+ *  2. Otherwise, or if the decoded mask is empty, a ZPASS_DONE event is
+ *     emitted on the aux context into a zeroed staging buffer, and every
+ *     render backend whose slot was written is added to the mask.
+ *
+ * rscreen->info.enabled_rb_mask is only overwritten with a non-zero mask; if
+ * the buffer cannot be created or mapped it is left untouched.
+ */
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
+{
+ struct r600_common_context *ctx =
+ (struct r600_common_context*)rscreen->aux_context;
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ struct r600_resource *buffer;
+ uint32_t *results;
+ unsigned i, mask = 0;
+ unsigned max_rbs = ctx->screen->info.num_render_backends;
+
+ assert(rscreen->chip_class <= CAYMAN);
+
+ /* if backend_map query is supported by the kernel */
+ if (rscreen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
+ unsigned backend_map = rscreen->info.r600_gb_backend_map;
+ unsigned item_width, item_mask; /* bits per map entry / mask extracting the backend index */
+
+ if (ctx->chip_class >= EVERGREEN) {
+ item_width = 4;
+ item_mask = 0x7;
+ } else {
+ item_width = 2;
+ item_mask = 0x3;
+ }
+
+ while (num_tile_pipes--) { /* one map entry per tile pipe, lowest bits first */
+ i = backend_map & item_mask;
+ mask |= (1<<i);
+ backend_map >>= item_width;
+ }
+ if (mask != 0) {
+ rscreen->info.enabled_rb_mask = mask;
+ return;
+ }
+ }
+
+ /* otherwise backup path for older kernels (also taken when the decoded
+ * mask above came out empty) */
+
+ /* create buffer for event data: 16 bytes (4 dwords) per render backend */
+ buffer = (struct r600_resource*)
+ pipe_buffer_create(ctx->b.screen, 0,
+ PIPE_USAGE_STAGING, max_rbs * 16);
+ if (!buffer)
+ return;
+
+ /* initialize buffer with zeroes */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
+ if (results) {
+ memset(results, 0, max_rbs * 4 * 4);
+
+ /* emit EVENT_WRITE for ZPASS_DONE, targeting the start of the buffer */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, buffer->gpu_address);
+ radeon_emit(cs, buffer->gpu_address >> 32);
+
+ r600_emit_reloc(ctx, &ctx->gfx, buffer,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
+
+ /* analyze results: backend i is enabled if the second dword of its
+ * 4-dword slot is non-zero.
+ * NOTE(review): this presumably relies on the map call below waiting
+ * for the event emitted above to land - confirm. */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
+ if (results) {
+ for(i = 0; i < max_rbs; i++) {
+ /* at least highest bit will be set if backend is used */
+ if (results[i*4 + 1])
+ mask |= (1<<i);
+ }
+ }
+ }
+
+ r600_resource_reference(&buffer, NULL);
+
+ if (mask)
+ rscreen->info.enabled_rb_mask = mask;
+}
+
+/* Initializer helpers for r600_driver_query_list[].
+ *
+ * XFULL builds one pipe_driver_query_info initializer. Its query type, value
+ * type and result type arguments are suffixes that get pasted onto
+ * R600_QUERY_, PIPE_DRIVER_QUERY_TYPE_ and PIPE_DRIVER_QUERY_RESULT_TYPE_.
+ * X is XFULL with the group id set to ~0 (all bits set).
+ * XG is XFULL with the group id given as an R600_QUERY_GROUP_ suffix.
+ */
+#define XFULL(label_, qtype_, vtype_, rtype_, gid_) \
+	{ \
+		.name = label_, \
+		.query_type = R600_QUERY_##qtype_, \
+		.type = PIPE_DRIVER_QUERY_TYPE_##vtype_, \
+		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##rtype_, \
+		.group_id = gid_ \
+	}
+
+#define X(label_, qtype_, vtype_, rtype_) \
+	XFULL(label_, qtype_, vtype_, rtype_, ~(unsigned)0)
+
+#define XG(grp_, label_, qtype_, vtype_, rtype_) \
+	XFULL(label_, qtype_, vtype_, rtype_, R600_QUERY_GROUP_##grp_)
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+ X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+ X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
+ X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+ X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
+ X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
+ X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
+ X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
+ X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
+ X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
+ X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
+ X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
+ X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
+ X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
+ X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
+ X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
+ X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
+ X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
+ X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
+ X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
+ X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
+ X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
+ X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
+ X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+ X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+ X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
+ X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
+ X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+ X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
+ X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
+ X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
+ X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
+ X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+ X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
+ X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
+ X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
+ X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use it as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well).
+ *
+ * These are the only entries that carry a group id (via XG); the group
+ * itself is described by r600_get_driver_query_group_info(). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
+ X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+ X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version:
+ * r600_get_num_queries() hides a fixed number of trailing entries unless
+ * the DRM version is 2.x with minor >= 42. Adding, removing or reordering
+ * entries near the end of this list therefore requires updating that
+ * count.
+ *
+ * NOTE(review): 21 entries follow this comment, but
+ * r600_get_num_queries() drops 25, which also hides "temperature",
+ * "shader-clock", "memory-clock" and "GPIN_004" - confirm that this is
+ * intended. */
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+ X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
+ X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
+ X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
+ X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
+ X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
+ X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
+ X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
+ X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
+ X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
+ X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
+ X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
+ X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
+ X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
+ X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
+ X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
+ X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
+ X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
+ X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
+};
+
+#undef X
+#undef XG
+#undef XFULL
+
+/*
+ * Number of leading r600_driver_query_list[] entries to expose.
+ *
+ * The whole list is exposed only for DRM 2.x with minor version >= 42;
+ * otherwise the last 25 entries are hidden.
+ *
+ * NOTE(review): only 21 "GPU-*" entries sit at the end of the list, so 25
+ * also hides "temperature", "shader-clock", "memory-clock" and "GPIN_004" -
+ * confirm that this is intended.
+ */
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+	unsigned count = ARRAY_SIZE(r600_driver_query_list);
+
+	if (rscreen->info.drm_major != 2 || rscreen->info.drm_minor < 42)
+		count -= 25;
+
+	return count;
+}
+
+/*
+ * pipe_screen::get_driver_query_info hook.
+ *
+ * With info == NULL, returns the total number of driver queries (list
+ * entries plus performance counters). Otherwise fills *info for the given
+ * index: indices past the list are forwarded to r600_get_perfcounter_info(),
+ * the others are copied from r600_driver_query_list[] and 1 is returned.
+ */
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+				      unsigned index,
+				      struct pipe_driver_query_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	const unsigned num_list_queries = r600_get_num_queries(rscreen);
+	const unsigned no_group = ~(unsigned)0;
+
+	if (info == NULL) {
+		unsigned num_pc = r600_get_perfcounter_info(rscreen, 0, NULL);
+
+		return num_list_queries + num_pc;
+	}
+
+	if (index >= num_list_queries)
+		return r600_get_perfcounter_info(rscreen,
+						 index - num_list_queries, info);
+
+	*info = r600_driver_query_list[index];
+
+	/* Fill in the upper bound for the queries that have a known one. */
+	switch (info->query_type) {
+	case R600_QUERY_GPU_TEMPERATURE:
+		info->max_value.u64 = 125;
+		break;
+	case R600_QUERY_VRAM_VIS_USAGE:
+		info->max_value.u64 = rscreen->info.vram_vis_size;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_MAPPED_VRAM:
+		info->max_value.u64 = rscreen->info.vram_size;
+		break;
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_MAPPED_GTT:
+		info->max_value.u64 = rscreen->info.gart_size;
+		break;
+	default:
+		break;
+	}
+
+	/* Group ids from the list are shifted past the perfcounter groups. */
+	if (rscreen->perfcounters && info->group_id != no_group)
+		info->group_id += rscreen->perfcounters->num_groups;
+
+	return 1;
+}
+
+/* pipe_screen::get_driver_query_group_info hook.
+ *
+ * Performance counter groups come first, followed by the
+ * R600_NUM_SW_QUERY_GROUPS groups defined here. With info == NULL the total
+ * group count is returned; otherwise 1 if *info was filled in by this
+ * function, 0 if index is out of range, or the perfcounter helper's result.
+ *
+ * Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related
+ * functions.
+ */
+static int r600_get_driver_query_group_info(struct pipe_screen *screen,
+					    unsigned index,
+					    struct pipe_driver_query_group_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+	unsigned num_pc_groups =
+		rscreen->perfcounters ? rscreen->perfcounters->num_groups : 0;
+
+	if (!info)
+		return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+	if (index < num_pc_groups)
+		return r600_get_perfcounter_group_info(rscreen, index, info);
+
+	if (index - num_pc_groups >= R600_NUM_SW_QUERY_GROUPS)
+		return 0;
+
+	info->name = "GPIN";
+	info->max_active_queries = 5;
+	info->num_queries = 5;
+	return 1;
+}
+
+/*
+ * Install the query entry points on the context and initialize the list of
+ * active queries. render_condition is only hooked up when the screen
+ * reports at least one render backend.
+ */
+void r600_query_init(struct r600_common_context *rctx)
+{
+	struct r600_common_screen *rscreen =
+		(struct r600_common_screen*)rctx->b.screen;
+
+	LIST_INITHEAD(&rctx->active_queries);
+
+	rctx->render_cond_atom.emit = r600_emit_query_predication;
+
+	rctx->b.create_query = r600_create_query;
+	rctx->b.create_batch_query = r600_create_batch_query;
+	rctx->b.destroy_query = r600_destroy_query;
+	rctx->b.begin_query = r600_begin_query;
+	rctx->b.end_query = r600_end_query;
+	rctx->b.get_query_result = r600_get_query_result;
+	rctx->b.get_query_result_resource = r600_get_query_result_resource;
+
+	if (rscreen->info.num_render_backends > 0)
+		rctx->b.render_condition = r600_render_condition;
+}
+
+/* Install the driver-query enumeration hooks on the screen. */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+	rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
+	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_query.h b/lib/mesa/src/gallium/drivers/r600/r600_query.h
new file mode 100644
index 000000000..1a3c6839e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_query.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle <nicolai.haehnle@amd.com>
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "util/u_threaded_context.h"
+
+struct pipe_context;
+struct pipe_query;
+struct pipe_resource;
+
+struct r600_common_context;
+struct r600_common_screen;
+struct r600_query;
+struct r600_query_hw;
+struct r600_resource;
+
+enum { /* Driver-specific query types, numbered upwards from PIPE_QUERY_DRIVER_SPECIFIC. */
+ R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
+ R600_QUERY_DECOMPRESS_CALLS,
+ R600_QUERY_MRT_DRAW_CALLS,
+ R600_QUERY_PRIM_RESTART_CALLS,
+ R600_QUERY_SPILL_DRAW_CALLS,
+ R600_QUERY_COMPUTE_CALLS,
+ R600_QUERY_SPILL_COMPUTE_CALLS,
+ R600_QUERY_DMA_CALLS,
+ R600_QUERY_CP_DMA_CALLS,
+ R600_QUERY_NUM_VS_FLUSHES,
+ R600_QUERY_NUM_PS_FLUSHES,
+ R600_QUERY_NUM_CS_FLUSHES,
+ R600_QUERY_NUM_CB_CACHE_FLUSHES,
+ R600_QUERY_NUM_DB_CACHE_FLUSHES,
+ R600_QUERY_NUM_L2_INVALIDATES,
+ R600_QUERY_NUM_L2_WRITEBACKS,
+ R600_QUERY_NUM_RESIDENT_HANDLES,
+ R600_QUERY_TC_OFFLOADED_SLOTS,
+ R600_QUERY_TC_DIRECT_SLOTS,
+ R600_QUERY_TC_NUM_SYNCS,
+ R600_QUERY_CS_THREAD_BUSY,
+ R600_QUERY_GALLIUM_THREAD_BUSY,
+ R600_QUERY_REQUESTED_VRAM,
+ R600_QUERY_REQUESTED_GTT,
+ R600_QUERY_MAPPED_VRAM,
+ R600_QUERY_MAPPED_GTT,
+ R600_QUERY_BUFFER_WAIT_TIME,
+ R600_QUERY_NUM_MAPPED_BUFFERS,
+ R600_QUERY_NUM_GFX_IBS,
+ R600_QUERY_NUM_SDMA_IBS,
+ R600_QUERY_GFX_BO_LIST_SIZE,
+ R600_QUERY_NUM_BYTES_MOVED,
+ R600_QUERY_NUM_EVICTIONS,
+ R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
+ R600_QUERY_VRAM_USAGE,
+ R600_QUERY_VRAM_VIS_USAGE,
+ R600_QUERY_GTT_USAGE,
+ R600_QUERY_GPU_TEMPERATURE,
+ R600_QUERY_CURRENT_GPU_SCLK,
+ R600_QUERY_CURRENT_GPU_MCLK,
+ R600_QUERY_GPU_LOAD,
+ R600_QUERY_GPU_SHADERS_BUSY,
+ R600_QUERY_GPU_TA_BUSY,
+ R600_QUERY_GPU_GDS_BUSY,
+ R600_QUERY_GPU_VGT_BUSY,
+ R600_QUERY_GPU_IA_BUSY,
+ R600_QUERY_GPU_SX_BUSY,
+ R600_QUERY_GPU_WD_BUSY,
+ R600_QUERY_GPU_BCI_BUSY,
+ R600_QUERY_GPU_SC_BUSY,
+ R600_QUERY_GPU_PA_BUSY,
+ R600_QUERY_GPU_DB_BUSY,
+ R600_QUERY_GPU_CP_BUSY,
+ R600_QUERY_GPU_CB_BUSY,
+ R600_QUERY_GPU_SDMA_BUSY,
+ R600_QUERY_GPU_PFP_BUSY,
+ R600_QUERY_GPU_MEQ_BUSY,
+ R600_QUERY_GPU_ME_BUSY,
+ R600_QUERY_GPU_SURF_SYNC_BUSY,
+ R600_QUERY_GPU_CP_DMA_BUSY,
+ R600_QUERY_GPU_SCRATCH_RAM_BUSY,
+ R600_QUERY_NUM_COMPILATIONS,
+ R600_QUERY_NUM_SHADERS_CREATED,
+ R600_QUERY_NUM_SHADER_CACHE_HITS,
+ R600_QUERY_GPIN_ASIC_ID,
+ R600_QUERY_GPIN_NUM_SIMD,
+ R600_QUERY_GPIN_NUM_RB,
+ R600_QUERY_GPIN_NUM_SPI,
+ R600_QUERY_GPIN_NUM_SE,
+ /* NOTE(review): presumably the first query type reserved for performance
+ * counters, in which case the 69 values above must stay below
+ * PIPE_QUERY_DRIVER_SPECIFIC + 100 - confirm. */
+ R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
+};
+
+enum { /* Ids of the query groups defined by the driver query list (used as pipe_driver_query_info::group_id). */
+ R600_QUERY_GROUP_GPIN = 0,
+ R600_NUM_SW_QUERY_GROUPS /* number of groups above; keep last */
+};
+
+struct r600_query_ops { /* Callback table for one kind of query; reached through r600_query::ops. */
+ void (*destroy)(struct r600_common_screen *, struct r600_query *);
+ bool (*begin)(struct r600_common_context *, struct r600_query *);
+ bool (*end)(struct r600_common_context *, struct r600_query *);
+ bool (*get_result)(struct r600_common_context *,
+ struct r600_query *, bool wait,
+ union pipe_query_result *result);
+ void (*get_result_resource)(struct r600_common_context *,
+ struct r600_query *, bool wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset); /* NOTE(review): resource/offset presumably name where the result is written - confirm */
+};
+
+struct r600_query { /* Base query object; embedded as the first member of r600_query_hw. */
+ struct threaded_query b; /* NOTE(review): presumably must stay first so pipe_query pointers can be cast to this type - confirm */
+ struct r600_query_ops *ops; /* callbacks implementing this query */
+
+ /* The type of query */
+ unsigned type;
+};
+
+enum { /* Single-bit flags; NOTE(review): presumably stored in r600_query_hw::flags - confirm. */
+ R600_QUERY_HW_FLAG_NO_START = (1 << 0),
+ /* gap: bit 1 is unused here */
+ /* whether begin_query doesn't clear the result */
+ R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
+};
+
+struct r600_query_hw_ops { /* Callback table for a hardware query; reached through r600_query_hw::ops. */
+ bool (*prepare_buffer)(struct r600_common_screen *,
+ struct r600_query_hw *,
+ struct r600_resource *);
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
+ void (*add_result)(struct r600_common_screen *screen,
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+};
+
+struct r600_query_buffer { /* One node in a query's chain of result buffers; the head is r600_query_hw::buffer. */
+ /* The buffer where query results are stored. */
+ struct r600_resource *buf;
+ /* Offset of the next free result after current query data */
+ unsigned results_end;
+ /* If a query buffer is full, a new buffer is created and the old one
+ * is put in here. When we calculate the result, we sum up the samples
+ * from all buffers. NULL terminates the chain. */
+ struct r600_query_buffer *previous;
+};
+
+struct r600_query_hw { /* Hardware query whose results are stored in a chain of query buffers. */
+ struct r600_query b; /* base object; pipe_query pointers are cast to this struct (see r600_render_condition) */
+ struct r600_query_hw_ops *ops;
+ unsigned flags; /* NOTE(review): presumably R600_QUERY_HW_FLAG_* bits - confirm */
+
+ /* The query buffer and how many results are in it. */
+ struct r600_query_buffer buffer;
+ /* Size of the result in memory for both begin_query and end_query,
+ * this can be one or two numbers, or it could even be a size of a structure. */
+ unsigned result_size;
+ /* The number of dwords for begin_query or end_query. */
+ unsigned num_cs_dw_begin;
+ unsigned num_cs_dw_end;
+ /* Linked list of queries; this is the node used to put the query on
+ * r600_common_context::active_queries. */
+ struct list_head list;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
+
+ /* Workaround via compute shader */
+ struct r600_resource *workaround_buf;
+ unsigned workaround_offset;
+};
+
+bool r600_query_hw_init(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query);
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
+ struct r600_query *rquery);
+bool r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+bool r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+bool r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait,
+ union pipe_query_result *result);
+
+/* Performance counters */
+enum { /* Properties of a performance counter block; each value is a distinct bit. */
+ /* This block is part of the shader engine */
+ R600_PC_BLOCK_SE = (1 << 0),
+
+ /* Expose per-instance groups instead of summing all instances (within
+ * an SE). */
+ R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
+
+ /* Expose per-SE groups instead of summing instances across SEs. */
+ R600_PC_BLOCK_SE_GROUPS = (1 << 2),
+
+ /* Shader block */
+ R600_PC_BLOCK_SHADER = (1 << 3),
+
+ /* Non-shader block with perfcounters windowed by shaders. */
+ R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
+};
+
+/* Describes a hardware block with performance counters. Multiple instances of
+ * each block, possibly per-SE, may exist on the chip. Depending on the block
+ * and on the user's configuration, we either
+ * (a) expose every instance as a performance counter group,
+ * (b) expose a single performance counter group that reports the sum over all
+ * instances, or
+ * (c) expose one performance counter group per instance, but summed over all
+ * shader engines.
+ */
+struct r600_perfcounter_block {
+ const char *basename;
+ unsigned flags; /* NOTE(review): presumably R600_PC_BLOCK_* bits - confirm */
+ unsigned num_counters;
+ unsigned num_selectors;
+ unsigned num_instances;
+ /* NOTE(review): group_names/selector_names look like packed arrays of
+ * names spaced *_name_stride bytes apart - confirm against the code that
+ * fills them in. */
+ unsigned num_groups;
+ char *group_names;
+ unsigned group_name_stride;
+
+ char *selector_names;
+ unsigned selector_name_stride;
+
+ void *data; /* opaque pointer; same type as the `data` argument of r600_perfcounters_add_block() */
+};
+
+struct r600_perfcounters { /* Set of performance counter blocks plus the hooks used to program them. */
+ unsigned num_groups; /* NOTE(review): presumably the total group count over all blocks; the query code offsets software group ids by a field of this name - confirm */
+ unsigned num_blocks;
+ struct r600_perfcounter_block *blocks;
+
+ unsigned num_start_cs_dwords;
+ unsigned num_stop_cs_dwords;
+ unsigned num_instance_cs_dwords;
+ unsigned num_shaders_cs_dwords;
+
+ unsigned num_shader_types;
+ const char * const *shader_type_suffixes;
+ const unsigned *shader_type_bits;
+ /* Function-pointer hooks follow. */
+ void (*get_size)(struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors,
+ unsigned *num_select_dw, unsigned *num_read_dw);
+
+ void (*emit_instance)(struct r600_common_context *,
+ int se, int instance);
+ void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
+ void (*emit_select)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors);
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_read)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors,
+ struct r600_resource *buffer, uint64_t va);
+
+ void (*cleanup)(struct r600_common_screen *);
+
+ bool separate_se;
+ bool separate_instance;
+};
+
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+ unsigned num_queries,
+ unsigned *query_types);
+
+int r600_get_perfcounter_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_info *info);
+int r600_get_perfcounter_group_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+
+bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
+void r600_perfcounters_add_block(struct r600_common_screen *,
+ struct r600_perfcounters *,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data);
+void r600_perfcounters_do_destroy(struct r600_perfcounters *);
+void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query);
+
+struct r600_qbo_state { /* Filled by the context's save_qbo_state hook and handed to r600_restore_qbo_state() around the query-result compute dispatch. */
+ void *saved_compute; /* NOTE(review): presumably the previously bound compute state - confirm */
+ struct pipe_constant_buffer saved_const0; /* NOTE(review): presumably compute constant buffer slot 0 - confirm */
+ struct pipe_shader_buffer saved_ssbo[3]; /* the dispatch binds 3 compute shader buffers starting at slot 0 */
+};
+
+#endif /* R600_QUERY_H */
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_streamout.c b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c
new file mode 100644
index 000000000..78334066c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_streamout.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+
+#include "util/u_memory.h"
+#include "evergreend.h"
+
+#define R_008490_CP_STRMOUT_CNTL 0x008490
+#define R_028AB0_VGT_STRMOUT_EN 0x028AB0
+#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
+
+static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
+
+/*
+ * Create a stream output target covering [buffer_offset, buffer_offset +
+ * buffer_size) of the given buffer.
+ *
+ * A 4-byte slot is suballocated from the zeroed-memory allocator for the
+ * target's buf_filled_size. The target takes a reference on the buffer and
+ * the covered range is added to the buffer's valid range. Returns NULL if
+ * either allocation fails.
+ */
+static struct pipe_stream_output_target *
+r600_create_so_target(struct pipe_context *ctx,
+		      struct pipe_resource *buffer,
+		      unsigned buffer_offset,
+		      unsigned buffer_size)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+	struct r600_so_target *target = CALLOC_STRUCT(r600_so_target);
+
+	if (!target)
+		return NULL;
+
+	u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
+			     &target->buf_filled_size_offset,
+			     (struct pipe_resource**)&target->buf_filled_size);
+	if (!target->buf_filled_size) {
+		FREE(target);
+		return NULL;
+	}
+
+	/* Initialize the base pipe_stream_output_target. */
+	target->b.reference.count = 1;
+	target->b.context = ctx;
+	pipe_resource_reference(&target->b.buffer, buffer);
+	target->b.buffer_offset = buffer_offset;
+	target->b.buffer_size = buffer_size;
+
+	util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+		       buffer_offset + buffer_size);
+
+	return &target->b;
+}
+
+/* Drop the target's references on its buffer and on its filled-size buffer,
+ * then free the target itself. */
+static void r600_so_target_destroy(struct pipe_context *ctx,
+				   struct pipe_stream_output_target *target)
+{
+	struct r600_so_target *so_target = (struct r600_so_target*)target;
+
+	pipe_resource_reference(&so_target->b.buffer, NULL);
+	r600_resource_reference(&so_target->buf_filled_size, NULL);
+
+	FREE(so_target);
+}
+
+/*
+ * Recompute the dword budgets of the streamout begin atom and of the
+ * streamout end sequence from the currently enabled buffers, mark the begin
+ * atom dirty and enable streamout. Does nothing when no buffer is enabled.
+ */
+void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
+{
+	struct r600_atom *begin = &rctx->streamout.begin_atom;
+	const unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
+	unsigned num_appended;
+	unsigned begin_dw;
+
+	if (num_bufs == 0)
+		return;
+
+	num_appended = util_bitcount(rctx->streamout.enabled_mask &
+				     rctx->streamout.append_bitmask);
+
+	/* flush_vgt_streamout + per buffer STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
+	rctx->streamout.num_dw_for_end = 12 + num_bufs * 11;
+
+	/* flush_vgt_streamout */
+	begin_dw = 12;
+	/* SET_CONTEXT_REG */
+	begin_dw += num_bufs * 7;
+	/* STRMOUT_BASE_UPDATE */
+	if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
+		begin_dw += num_bufs * 5;
+	/* STRMOUT_BUFFER_UPDATE, appending buffers */
+	begin_dw += num_appended * 8;
+	/* STRMOUT_BUFFER_UPDATE, buffers starting from the packet offset */
+	begin_dw += (num_bufs - num_appended) * 6;
+	/* SURFACE_BASE_UPDATE */
+	if (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780)
+		begin_dw += 2;
+
+	begin->num_dw = begin_dw;
+
+	rctx->set_atom_dirty(rctx, begin, true);
+
+	r600_set_streamout_enable(rctx, true);
+}
+
+/*
+ * Bind a new set of stream output targets.
+ *
+ * Streamout that is currently in progress is ended first. The first
+ * num_targets slots then take references on the new targets and any
+ * remaining previously bound slots are released. An offset of (unsigned)-1
+ * marks a target as "append". Finally the begin atom is re-dirtied, or
+ * streamout is disabled if num_targets is zero.
+ */
+void r600_set_streamout_targets(struct pipe_context *ctx,
+				unsigned num_targets,
+				struct pipe_stream_output_target **targets,
+				const unsigned *offsets)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	unsigned enabled = 0;
+	unsigned append = 0;
+	unsigned slot;
+
+	/* Stop streamout. */
+	if (rctx->streamout.num_targets && rctx->streamout.begin_emitted)
+		r600_emit_streamout_end(rctx);
+
+	/* Set the new targets. */
+	for (slot = 0; slot < num_targets; slot++) {
+		struct pipe_stream_output_target *new_target = targets[slot];
+
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[slot], new_target);
+		if (new_target) {
+			r600_context_add_resource_size(ctx, new_target->buffer);
+			enabled |= 1 << slot;
+			if (offsets[slot] == ((unsigned)-1))
+				append |= 1 << slot;
+		}
+	}
+
+	/* Release the slots that were bound before but are not any more. */
+	while (slot < rctx->streamout.num_targets) {
+		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[slot], NULL);
+		slot++;
+	}
+
+	rctx->streamout.enabled_mask = enabled;
+	rctx->streamout.num_targets = num_targets;
+	rctx->streamout.append_bitmask = append;
+
+	if (!num_targets) {
+		rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
+		r600_set_streamout_enable(rctx, false);
+		return;
+	}
+
+	r600_streamout_buffers_dirty(rctx);
+}
+
+/*
+ * Emit the VGT streamout flush: clear CP_STRMOUT_CNTL, write the
+ * SO_VGTSTREAMOUT_FLUSH event, then wait until the register's
+ * OFFSET_UPDATE_DONE bit is set.
+ */
+static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
+{
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
+	/* The register is at different places on different ASICs. */
+	const unsigned strmout_cntl = rctx->chip_class >= EVERGREEN ?
+				      R_0084FC_CP_STRMOUT_CNTL :
+				      R_008490_CP_STRMOUT_CNTL;
+
+	radeon_set_config_reg(cs, strmout_cntl, 0);
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	/* wait until the register is equal to the reference value */
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL);
+	/* register */
+	radeon_emit(cs, strmout_cntl >> 2);
+	radeon_emit(cs, 0);
+	/* reference value */
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1));
+	/* mask */
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1));
+	/* poll interval */
+	radeon_emit(cs, 4);
+}
+
+/*
+ * Emit the commands that start streamout for every bound target.
+ *
+ * After a VGT streamout flush, each target gets its buffer size, stride and
+ * base programmed, followed by a STRMOUT_BUFFER_UPDATE that either reloads
+ * the write offset from the target's filled-size buffer (append) or starts
+ * at the target's buffer offset. Sets streamout.begin_emitted when done.
+ */
+static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
+	struct r600_so_target **targets = rctx->streamout.targets;
+	uint16_t *strides = rctx->streamout.stride_in_dw;
+	const bool needs_base_update = rctx->family >= CHIP_RS780 &&
+				       rctx->family <= CHIP_RV740;
+	unsigned update_flags = 0;
+	unsigned i;
+
+	r600_flush_vgt_streamout(rctx);
+
+	for (i = 0; i < rctx->streamout.num_targets; i++) {
+		struct r600_so_target *tgt = targets[i];
+		uint64_t base_va;
+		bool append;
+
+		if (!tgt)
+			continue;
+
+		tgt->stride_in_dw = strides[i];
+		base_va = r600_resource(tgt->b.buffer)->gpu_address;
+		update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
+
+		radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+		/* BUFFER_SIZE (in DW) */
+		radeon_emit(cs, (tgt->b.buffer_offset + tgt->b.buffer_size) >> 2);
+		/* VTX_STRIDE (in DW) */
+		radeon_emit(cs, strides[i]);
+		/* BUFFER_BASE */
+		radeon_emit(cs, base_va >> 8);
+
+		r600_emit_reloc(rctx, &rctx->gfx, r600_resource(tgt->b.buffer),
+				RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
+
+		/* R7xx requires this packet after updating BUFFER_BASE.
+		 * Without this, R7xx locks up. */
+		if (needs_base_update) {
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
+			radeon_emit(cs, i);
+			radeon_emit(cs, base_va >> 8);
+
+			r600_emit_reloc(rctx, &rctx->gfx, r600_resource(tgt->b.buffer),
+					RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
+		}
+
+		append = (rctx->streamout.append_bitmask & (1 << i)) &&
+			 tgt->buf_filled_size_valid;
+
+		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+		if (append) {
+			/* Append: reload the offset from the filled-size buffer. */
+			uint64_t filled_va = tgt->buf_filled_size->gpu_address +
+					     tgt->buf_filled_size_offset;
+
+			/* control */
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM));
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, filled_va); /* src address lo */
+			radeon_emit(cs, filled_va >> 32); /* src address hi */
+
+			r600_emit_reloc(rctx, &rctx->gfx, tgt->buf_filled_size,
+					RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
+		} else {
+			/* Start from the beginning. */
+			/* control */
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET));
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, tgt->b.buffer_offset >> 2); /* buffer offset in DW */
+			radeon_emit(cs, 0); /* unused */
+		}
+	}
+
+	if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
+		radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
+		radeon_emit(cs, update_flags);
+	}
+	rctx->streamout.begin_emitted = true;
+}
+
+/* Emit the command-stream state that stops streamout.
+ *
+ * After flushing VGT streamout, for every non-NULL target this stores the
+ * current filled size to the target's buf_filled_size buffer (so a later
+ * begin can append), zeroes the hardware buffer size and marks the saved
+ * filled size as valid. Clears streamout.begin_emitted and requests a
+ * streamout flush through rctx->flags.
+ */
+void r600_emit_streamout_end(struct r600_common_context *rctx)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct r600_so_target **t = rctx->streamout.targets;
+ unsigned i;
+ uint64_t va;
+
+ r600_flush_vgt_streamout(rctx);
+
+ for (i = 0; i < rctx->streamout.num_targets; i++) {
+ if (!t[i])
+ continue;
+
+ /* Destination of the filled-size store. */
+ va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+
+ r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
+ RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
+
+ /* Zero the buffer size. The counters (primitives generated,
+ * primitives emitted) may be enabled even if there is no
+ * buffer bound. This ensures that the primitives-emitted query
+ * won't increment. */
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+
+ /* The saved size can now be used by the append path of the next begin. */
+ t[i]->buf_filled_size_valid = true;
+ }
+
+ rctx->streamout.begin_emitted = false;
+ rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
+}
+
+/* STREAMOUT CONFIG DERIVED STATE
+ *
+ * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
+ * The buffer mask is an independent state, so no writes occur if there
+ * are no buffers bound.
+ */
+
+/* Emit the streamout enable state: the per-buffer enable mask and the
+ * stream enable bit(s).
+ *
+ * Before Evergreen the registers are VGT_STRMOUT_BUFFER_EN and
+ * VGT_STRMOUT_EN with a single enable bit. On Evergreen and later the
+ * registers are VGT_STRMOUT_BUFFER_CONFIG and VGT_STRMOUT_CONFIG, and the
+ * same enable value is replicated into the STREAMOUT_0..3_EN fields.
+ *
+ * \param rctx  context whose gfx command stream is written
+ * \param atom  unused here
+ */
+static void r600_emit_streamout_enable(struct r600_common_context *rctx,
+ struct r600_atom *atom)
+{
+ unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
+ /* Note: the S_028B94_* field macro is also used for the pre-Evergreen register. */
+ unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+ unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
+ /* Only buffers that are both hardware-enabled and written by the
+ * enabled streams stay enabled. */
+ unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
+ rctx->streamout.enabled_stream_buffers_mask;
+
+ if (rctx->chip_class >= EVERGREEN) {
+ strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
+
+ strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
+ strmout_config_val |=
+ S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+ }
+ radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
+}
+
+/* Record the requested streamout enable state and rebuild the derived
+ * hardware buffer mask (enabled_mask replicated at bit offsets 0, 4, 8, 12).
+ * enable_atom is marked dirty when either the effective enable value or the
+ * hardware mask changed.
+ */
+static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
+{
+	const bool was_enabled = r600_get_strmout_en(rctx);
+	const unsigned prev_hw_mask = rctx->streamout.hw_enabled_mask;
+	unsigned replicated = 0;
+	unsigned field;
+	bool now_enabled;
+
+	rctx->streamout.streamout_enabled = enable;
+
+	/* One copy of the buffer mask per 4-bit field. */
+	for (field = 0; field < 4; field++)
+		replicated |= rctx->streamout.enabled_mask << (4 * field);
+	rctx->streamout.hw_enabled_mask = replicated;
+
+	now_enabled = r600_get_strmout_en(rctx);
+	if (was_enabled == now_enabled &&
+	    prev_hw_mask == rctx->streamout.hw_enabled_mask)
+		return;
+
+	rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+}
+
+/* Track the number of active PRIMITIVES_GENERATED queries.
+ *
+ * For that query type, diff is added to the running count, the
+ * prims_gen_query_enabled flag is refreshed, and enable_atom is marked dirty
+ * if the effective streamout enable value changed. Other query types are
+ * ignored.
+ */
+void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
+					     unsigned type, int diff)
+{
+	bool enabled_before;
+
+	if (type != PIPE_QUERY_PRIMITIVES_GENERATED)
+		return;
+
+	enabled_before = r600_get_strmout_en(rctx);
+
+	rctx->streamout.num_prims_gen_queries += diff;
+	assert(rctx->streamout.num_prims_gen_queries >= 0);
+
+	rctx->streamout.prims_gen_query_enabled =
+		rctx->streamout.num_prims_gen_queries != 0;
+
+	if (enabled_before == r600_get_strmout_en(rctx))
+		return;
+
+	rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+}
+
+/* Install the streamout entry points on the context: the stream-output
+ * target create/destroy hooks and the emit callbacks of the begin and
+ * enable atoms (the enable atom is sized at 6 dwords).
+ */
+void r600_streamout_init(struct r600_common_context *rctx)
+{
+	/* State atoms. */
+	rctx->streamout.enable_atom.num_dw = 6;
+	rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
+	rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
+
+	/* pipe_context hooks. */
+	rctx->b.stream_output_target_destroy = r600_so_target_destroy;
+	rctx->b.create_stream_output_target = r600_create_so_target;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c
new file mode 100644
index 000000000..9e1ff9e5f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_test_dma.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* This file implements randomized SDMA texture blit tests. */
+
+#include "r600_pipe_common.h"
+#include "util/u_surface.h"
+#include "util/rand_xor.h"
+
+static uint64_t seed_xorshift128plus[2];
+
+#define RAND_NUM_SIZE 8
+
+/* The GPU blits are emulated on the CPU using these CPU textures.
+ * All fields are filled in by alloc_cpu_texture(). */
+
+struct cpu_texture {
+ uint8_t *ptr; /* malloc'ed pixel storage, "size" bytes */
+ uint64_t size; /* layer_stride * array_size, in bytes */
+ uint64_t layer_stride; /* stride * height, in bytes */
+ unsigned stride; /* row pitch in bytes: width * bpp aligned to RAND_NUM_SIZE */
+};
+
+/* Allocate the CPU-side shadow of a texture described by templ.
+ *
+ * The row pitch is width0 * bpp rounded up to RAND_NUM_SIZE so that each row
+ * can be filled with whole 64-bit random words. The storage is left
+ * uninitialized.
+ *
+ * Allocation failure terminates the process. This is checked explicitly
+ * rather than with assert() so that a build with NDEBUG does not continue
+ * with a NULL pointer that later code writes through.
+ *
+ * \param tex    receives stride, layer_stride, size and ptr
+ * \param templ  texture template providing width0, height0 and array_size
+ * \param bpp    bytes per pixel
+ */
+static void alloc_cpu_texture(struct cpu_texture *tex,
+			      struct pipe_resource *templ, int bpp)
+{
+	tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
+	tex->layer_stride = (uint64_t)tex->stride * templ->height0;
+	tex->size = tex->layer_stride * templ->array_size;
+	tex->ptr = malloc(tex->size);
+	if (!tex->ptr) {
+		fprintf(stderr, "r600_test_dma: failed to allocate %llu bytes\n",
+			(unsigned long long)tex->size);
+		abort();
+	}
+}
+
+/* Fill a GPU texture and its CPU shadow with identical pseudo-random data.
+ *
+ * Level 0 of tex is mapped for writing and each row is filled with
+ * cpu->stride / RAND_NUM_SIZE 64-bit words from the xorshift128+ generator;
+ * every generated word is stored to both copies.
+ */
+static void set_random_pixels(struct pipe_context *ctx,
+			      struct pipe_resource *tex,
+			      struct cpu_texture *cpu)
+{
+	struct pipe_transfer *xfer;
+	uint8_t *gpu_base;
+	int word, row, layer;
+
+	gpu_base = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
+					0, 0, 0, tex->width0, tex->height0,
+					tex->array_size, &xfer);
+	assert(gpu_base);
+
+	for (layer = 0; layer < tex->array_size; layer++) {
+		for (row = 0; row < tex->height0; row++) {
+			uint64_t *gpu_row = (uint64_t*)
+				(gpu_base + xfer->layer_stride*layer + xfer->stride*row);
+			uint64_t *cpu_row = (uint64_t*)
+				(cpu->ptr + cpu->layer_stride*layer + cpu->stride*row);
+			unsigned num_words = cpu->stride / RAND_NUM_SIZE;
+
+			/* Rows must hold a whole number of random words. */
+			assert(xfer->stride % RAND_NUM_SIZE == 0);
+			assert(cpu->stride % RAND_NUM_SIZE == 0);
+
+			for (word = 0; word < num_words; word++) {
+				uint64_t value =
+					rand_xorshift128plus(seed_xorshift128plus);
+
+				cpu_row[word] = value;
+				gpu_row[word] = value;
+			}
+		}
+	}
+
+	pipe_transfer_unmap(ctx, xfer);
+}
+
+/* Compare level 0 of a GPU texture with its CPU shadow.
+ *
+ * Only the first width0 * bpp bytes of each row are compared; row padding is
+ * ignored. The scan stops at the first mismatching row.
+ *
+ * \return true if every row of every layer matches, false otherwise
+ */
+static bool compare_textures(struct pipe_context *ctx,
+			     struct pipe_resource *tex,
+			     struct cpu_texture *cpu, int bpp)
+{
+	struct pipe_transfer *xfer;
+	uint8_t *gpu_base;
+	bool identical = true;
+	int row, layer;
+
+	gpu_base = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
+					0, 0, 0, tex->width0, tex->height0,
+					tex->array_size, &xfer);
+	assert(gpu_base);
+
+	for (layer = 0; identical && layer < tex->array_size; layer++) {
+		for (row = 0; identical && row < tex->height0; row++) {
+			uint8_t *gpu_row = gpu_base + xfer->layer_stride*layer +
+					   xfer->stride*row;
+			uint8_t *cpu_row = cpu->ptr +
+					   cpu->layer_stride*layer + cpu->stride*row;
+
+			if (memcmp(gpu_row, cpu_row, tex->width0 * bpp) != 0)
+				identical = false;
+		}
+	}
+
+	pipe_transfer_unmap(ctx, xfer);
+	return identical;
+}
+
+/* Map a pixel size in bytes (1, 2, 4, 8 or 16) to an unsigned-integer
+ * format of that size. Any other value asserts and yields PIPE_FORMAT_NONE.
+ */
+static enum pipe_format get_format_from_bpp(int bpp)
+{
+	if (bpp == 1)
+		return PIPE_FORMAT_R8_UINT;
+	if (bpp == 2)
+		return PIPE_FORMAT_R16_UINT;
+	if (bpp == 4)
+		return PIPE_FORMAT_R32_UINT;
+	if (bpp == 8)
+		return PIPE_FORMAT_R32G32_UINT;
+	if (bpp == 16)
+		return PIPE_FORMAT_R32G32B32A32_UINT;
+
+	assert(0);
+	return PIPE_FORMAT_NONE;
+}
+
+/* Return a printable name for the tiling mode of level 0 of surf.
+ * GFX9 and later are not decoded yet and report " UNKNOWN".
+ */
+static const char *array_mode_to_string(struct r600_common_screen *rscreen,
+					struct radeon_surf *surf)
+{
+	if (rscreen->chip_class >= GFX9) {
+		/* TODO */
+		return " UNKNOWN";
+	}
+
+	if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
+		return "LINEAR_ALIGNED";
+	if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D)
+		return "1D_TILED_THIN1";
+	if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D)
+		return "2D_TILED_THIN1";
+
+	assert(0);
+	return " UNKNOWN";
+}
+
+/* Pick the upper bound for a random texture dimension.
+ * Consumes exactly one rand() value.
+ */
+static unsigned generate_max_tex_side(unsigned max_tex_side)
+{
+	const int bucket = rand() % 4;
+
+	/* Try to hit large sizes in 1/4 of the cases. */
+	if (bucket == 0)
+		return max_tex_side;
+
+	/* Try to hit 1D tiling in 1/4 of the cases. */
+	if (bucket == 1)
+		return 128;
+
+	/* Try to hit common sizes in 2/4 of the cases. */
+	return 2048;
+}
+
+/* Randomized SDMA texture blit stress test.
+ *
+ * Creates a context on the given screen and then, per iteration, creates a
+ * random source and destination 2D-array texture, fills the source with
+ * random pixels, clears the destination, performs one whole-surface copy or
+ * num_partial_copies random sub-rectangle copies through rctx->dma_copy,
+ * mirrors each copy on the CPU with util_copy_box and finally compares the
+ * GPU destination against the CPU one. A pass/fail line is printed per
+ * iteration.
+ *
+ * The order of the rand() calls determines the generated test cases, so
+ * statements must not be reordered.
+ *
+ * This function does not return: it calls exit(0) after the loop.
+ */
+void r600_test_dma(struct r600_common_screen *rscreen)
+{
+ struct pipe_screen *screen = &rscreen->b;
+ struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ uint64_t max_alloc_size;
+ unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
+ unsigned num_pass = 0, num_fail = 0;
+
+ max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+ max_tex_side = 1 << (max_levels - 1);
+
+ /* Max 128 MB allowed for both textures. */
+ max_alloc_size = 128 * 1024 * 1024;
+
+ /* the seed for random test parameters */
+ srand(0x9b47d95b);
+ /* the seed for random pixel data */
+ s_rand_xorshift128plus(seed_xorshift128plus, false);
+
+ iterations = 1000000000; /* just kill it when you are bored */
+ num_partial_copies = 30;
+
+ /* These parameters are randomly generated per test:
+ * - whether to do one whole-surface copy or N partial copies per test
+ * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
+ * - which texture dimensions to use
+ * - whether to use VRAM (all tiling modes) and GTT (staging, linear
+ * only) allocations
+ * - random initial pixels in src
+ * - generate random subrectangle copies for partial blits
+ */
+ for (i = 0; i < iterations; i++) {
+ struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
+ struct r600_texture *rdst;
+ struct r600_texture *rsrc;
+ struct cpu_texture src_cpu, dst_cpu;
+ unsigned bpp, max_width, max_height, max_depth, j, num;
+ unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
+ unsigned max_tex_layers;
+ bool pass;
+ bool do_partial_copies = rand() & 1;
+
+ /* generate a random test case */
+ tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
+ tsrc.depth0 = tdst.depth0 = 1;
+
+ /* bpp is one of 1, 2, 4, 8, 16 bytes */
+ bpp = 1 << (rand() % 5);
+ tsrc.format = tdst.format = get_format_from_bpp(bpp);
+
+ max_tex_side_gen = generate_max_tex_side(max_tex_side);
+ /* 3/4 of the cases use a single layer, 1/4 use up to 5 layers */
+ max_tex_layers = rand() % 4 ? 1 : 5;
+
+ tsrc.width0 = (rand() % max_tex_side_gen) + 1;
+ tsrc.height0 = (rand() % max_tex_side_gen) + 1;
+ tsrc.array_size = (rand() % max_tex_layers) + 1;
+
+ /* Have a 1/4 chance of getting power-of-two dimensions. */
+ if (rand() % 4 == 0) {
+ tsrc.width0 = util_next_power_of_two(tsrc.width0);
+ tsrc.height0 = util_next_power_of_two(tsrc.height0);
+ }
+
+ if (!do_partial_copies) {
+ /* whole-surface copies only, same dimensions */
+ tdst = tsrc;
+ } else {
+ max_tex_side_gen = generate_max_tex_side(max_tex_side);
+ max_tex_layers = rand() % 4 ? 1 : 5;
+
+ /* many partial copies, dimensions can be different */
+ tdst.width0 = (rand() % max_tex_side_gen) + 1;
+ tdst.height0 = (rand() % max_tex_side_gen) + 1;
+ tdst.array_size = (rand() % max_tex_layers) + 1;
+
+ /* Have a 1/4 chance of getting power-of-two dimensions. */
+ if (rand() % 4 == 0) {
+ tdst.width0 = util_next_power_of_two(tdst.width0);
+ tdst.height0 = util_next_power_of_two(tdst.height0);
+ }
+ }
+
+ /* check texture sizes */
+ if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
+ (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
+ max_alloc_size) {
+ /* too large, try again */
+ /* i is unsigned: the decrement is undone by the loop's i++,
+ * so the same iteration number is retried (wrapping at 0 is
+ * well defined). */
+ i--;
+ continue;
+ }
+
+ /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
+ * or GTT + linear only (1/4 of cases)
+ */
+ tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
+ tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
+
+ /* Allocate textures (both the GPU and CPU copies).
+ * The CPU will emulate what the GPU should be doing.
+ */
+ src = screen->resource_create(screen, &tsrc);
+ dst = screen->resource_create(screen, &tdst);
+ assert(src);
+ assert(dst);
+ rdst = (struct r600_texture*)dst;
+ rsrc = (struct r600_texture*)src;
+ alloc_cpu_texture(&src_cpu, &tsrc, bpp);
+ alloc_cpu_texture(&dst_cpu, &tdst, bpp);
+
+ printf("%4u: dst = (%5u x %5u x %u, %s), "
+ " src = (%5u x %5u x %u, %s), bpp = %2u, ",
+ i, tdst.width0, tdst.height0, tdst.array_size,
+ array_mode_to_string(rscreen, &rdst->surface),
+ tsrc.width0, tsrc.height0, tsrc.array_size,
+ array_mode_to_string(rscreen, &rsrc->surface), bpp);
+ fflush(stdout);
+
+ /* set src pixels */
+ set_random_pixels(ctx, src, &src_cpu);
+
+ /* clear dst pixels */
+ rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
+ memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
+
+ /* preparation */
+ max_width = MIN2(tsrc.width0, tdst.width0);
+ max_height = MIN2(tsrc.height0, tdst.height0);
+ max_depth = MIN2(tsrc.array_size, tdst.array_size);
+
+ num = do_partial_copies ? num_partial_copies : 1;
+ for (j = 0; j < num; j++) {
+ int width, height, depth;
+ int srcx, srcy, srcz, dstx, dsty, dstz;
+ struct pipe_box box;
+ /* snapshots used below to tell which engine did the copy */
+ unsigned old_num_draw_calls = rctx->num_draw_calls;
+ unsigned old_num_dma_calls = rctx->num_dma_calls;
+
+ if (!do_partial_copies) {
+ /* copy whole src to dst */
+ width = max_width;
+ height = max_height;
+ depth = max_depth;
+
+ srcx = srcy = srcz = dstx = dsty = dstz = 0;
+ } else {
+ /* random sub-rectangle copies from src to dst */
+ depth = (rand() % max_depth) + 1;
+ srcz = rand() % (tsrc.array_size - depth + 1);
+ dstz = rand() % (tdst.array_size - depth + 1);
+
+ /* special code path to hit the tiled partial copies */
+ if (!rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
+ rand() & 1) {
+ /* too small for an 8-aligned rectangle: skip this copy */
+ if (max_width < 8 || max_height < 8)
+ continue;
+ /* sizes and origins are multiples of 8 on this path */
+ width = ((rand() % (max_width / 8)) + 1) * 8;
+ height = ((rand() % (max_height / 8)) + 1) * 8;
+
+ srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
+ srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;
+
+ dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
+ dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
+ } else {
+ /* just make sure that it doesn't divide by zero */
+ assert(max_width > 0 && max_height > 0);
+
+ width = (rand() % max_width) + 1;
+ height = (rand() % max_height) + 1;
+
+ srcx = rand() % (tsrc.width0 - width + 1);
+ srcy = rand() % (tsrc.height0 - height + 1);
+
+ dstx = rand() % (tdst.width0 - width + 1);
+ dsty = rand() % (tdst.height0 - height + 1);
+ }
+
+ /* special code path to hit out-of-bounds reads in L2T */
+ if (rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
+ rand() % 4 == 0) {
+ srcx = 0;
+ srcy = 0;
+ srcz = 0;
+ }
+ }
+
+ /* GPU copy */
+ u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
+ rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
+
+ /* See which engine was used. */
+ gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
+ dma_blits += rctx->num_dma_calls > old_num_dma_calls;
+
+ /* CPU copy */
+ util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
+ dst_cpu.layer_stride,
+ dstx, dsty, dstz, width, height, depth,
+ src_cpu.ptr, src_cpu.stride,
+ src_cpu.layer_stride,
+ srcx, srcy, srcz);
+ }
+
+ pass = compare_textures(ctx, dst, &dst_cpu, bpp);
+ if (pass)
+ num_pass++;
+ else
+ num_fail++;
+
+ printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
+ gfx_blits, dma_blits, pass ? "pass" : "fail",
+ num_pass, num_pass+num_fail);
+
+ /* cleanup */
+ pipe_resource_reference(&src, NULL);
+ pipe_resource_reference(&dst, NULL);
+ free(src_cpu.ptr);
+ free(dst_cpu.ptr);
+ }
+
+ ctx->destroy(ctx);
+ /* the test never returns to the caller */
+ exit(0);
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_texture.c b/lib/mesa/src/gallium/drivers/r600/r600_texture.c
new file mode 100644
index 000000000..b84111449
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_texture.c
@@ -0,0 +1,1953 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ * Corbin Simpson
+ */
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+#include "r600_query.h"
+#include "util/u_format.h"
+#include "util/u_log.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "util/u_surface.h"
+#include "os/os_time.h"
+#include <errno.h>
+#include <inttypes.h>
+
+static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex);
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ);
+
+
+/* Decide whether a copy can go through the SDMA engine and, if so, prepare
+ * both textures for it.
+ *
+ * Returns false (without touching the textures) when there is no DMA command
+ * stream, the bytes-per-element differ, either texture is multisampled or
+ * depth, or the destination has dirty CMASK data and the copy does not
+ * overwrite the whole level.
+ *
+ * Side effects on success: the destination CMASK may be discarded and the
+ * source may be flushed through flush_resource.
+ *
+ * \param dstx,dsty,dstz  destination origin
+ * \param src_box         source region; its size is also the copy size
+ * \return true if the caller may use SDMA for this copy
+ */
+bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ if (!rctx->dma.cs)
+ return false;
+
+ if (rdst->surface.bpe != rsrc->surface.bpe)
+ return false;
+
+ /* MSAA: Blits don't exist in the real world. */
+ if (rsrc->resource.b.b.nr_samples > 1 ||
+ rdst->resource.b.b.nr_samples > 1)
+ return false;
+
+ /* Depth-stencil surfaces:
+ * When dst is linear, the DB->CB copy preserves HTILE.
+ * When dst is tiled, the 3D path must be used to update HTILE.
+ */
+ if (rsrc->is_depth || rdst->is_depth)
+ return false;
+
+ /* CMASK as:
+ * src: Both texture and SDMA paths need decompression. Use SDMA.
+ * dst: If overwriting the whole texture, discard CMASK and use
+ * SDMA. Otherwise, use the 3D path.
+ */
+ if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
+ /* The CMASK clear is only enabled for the first level. */
+ assert(dst_level == 0);
+ if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
+ dstx, dsty, dstz, src_box->width,
+ src_box->height, src_box->depth))
+ return false;
+
+ r600_texture_discard_cmask(rctx->screen, rdst);
+ }
+
+ /* All requirements are met. Prepare textures for SDMA. */
+ if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
+ rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
+
+ /* Neither level may still be marked dirty at this point. */
+ assert(!(rsrc->dirty_level_mask & (1 << src_level)));
+ assert(!(rdst->dirty_level_mask & (1 << dst_level)));
+
+ return true;
+}
+
+/* Same contract as resource_copy_region, except that both upsampling and
+ * downsampling are allowed. The copy is issued as a nearest-filtered
+ * pipe->blit restricted to the channels present in both formats; nothing is
+ * issued when the two formats share no channel.
+ */
+static void r600_copy_region_with_blit(struct pipe_context *pipe,
+				       struct pipe_resource *dst,
+				       unsigned dst_level,
+				       unsigned dstx, unsigned dsty, unsigned dstz,
+				       struct pipe_resource *src,
+				       unsigned src_level,
+				       const struct pipe_box *src_box)
+{
+	struct pipe_blit_info info;
+
+	memset(&info, 0, sizeof(info));
+
+	/* Destination: same extent as the source box, placed at dstx/y/z. */
+	info.dst.resource = dst;
+	info.dst.format = dst->format;
+	info.dst.level = dst_level;
+	info.dst.box.x = dstx;
+	info.dst.box.y = dsty;
+	info.dst.box.z = dstz;
+	info.dst.box.width = src_box->width;
+	info.dst.box.height = src_box->height;
+	info.dst.box.depth = src_box->depth;
+
+	/* Source. */
+	info.src.resource = src;
+	info.src.format = src->format;
+	info.src.level = src_level;
+	info.src.box = *src_box;
+
+	info.filter = PIPE_TEX_FILTER_NEAREST;
+	info.mask = util_format_get_mask(src->format) &
+		    util_format_get_mask(dst->format);
+
+	if (!info.mask)
+		return;
+
+	pipe->blit(pipe, &info);
+}
+
+/* Copy the transfer's box from the real GPU texture into the origin of the
+ * transfer's staging texture. Multisampled sources go through the blit
+ * path, everything else through dma_copy.
+ */
+static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+	struct pipe_transfer *xfer = (struct pipe_transfer*)rtransfer;
+	struct pipe_resource *staging = &rtransfer->staging->b.b;
+	struct pipe_resource *texture = xfer->resource;
+
+	if (texture->nr_samples > 1) {
+		r600_copy_region_with_blit(ctx, staging, 0, 0, 0, 0,
+					   texture, xfer->level, &xfer->box);
+	} else {
+		struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+
+		rctx->dma_copy(ctx, staging, 0, 0, 0, 0, texture, xfer->level,
+			       &xfer->box);
+	}
+}
+
+/* Copy the contents of a transfer's staging texture back into the
+ * transfer's box of the real GPU texture. Multisampled destinations go
+ * through the blit path, everything else through dma_copy.
+ */
+static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+	struct pipe_transfer *xfer = (struct pipe_transfer*)rtransfer;
+	struct pipe_resource *staging = &rtransfer->staging->b.b;
+	struct pipe_resource *texture = xfer->resource;
+	struct pipe_box staging_box;
+
+	/* The staging copy is read from its origin with the size of the transfer box. */
+	u_box_3d(0, 0, 0, xfer->box.width, xfer->box.height, xfer->box.depth, &staging_box);
+
+	if (texture->nr_samples > 1) {
+		r600_copy_region_with_blit(ctx, texture, xfer->level,
+					   xfer->box.x, xfer->box.y, xfer->box.z,
+					   staging, 0, &staging_box);
+	} else {
+		struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+
+		rctx->dma_copy(ctx, texture, xfer->level,
+			       xfer->box.x, xfer->box.y, xfer->box.z,
+			       staging, 0, &staging_box);
+	}
+}
+
+/* Compute the byte offset of a box origin inside a texture level and report
+ * the level's strides.
+ *
+ * \param rscreen       unused here
+ * \param rtex          texture whose legacy surface layout is read
+ * \param level         mipmap level
+ * \param box           region of interest; may be NULL, in which case the
+ *                      offset of the level itself is returned
+ * \param stride        out: row pitch in bytes (nblk_x * bpe)
+ * \param layer_stride  out: size of one slice of the level
+ * \return byte offset of the box origin (or of the level when box is NULL)
+ */
+static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, unsigned level,
+ const struct pipe_box *box,
+ unsigned *stride,
+ unsigned *layer_stride)
+{
+ *stride = rtex->surface.u.legacy.level[level].nblk_x *
+ rtex->surface.bpe;
+ *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
+
+ if (!box)
+ return rtex->surface.u.legacy.level[level].offset;
+
+ /* Each texture is an array of mipmap levels. Each level is
+ * an array of slices. */
+ /* x and y are converted from pixels to blocks (blk_w / blk_h) before
+ * being scaled by the block size in bytes. */
+ return rtex->surface.u.legacy.level[level].offset +
+ box->z * rtex->surface.u.legacy.level[level].slice_size +
+ (box->y / rtex->surface.blk_h *
+ rtex->surface.u.legacy.level[level].nblk_x +
+ box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+}
+
+/* Compute the surface layout for a texture through the winsys.
+ *
+ * Derives the bytes-per-element and the RADEON_SURF_* flags from the
+ * template, calls ws->surface_init, and then applies the optional pitch
+ * override (level 0 only) and the optional base offset (all levels).
+ *
+ * \param surface                  in/out: surface description to fill
+ * \param ptex                     resource template
+ * \param array_mode               requested tiling mode
+ * \param pitch_in_bytes_override  if non-zero, forces the level 0 pitch
+ * \param offset                   if non-zero, added to every level offset
+ * \param is_imported              the buffer comes from another process/API
+ * \param is_scanout               treat the surface as scanout
+ * \param is_flushed_depth         the texture is a flushed-depth copy, so no
+ *                                 depth/stencil flags are set
+ * \return 0 on success, otherwise the non-zero value from surface_init
+ */
+static int r600_init_surface(struct r600_common_screen *rscreen,
+ struct radeon_surf *surface,
+ const struct pipe_resource *ptex,
+ enum radeon_surf_mode array_mode,
+ unsigned pitch_in_bytes_override,
+ unsigned offset,
+ bool is_imported,
+ bool is_scanout,
+ bool is_flushed_depth)
+{
+ const struct util_format_description *desc =
+ util_format_description(ptex->format);
+ bool is_depth, is_stencil;
+ int r;
+ unsigned i, bpe, flags = 0;
+
+ is_depth = util_format_has_depth(desc);
+ is_stencil = util_format_has_stencil(desc);
+
+ if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
+ ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+ bpe = 4; /* stencil is allocated separately on evergreen */
+ } else {
+ bpe = util_format_get_blocksize(ptex->format);
+ assert(util_is_power_of_two(bpe));
+ }
+
+ if (!is_flushed_depth && is_depth) {
+ flags |= RADEON_SURF_ZBUFFER;
+
+ if (is_stencil)
+ flags |= RADEON_SURF_SBUFFER;
+ }
+
+ if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
+ /* This should catch bugs in gallium users setting incorrect flags. */
+ assert(ptex->nr_samples <= 1 &&
+ ptex->array_size == 1 &&
+ ptex->depth0 == 1 &&
+ ptex->last_level == 0 &&
+ !(flags & RADEON_SURF_Z_OR_SBUFFER));
+
+ flags |= RADEON_SURF_SCANOUT;
+ }
+
+ if (ptex->bind & PIPE_BIND_SHARED)
+ flags |= RADEON_SURF_SHAREABLE;
+ if (is_imported)
+ flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
+ if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
+ flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
+
+ r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
+ array_mode, surface);
+ if (r) {
+ return r;
+ }
+
+ if (pitch_in_bytes_override &&
+ pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
+ /* Old DDX versions on Evergreen overestimate the alignment of
+ * 1D-tiled surfaces. Such surfaces have only one level, so only
+ * level 0 is patched to the overridden pitch.
+ */
+ surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
+ surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
+ surface->u.legacy.level[0].nblk_y;
+ }
+
+ if (offset) {
+ /* Shift every level by the base offset. */
+ for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+ surface->u.legacy.level[i].offset += offset;
+ }
+
+ return 0;
+}
+
+/* Fill a zeroed radeon_bo_metadata from the legacy surface layout of rtex:
+ * micro/macro tiling derived from the level 0 mode, the tiling parameters,
+ * the level 0 pitch in bytes and the scanout flag.
+ */
+static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
+				       struct r600_texture *rtex,
+				       struct radeon_bo_metadata *metadata)
+{
+	struct radeon_surf *surf = &rtex->surface;
+
+	memset(metadata, 0, sizeof(*metadata));
+
+	/* 1D and above is micro-tiled. */
+	if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
+		metadata->u.legacy.microtile = RADEON_LAYOUT_TILED;
+	else
+		metadata->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
+
+	/* 2D and above is also macro-tiled. */
+	if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
+		metadata->u.legacy.macrotile = RADEON_LAYOUT_TILED;
+	else
+		metadata->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
+
+	metadata->u.legacy.pipe_config = surf->u.legacy.pipe_config;
+	metadata->u.legacy.bankw = surf->u.legacy.bankw;
+	metadata->u.legacy.bankh = surf->u.legacy.bankh;
+	metadata->u.legacy.tile_split = surf->u.legacy.tile_split;
+	metadata->u.legacy.mtilea = surf->u.legacy.mtilea;
+	metadata->u.legacy.num_banks = surf->u.legacy.num_banks;
+	metadata->u.legacy.stride = surf->u.legacy.level[0].nblk_x * surf->bpe;
+	metadata->u.legacy.scanout = (surf->flags & RADEON_SURF_SCANOUT) != 0;
+}
+
+/* Import buffer metadata into a surface description.
+ *
+ * Copies the tiling parameters into surf and reports the tiling mode
+ * (macro-tiled wins over micro-tiled, otherwise linear aligned) and the
+ * scanout flag through the out parameters.
+ */
+static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
+					 struct radeon_surf *surf,
+					 struct radeon_bo_metadata *metadata,
+					 enum radeon_surf_mode *array_mode,
+					 bool *is_scanout)
+{
+	surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
+	surf->u.legacy.bankw = metadata->u.legacy.bankw;
+	surf->u.legacy.bankh = metadata->u.legacy.bankh;
+	surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
+	surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
+	surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
+
+	if (metadata->u.legacy.macrotile != RADEON_LAYOUT_TILED) {
+		if (metadata->u.legacy.microtile != RADEON_LAYOUT_TILED)
+			*array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+		else
+			*array_mode = RADEON_SURF_MODE_1D;
+	} else {
+		*array_mode = RADEON_SURF_MODE_2D;
+	}
+
+	*is_scanout = metadata->u.legacy.scanout;
+}
+
+/* Flush the fast-clear state of a texture and submit the work.
+ * When the context is the screen's aux context, the flush runs under
+ * aux_context_lock.
+ */
+static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
+					    struct r600_texture *rtex)
+{
+	struct pipe_context *pipe = &rctx->b;
+	struct r600_common_screen *screen = rctx->screen;
+	const bool uses_aux_context = (pipe == screen->aux_context);
+
+	if (uses_aux_context)
+		mtx_lock(&screen->aux_context_lock);
+
+	pipe->flush_resource(pipe, &rtex->resource.b.b);
+	pipe->flush(pipe, NULL, 0);
+
+	if (uses_aux_context)
+		mtx_unlock(&screen->aux_context_lock);
+}
+
+/* Drop the CMASK of a texture, if it has one.
+ *
+ * The CMASK description is zeroed (with its base address register pointed at
+ * the texture itself), the dirty level mask and the FAST_CLEAR bit are
+ * cleared, a separately allocated CMASK buffer is unreferenced, and the
+ * screen-wide counters are bumped so that all contexts notice the change.
+ */
+static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
+				       struct r600_texture *rtex)
+{
+	struct r600_resource *self = &rtex->resource;
+
+	if (rtex->cmask.size == 0)
+		return;
+
+	assert(self->b.b.nr_samples <= 1);
+
+	/* Disable CMASK. */
+	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
+	rtex->cmask.base_address_reg = self->gpu_address >> 8;
+	rtex->dirty_level_mask = 0;
+
+	rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
+
+	/* Only a CMASK buffer distinct from the texture itself holds a reference. */
+	if (rtex->cmask_buffer != self)
+		r600_resource_reference(&rtex->cmask_buffer, NULL);
+
+	/* Notify all contexts about the change. */
+	p_atomic_inc(&rscreen->dirty_tex_counter);
+	p_atomic_inc(&rscreen->compressed_colortex_counter);
+}
+
+/* Replace the storage of a texture with a freshly created one that has
+ * new_bind_flag added to its bind flags, keeping the r600_texture object
+ * (and thus all pointers to it) intact.
+ *
+ * Does nothing when chip_class < SI, when the texture is shared, when the
+ * new resource cannot be created, or - for PIPE_BIND_LINEAR - when the
+ * texture is already linear or cannot be made linear.
+ *
+ * \param new_bind_flag       bind flag to add to the texture
+ * \param invalidate_storage  if true, the old contents are not copied
+ */
+static void r600_reallocate_texture_inplace(struct r600_common_context *rctx,
+ struct r600_texture *rtex,
+ unsigned new_bind_flag,
+ bool invalidate_storage)
+{
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_texture *new_tex;
+ struct pipe_resource templ = rtex->resource.b.b;
+ unsigned i;
+
+ templ.bind |= new_bind_flag;
+
+ /* r600g doesn't react to dirty_tex_descriptor_counter */
+ if (rctx->chip_class < SI)
+ return;
+
+ if (rtex->resource.b.is_shared)
+ return;
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ if (rtex->surface.is_linear)
+ return;
+
+ /* This fails with MSAA, depth, and compressed textures. */
+ if (r600_choose_tiling(rctx->screen, &templ) !=
+ RADEON_SURF_MODE_LINEAR_ALIGNED)
+ return;
+ }
+
+ new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
+ if (!new_tex)
+ return;
+
+ /* Copy the pixels to the new texture. */
+ if (!invalidate_storage) {
+ /* One whole-level copy per mipmap level. */
+ for (i = 0; i <= templ.last_level; i++) {
+ struct pipe_box box;
+
+ u_box_3d(0, 0, 0,
+ u_minify(templ.width0, i), u_minify(templ.height0, i),
+ util_max_layer(&templ, i) + 1, &box);
+
+ rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
+ &rtex->resource.b.b, i, &box);
+ }
+ }
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ r600_texture_discard_cmask(rctx->screen, rtex);
+ }
+
+ /* Replace the structure fields of rtex. */
+ rtex->resource.b.b.bind = templ.bind;
+ /* rtex takes its own reference on the new buffer ... */
+ pb_reference(&rtex->resource.buf, new_tex->resource.buf);
+ rtex->resource.gpu_address = new_tex->resource.gpu_address;
+ rtex->resource.vram_usage = new_tex->resource.vram_usage;
+ rtex->resource.gart_usage = new_tex->resource.gart_usage;
+ rtex->resource.bo_size = new_tex->resource.bo_size;
+ rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
+ rtex->resource.domains = new_tex->resource.domains;
+ rtex->resource.flags = new_tex->resource.flags;
+ rtex->size = new_tex->size;
+ rtex->db_render_format = new_tex->db_render_format;
+ rtex->db_compatible = new_tex->db_compatible;
+ rtex->can_sample_z = new_tex->can_sample_z;
+ rtex->can_sample_s = new_tex->can_sample_s;
+ rtex->surface = new_tex->surface;
+ rtex->fmask = new_tex->fmask;
+ rtex->cmask = new_tex->cmask;
+ rtex->cb_color_info = new_tex->cb_color_info;
+ rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
+ rtex->htile_offset = new_tex->htile_offset;
+ rtex->depth_cleared = new_tex->depth_cleared;
+ rtex->stencil_cleared = new_tex->stencil_cleared;
+ rtex->non_disp_tiling = new_tex->non_disp_tiling;
+ rtex->framebuffers_bound = new_tex->framebuffers_bound;
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ assert(!rtex->htile_offset);
+ assert(!rtex->cmask.size);
+ assert(!rtex->fmask.size);
+ assert(!rtex->is_depth);
+ }
+
+ /* ... so the temporary texture object can be released here. */
+ r600_texture_reference(&new_tex, NULL);
+
+ /* Bump the screen-wide counter for the changed texture. */
+ p_atomic_inc(&rctx->screen->dirty_tex_counter);
+}
+/**
+ * Export a winsys handle for a texture or buffer resource.
+ *
+ * Textures: multisampled and depth textures are rejected. A suballocated or
+ * tile-swizzled texture is first reallocated in place with PIPE_BIND_SHARED.
+ * Unless the caller passes PIPE_HANDLE_USAGE_EXPLICIT_FLUSH, a fast color
+ * clear is eliminated and any remaining CMASK is discarded. BO metadata is
+ * set the first time the resource is shared.
+ *
+ * Buffers: a suballocated buffer is copied into a newly created
+ * PIPE_BIND_SHARED buffer whose storage then replaces the old storage.
+ *
+ * In both cases the resource is marked as shared and "usage" is merged into
+ * external_usage.
+ *
+ * \return false if the resource cannot be exported or the replacement buffer
+ *         cannot be created; otherwise the result of the winsys
+ *         buffer_get_handle() call.
+ */
+static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ struct winsys_handle *whandle,
+ unsigned usage)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_common_context *rctx;
+ struct r600_resource *res = (struct r600_resource*)resource;
+ struct r600_texture *rtex = (struct r600_texture*)resource; /* only dereferenced when the resource is not a PIPE_BUFFER */
+ struct radeon_bo_metadata metadata;
+ bool update_metadata = false; /* never set to true anywhere in this function */
+ unsigned stride, offset, slice_size;
+
+ ctx = threaded_context_unwrap_sync(ctx);
+ rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); /* fall back to the screen's aux context when no context was passed */
+
+ if (resource->target != PIPE_BUFFER) {
+ /* This is not supported now, but it might be required for OpenCL
+ * interop in the future.
+ */
+ if (resource->nr_samples > 1 || rtex->is_depth)
+ return false;
+
+ /* Move a suballocated texture into a non-suballocated allocation. */
+ if (rscreen->ws->buffer_is_suballocated(res->buf) ||
+ rtex->surface.tile_swizzle) {
+ assert(!res->b.is_shared);
+ r600_reallocate_texture_inplace(rctx, rtex,
+ PIPE_BIND_SHARED, false);
+ rctx->b.flush(&rctx->b, NULL, 0); /* NOTE(review): presumably submits the copy made by the reallocation before the handle is exported - confirm */
+ assert(res->b.b.bind & PIPE_BIND_SHARED);
+ assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+ assert(rtex->surface.tile_swizzle == 0);
+ }
+
+ if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
+ rtex->cmask.size) {
+ /* Eliminate fast clear (CMASK) */
+ r600_eliminate_fast_color_clear(rctx, rtex);
+
+ /* Disable CMASK if flush_resource isn't going
+ * to be called.
+ */
+ if (rtex->cmask.size)
+ r600_texture_discard_cmask(rscreen, rtex);
+ }
+
+ /* Set metadata. */
+ if (!res->b.is_shared || update_metadata) {
+ r600_texture_init_metadata(rscreen, rtex, &metadata);
+ if (rscreen->query_opaque_metadata)
+ rscreen->query_opaque_metadata(rscreen, rtex,
+ &metadata);
+
+ rscreen->ws->buffer_set_metadata(res->buf, &metadata);
+ }
+
+ offset = rtex->surface.u.legacy.level[0].offset;
+ stride = rtex->surface.u.legacy.level[0].nblk_x *
+ rtex->surface.bpe;
+ slice_size = rtex->surface.u.legacy.level[0].slice_size;
+ } else {
+ /* Move a suballocated buffer into a non-suballocated allocation. */
+ if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+ assert(!res->b.is_shared);
+
+ /* Allocate a new buffer with PIPE_BIND_SHARED. */
+ struct pipe_resource templ = res->b.b;
+ templ.bind |= PIPE_BIND_SHARED;
+
+ struct pipe_resource *newb =
+ screen->resource_create(screen, &templ);
+ if (!newb)
+ return false;
+
+ /* Copy the old buffer contents to the new one. */
+ struct pipe_box box;
+ u_box_1d(0, newb->width0, &box);
+ rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
+ &res->b.b, 0, &box);
+ /* Move the new buffer storage to the old pipe_resource. */
+ r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
+ pipe_resource_reference(&newb, NULL);
+
+ assert(res->b.b.bind & PIPE_BIND_SHARED);
+ assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+ }
+
+ /* Buffers */
+ offset = 0;
+ stride = 0;
+ slice_size = 0;
+ }
+
+ if (res->b.is_shared) {
+ /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
+ * doesn't set it.
+ */
+ res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
+ if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
+ res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
+ } else {
+ res->b.is_shared = true; /* first export: remember the usage as given */
+ res->external_usage = usage;
+ }
+
+ return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
+ slice_size, whandle);
+}
+
+/**
+ * Destroy a texture.
+ *
+ * Drops the flushed-depth texture reference, the CMASK buffer reference
+ * (unless the CMASK buffer is the texture's own resource), and the winsys
+ * buffer, then frees the structure. "screen" is not used.
+ */
+static void r600_texture_destroy(struct pipe_screen *screen,
+				 struct pipe_resource *ptex)
+{
+	struct r600_texture *tex = (struct r600_texture *)ptex;
+
+	r600_texture_reference(&tex->flushed_depth_texture, NULL);
+
+	/* A CMASK that points back at this texture holds no separate reference. */
+	if (tex->cmask_buffer != &tex->resource)
+		r600_resource_reference(&tex->cmask_buffer, NULL);
+
+	pb_reference(&tex->resource.buf, NULL);
+	FREE(tex);
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl; /* forward declaration, installed by r600_texture_create_object(); presumably initialized further down in this file */
+
+/**
+ * Compute the FMASK layout of a texture for a given sample count.
+ *
+ * The number of samples can be specified independently of the texture.
+ * "out" is zeroed first; it stays zeroed if the sample count is not 2, 4
+ * or 8, or if the winsys surface_init() call fails.
+ */
+void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
+				 struct r600_texture *rtex,
+				 unsigned nr_samples,
+				 struct r600_fmask_info *out)
+{
+	struct radeon_surf fmask_surf = {};
+	struct pipe_resource fmask_templ;
+	unsigned bytes_per_elem;
+
+	memset(out, 0, sizeof(*out));
+
+	/* Element size depends on the sample count. */
+	if (nr_samples == 2 || nr_samples == 4) {
+		bytes_per_elem = 1;
+	} else if (nr_samples == 8) {
+		bytes_per_elem = 4;
+	} else {
+		R600_ERR("Invalid sample count for FMASK allocation.\n");
+		return;
+	}
+
+	/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
+	 * This can be fixed by writing a separate FMASK allocator specifically
+	 * for R600-R700 asics. */
+	if (rscreen->chip_class <= R700)
+		bytes_per_elem *= 2;
+
+	/* FMASK is allocated like an ordinary texture. */
+	fmask_templ = rtex->resource.b.b;
+	fmask_templ.nr_samples = 1;
+
+	/* Use the same parameters and tile mode. */
+	fmask_surf.u.legacy.bankw = rtex->surface.u.legacy.bankw;
+	fmask_surf.u.legacy.bankh = rtex->surface.u.legacy.bankh;
+	fmask_surf.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
+	fmask_surf.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
+
+	if (nr_samples <= 4)
+		fmask_surf.u.legacy.bankh = 4;
+
+	if (rscreen->ws->surface_init(rscreen->ws, &fmask_templ,
+				      rtex->surface.flags | RADEON_SURF_FMASK,
+				      bytes_per_elem, RADEON_SURF_MODE_2D,
+				      &fmask_surf)) {
+		R600_ERR("Got error in surface_init while allocating FMASK.\n");
+		return;
+	}
+
+	assert(fmask_surf.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
+
+	/* slice_tile_max is "number of 64-block tiles minus one", clamped at 0. */
+	out->slice_tile_max = (fmask_surf.u.legacy.level[0].nblk_x *
+			       fmask_surf.u.legacy.level[0].nblk_y) / 64;
+	if (out->slice_tile_max)
+		out->slice_tile_max -= 1;
+
+	out->size = fmask_surf.surf_size;
+	out->alignment = MAX2(256, fmask_surf.surf_alignment);
+	out->tile_swizzle = fmask_surf.tile_swizzle;
+	out->bank_height = fmask_surf.u.legacy.bankh;
+	out->pitch_in_pixels = fmask_surf.u.legacy.level[0].nblk_x;
+	out->tile_mode_index = fmask_surf.u.legacy.tiling_index[0];
+}
+
+/**
+ * Append FMASK to the texture allocation: compute its layout for the
+ * texture's own sample count, place it at the next aligned offset and grow
+ * the total size accordingly.
+ */
+static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	struct r600_fmask_info *fmask = &rtex->fmask;
+
+	r600_texture_get_fmask_info(rscreen, rtex,
+				    rtex->resource.b.b.nr_samples, fmask);
+
+	fmask->offset = align64(rtex->size, fmask->alignment);
+	rtex->size = fmask->offset + fmask->size;
+}
+
+/**
+ * Compute the CMASK slice_tile_max, alignment and size for a texture.
+ *
+ * Only those three fields of "out" are written; the others are left as the
+ * caller provided them.
+ */
+void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
+				 struct r600_texture *rtex,
+				 struct r600_cmask_info *out)
+{
+	const unsigned tile_w = 8;
+	const unsigned tile_h = 8;
+	const unsigned elems_per_tile = tile_w * tile_h;
+	const unsigned bits_per_elem = 4;
+	const unsigned cache_bits = 1024;
+	const unsigned pipes = rscreen->info.num_tile_pipes;
+	const unsigned interleave = rscreen->info.pipe_interleave_bytes;
+
+	/* Macro tile dimensions follow from the cache size and the pipe count. */
+	const unsigned elems_per_macro = (cache_bits / bits_per_elem) * pipes;
+	const unsigned px_per_macro = elems_per_macro * elems_per_tile;
+	const unsigned px_root = sqrt(px_per_macro);
+	const unsigned macro_w = util_next_power_of_two(px_root);
+	const unsigned macro_h = px_per_macro / macro_w;
+
+	/* Texture dimensions rounded up to whole macro tiles. */
+	const unsigned pitch = align(rtex->resource.b.b.width0, macro_w);
+	const unsigned rows = align(rtex->resource.b.b.height0, macro_h);
+
+	const unsigned base_align = pipes * interleave;
+	const unsigned slice_bytes =
+		((pitch * rows * bits_per_elem + 7) / 8) / elems_per_tile;
+
+	assert(macro_w % 128 == 0);
+	assert(macro_h % 128 == 0);
+
+	out->slice_tile_max = ((pitch * rows) / (128*128)) - 1;
+	out->alignment = MAX2(256, base_align);
+	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
+		    align(slice_bytes, base_align);
+}
+
+/**
+ * Append CMASK to the texture allocation and turn on the FAST_CLEAR bit in
+ * the cached CB_COLOR_INFO value.
+ */
+static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	struct r600_cmask_info *cmask = &rtex->cmask;
+
+	r600_texture_get_cmask_info(rscreen, rtex, cmask);
+
+	/* Place CMASK after everything allocated so far. */
+	cmask->offset = align64(rtex->size, cmask->alignment);
+	rtex->size = cmask->offset + cmask->size;
+
+	rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+}
+
+/**
+ * Allocate CMASK in a buffer of its own, unless the texture already has a
+ * CMASK buffer.
+ *
+ * On allocation failure cmask.size is reset to 0 and nothing else changes.
+ * On success the CMASK base address register value and the FAST_CLEAR bit
+ * are updated and the screen's compressed_colortex_counter is incremented.
+ */
+static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
+					      struct r600_texture *rtex)
+{
+	struct r600_cmask_info *cmask = &rtex->cmask;
+
+	if (rtex->cmask_buffer)
+		return;
+
+	assert(cmask->size == 0);
+
+	r600_texture_get_cmask_info(rscreen, rtex, cmask);
+
+	rtex->cmask_buffer = (struct r600_resource *)
+		r600_aligned_buffer_create(&rscreen->b,
+					   R600_RESOURCE_FLAG_UNMAPPABLE,
+					   PIPE_USAGE_DEFAULT,
+					   cmask->size,
+					   cmask->alignment);
+	if (!rtex->cmask_buffer) {
+		/* Leave the texture without CMASK. */
+		cmask->size = 0;
+		return;
+	}
+
+	/* update colorbuffer state bits */
+	cmask->base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
+	rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+
+	p_atomic_inc(&rscreen->compressed_colortex_counter);
+}
+
+/**
+ * Compute surface.htile_size and surface.htile_alignment for a texture.
+ *
+ * htile_size is left at 0 (meaning "no HTILE") on Evergreen-or-older chips
+ * with DRM 2.x older than 2.26, on R600 for textures wider or taller than
+ * 7680, and for an unexpected pipe count.
+ */
+static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	const unsigned pipes = rscreen->info.num_tile_pipes;
+	unsigned cluster_w, cluster_h;
+	unsigned padded_w, padded_h;
+	unsigned bytes_per_slice, base_align;
+
+	rtex->surface.htile_size = 0;
+
+	if (rscreen->chip_class <= EVERGREEN &&
+	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
+		return;
+
+	/* HW bug on R6xx. */
+	if (rscreen->chip_class == R600 &&
+	    (rtex->resource.b.b.width0 > 7680 ||
+	     rtex->resource.b.b.height0 > 7680))
+		return;
+
+	/* Cluster dimensions are selected by the number of tile pipes. */
+	if (pipes == 1) {
+		cluster_w = 32;
+		cluster_h = 16;
+	} else if (pipes == 2) {
+		cluster_w = 32;
+		cluster_h = 32;
+	} else if (pipes == 4) {
+		cluster_w = 64;
+		cluster_h = 32;
+	} else if (pipes == 8) {
+		cluster_w = 64;
+		cluster_h = 64;
+	} else if (pipes == 16) {
+		cluster_w = 128;
+		cluster_h = 64;
+	} else {
+		assert(0);
+		return;
+	}
+
+	padded_w = align(rtex->resource.b.b.width0, cluster_w * 8);
+	padded_h = align(rtex->resource.b.b.height0, cluster_h * 8);
+
+	/* One 4-byte element per 8x8 block. */
+	bytes_per_slice = ((padded_w * padded_h) / (8 * 8)) * 4;
+
+	base_align = pipes * rscreen->info.pipe_interleave_bytes;
+
+	rtex->surface.htile_alignment = base_align;
+	rtex->surface.htile_size =
+		(util_max_layer(&rtex->resource.b.b, 0) + 1) *
+		align(bytes_per_slice, base_align);
+}
+
+/**
+ * Append HTILE to the texture allocation.
+ *
+ * Computes the HTILE size; if it is non-zero, places HTILE at the next
+ * suitably aligned offset after the current end of the texture and grows
+ * rtex->size to cover it. Does nothing when HTILE is not available.
+ */
+static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex)
+{
+	r600_texture_get_htile_size(rscreen, rtex);
+
+	if (!rtex->surface.htile_size)
+		return;
+
+	/* rtex->size is a 64-bit quantity; the 32-bit align() would silently
+	 * truncate it, so use align64() exactly as the FMASK and CMASK
+	 * allocators in this file do. */
+	rtex->htile_offset = align64(rtex->size, rtex->surface.htile_alignment);
+	rtex->size = rtex->htile_offset + rtex->surface.htile_size;
+}
+/**
+ * Write a human-readable description of a texture's layout to a log.
+ *
+ * Emits the common parameters and the legacy surface layout, then the FMASK,
+ * CMASK and HTILE sub-allocations when present (non-zero fmask.size,
+ * cmask.size and htile_offset respectively), then one line per mip level,
+ * and finally the stencil tile split and one line per stencil level when the
+ * surface has stencil.
+ *
+ * \param rscreen  not referenced by this function
+ * \param rtex     texture to describe
+ * \param log      log context that receives the text
+ */
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log)
+{
+ int i;
+
+ /* Common parameters. */
+ u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+ "blk_h=%u, array_size=%u, last_level=%u, "
+ "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
+ rtex->resource.b.b.width0, rtex->resource.b.b.height0,
+ rtex->resource.b.b.depth0, rtex->surface.blk_w,
+ rtex->surface.blk_h,
+ rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
+ rtex->surface.bpe, rtex->resource.b.b.nr_samples,
+ rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
+
+ u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
+ "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
+ rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
+ rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
+ rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
+ (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
+
+ if (rtex->fmask.size)
+ u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
+ "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
+ rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
+ rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
+ rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
+
+ if (rtex->cmask.size)
+ u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
+ "slice_tile_max=%u\n",
+ rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
+ rtex->cmask.slice_tile_max);
+
+ if (rtex->htile_offset)
+ u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
+ "alignment=%u\n",
+ rtex->htile_offset, rtex->surface.htile_size,
+ rtex->surface.htile_alignment);
+
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++) /* npix_* are the base dimensions minified to level i */
+ u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
+ "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.level[i].offset,
+ rtex->surface.u.legacy.level[i].slice_size,
+ u_minify(rtex->resource.b.b.width0, i),
+ u_minify(rtex->resource.b.b.height0, i),
+ u_minify(rtex->resource.b.b.depth0, i),
+ rtex->surface.u.legacy.level[i].nblk_x,
+ rtex->surface.u.legacy.level[i].nblk_y,
+ rtex->surface.u.legacy.level[i].mode,
+ rtex->surface.u.legacy.tiling_index[i]);
+
+ if (rtex->surface.has_stencil) {
+ u_log_printf(log, " StencilLayout: tilesplit=%u\n",
+ rtex->surface.u.legacy.stencil_tile_split);
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
+ u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
+ "slice_size=%"PRIu64", npix_x=%u, "
+ "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.stencil_level[i].offset,
+ rtex->surface.u.legacy.stencil_level[i].slice_size,
+ u_minify(rtex->resource.b.b.width0, i),
+ u_minify(rtex->resource.b.b.height0, i),
+ u_minify(rtex->resource.b.b.depth0, i),
+ rtex->surface.u.legacy.stencil_level[i].nblk_x,
+ rtex->surface.u.legacy.stencil_level[i].nblk_y,
+ rtex->surface.u.legacy.stencil_level[i].mode,
+ rtex->surface.u.legacy.stencil_tiling_index[i]);
+ }
+ }
+}
+
+/* Common processing for r600_texture_create and r600_texture_from_handle
+ *
+ * Allocates an r600_texture, copies "base" and "surface" into it, sets up
+ * depth sampling and HTILE state for depth formats or FMASK/CMASK for
+ * multisampled color textures, and then either allocates a new backing
+ * buffer (buf == NULL) or adopts "buf". The CMASK and HTILE ranges are
+ * cleared once the buffer exists, and optional debug output is printed.
+ *
+ * Returns NULL on failure. In that case "buf" is not released by this
+ * function.
+ */
+static struct r600_texture *
+r600_texture_create_object(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ struct pb_buffer *buf,
+ struct radeon_surf *surface)
+{
+ struct r600_texture *rtex;
+ struct r600_resource *resource;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+
+ rtex = CALLOC_STRUCT(r600_texture);
+ if (!rtex)
+ return NULL;
+
+ resource = &rtex->resource;
+ resource->b.b = *base;
+ resource->b.b.next = NULL;
+ resource->b.vtbl = &r600_texture_vtbl;
+ pipe_reference_init(&resource->b.b.reference, 1);
+ resource->b.b.screen = screen;
+
+ /* don't include stencil-only formats which we don't support for rendering */
+ rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
+
+ rtex->surface = *surface;
+ rtex->size = rtex->surface.surf_size;
+ rtex->db_render_format = base->format;
+
+ /* Tiled depth textures utilize the non-displayable tile order.
+ * This must be done after r600_setup_surface.
+ * Applies to R600-Cayman. */
+ rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
+ /* Applies to GCN. */
+ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
+
+ if (rtex->is_depth) {
+ if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
+ R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
+ rscreen->chip_class >= EVERGREEN) {
+ rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
+ rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
+ } else {
+ if (rtex->resource.b.b.nr_samples <= 1 &&
+ (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
+ rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
+ rtex->can_sample_z = true;
+ }
+
+ if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
+ R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
+ rtex->db_compatible = true;
+
+ if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
+ r600_texture_allocate_htile(rscreen, rtex);
+ }
+ } else {
+ if (base->nr_samples > 1) {
+ if (!buf) {
+ r600_texture_allocate_fmask(rscreen, rtex);
+ r600_texture_allocate_cmask(rscreen, rtex);
+ rtex->cmask_buffer = &rtex->resource; /* CMASK is stored inside the texture's own buffer */
+ }
+ if (!rtex->fmask.size || !rtex->cmask.size) { /* also taken for MSAA textures created from an existing buf, since nothing was allocated above */
+ FREE(rtex);
+ return NULL;
+ }
+ }
+ }
+
+ /* Now create the backing buffer. */
+ if (!buf) {
+ r600_init_resource_fields(rscreen, resource, rtex->size,
+ rtex->surface.surf_alignment);
+
+ /* Displayable surfaces are not suballocated. */
+ if (resource->b.b.bind & PIPE_BIND_SCANOUT)
+ resource->flags |= RADEON_FLAG_NO_SUBALLOC;
+
+ if (!r600_alloc_resource(rscreen, resource)) {
+ FREE(rtex);
+ return NULL;
+ }
+ } else {
+ resource->buf = buf; /* adopts the caller's buffer pointer; no additional reference is taken here */
+ resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
+ resource->bo_size = buf->size;
+ resource->bo_alignment = buf->alignment;
+ resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
+ if (resource->domains & RADEON_DOMAIN_VRAM)
+ resource->vram_usage = buf->size;
+ else if (resource->domains & RADEON_DOMAIN_GTT)
+ resource->gart_usage = buf->size;
+ }
+
+ if (rtex->cmask.size) {
+ /* Initialize the cmask to 0xCC (= compressed state). */
+ r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
+ rtex->cmask.offset, rtex->cmask.size,
+ 0xCCCCCCCC);
+ }
+ if (rtex->htile_offset) {
+ uint32_t clear_value = 0;
+
+ r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+ rtex->htile_offset,
+ rtex->surface.htile_size,
+ clear_value);
+ }
+
+ /* Initialize the CMASK base register value. */
+ rtex->cmask.base_address_reg =
+ (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
+
+ if (rscreen->debug_flags & DBG_VM) {
+ fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
+ rtex->resource.gpu_address,
+ rtex->resource.gpu_address + rtex->resource.buf->size,
+ base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
+ base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
+ }
+
+ if (rscreen->debug_flags & DBG_TEX) {
+ puts("Texture:");
+ struct u_log_context log;
+ u_log_context_init(&log);
+ r600_print_texture_info(rscreen, rtex, &log);
+ u_log_new_page_print(&log, stdout);
+ fflush(stdout);
+ u_log_context_destroy(&log);
+ }
+
+ return rtex;
+}
+
+/**
+ * Pick the surface tiling mode for a texture template.
+ *
+ * MSAA resources are always 2D tiled and transfer resources always linear.
+ * Otherwise a texture that is not forced to be tiled becomes linear when it
+ * matches one of the linear candidates below; anything left is 1D tiled if
+ * small (or when 2D tiling is disabled by a debug flag) and 2D tiled
+ * otherwise.
+ */
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+		   const struct pipe_resource *templ)
+{
+	const struct util_format_description *desc = util_format_description(templ->format);
+	bool is_r600g = rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN;
+	bool is_2d_or_3d = templ->target == PIPE_TEXTURE_2D ||
+			   templ->target == PIPE_TEXTURE_3D;
+	bool is_db_surface = util_format_is_depth_or_stencil(templ->format) &&
+			     !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+	bool tiling_forced = (templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) != 0;
+
+	/* MSAA resources must be 2D tiled. */
+	if (templ->nr_samples > 1)
+		return RADEON_SURF_MODE_2D;
+
+	/* Transfer resources should be linear. */
+	if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
+		return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+	/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
+	if (is_r600g && (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) && is_2d_or_3d)
+		tiling_forced = true;
+
+	/* Handle common candidates for the linear mode.
+	 * Compressed textures and DB surfaces must always be tiled.
+	 */
+	if (!(tiling_forced || is_db_surface ||
+	      util_format_is_compressed(templ->format))) {
+		bool prefers_linear =
+			(rscreen->debug_flags & DBG_NO_TILING) ||
+			/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
+			desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ||
+			(templ->bind & PIPE_BIND_LINEAR) ||
+			/* Textures with a very small height are recommended to be linear. */
+			templ->target == PIPE_TEXTURE_1D ||
+			templ->target == PIPE_TEXTURE_1D_ARRAY ||
+			/* Only very thin and long 2D textures should benefit from
+			 * linear_aligned. */
+			(templ->width0 > 8 && templ->height0 <= 2) ||
+			/* Textures likely to be mapped often. */
+			templ->usage == PIPE_USAGE_STAGING ||
+			templ->usage == PIPE_USAGE_STREAM;
+
+		if (prefers_linear)
+			return RADEON_SURF_MODE_LINEAR_ALIGNED;
+	}
+
+	/* Make small textures 1D tiled. */
+	if (templ->width0 <= 16 || templ->height0 <= 16 ||
+	    (rscreen->debug_flags & DBG_NO_2D_TILING))
+		return RADEON_SURF_MODE_1D;
+
+	/* The allocator will switch to 1D if needed. */
+	return RADEON_SURF_MODE_2D;
+}
+
+/**
+ * Create a new texture from a template.
+ *
+ * Initializes a surface with the tiling mode chosen for the template and
+ * hands it to r600_texture_create_object() without an existing buffer.
+ * Returns NULL if the surface cannot be initialized or the object cannot be
+ * created.
+ */
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+					  const struct pipe_resource *templ)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct radeon_surf surf = {0};
+	bool flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+	enum radeon_surf_mode mode = r600_choose_tiling(rscreen, templ);
+	struct r600_texture *tex;
+
+	if (r600_init_surface(rscreen, &surf, templ, mode, 0, 0,
+			      false, false, flushed_depth) != 0)
+		return NULL;
+
+	tex = r600_texture_create_object(screen, templ, NULL, &surf);
+	return (struct pipe_resource *)tex;
+}
+
+/**
+ * Import a texture from a winsys handle.
+ *
+ * Only 2D / RECT textures with depth 1 and a single mip level are accepted.
+ * The buffer is opened through the winsys, its metadata is imported into a
+ * surface description, and a texture object is created on top of it. The
+ * resulting resource is marked as shared with the given external usage.
+ *
+ * \return the new resource, or NULL on failure. On every failure after the
+ *         buffer has been opened, the buffer reference obtained from the
+ *         winsys is released again.
+ */
+static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
+						      const struct pipe_resource *templ,
+						      struct winsys_handle *whandle,
+						      unsigned usage)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct pb_buffer *buf = NULL;
+	unsigned stride = 0, offset = 0;
+	enum radeon_surf_mode array_mode;
+	struct radeon_surf surface = {};
+	int r;
+	struct radeon_bo_metadata metadata = {};
+	struct r600_texture *rtex;
+	bool is_scanout;
+
+	/* Support only 2D textures without mipmaps */
+	if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
+	    templ->depth0 != 1 || templ->last_level != 0)
+		return NULL;
+
+	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
+	if (!buf)
+		return NULL;
+
+	rscreen->ws->buffer_get_metadata(buf, &metadata);
+	r600_surface_import_metadata(rscreen, &surface, &metadata,
+				     &array_mode, &is_scanout);
+
+	r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
+			      offset, true, is_scanout, false);
+	if (r)
+		goto fail_buf;
+
+	/* On success the texture adopts "buf"; on failure it is still ours. */
+	rtex = r600_texture_create_object(screen, templ, buf, &surface);
+	if (!rtex)
+		goto fail_buf;
+
+	rtex->resource.b.is_shared = true;
+	rtex->resource.external_usage = usage;
+
+	if (rscreen->apply_opaque_metadata)
+		rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+
+	assert(rtex->surface.tile_swizzle == 0);
+	return &rtex->resource.b.b;
+
+fail_buf:
+	/* Nothing took ownership of the imported buffer; drop our reference
+	 * so it is not leaked. */
+	pb_reference(&buf, NULL);
+	return NULL;
+}
+
+/**
+ * Create the texture that holds a flushed (decompressed) copy of a depth
+ * texture.
+ *
+ * With "staging" set, the new texture is a staging/transfer resource and is
+ * stored in *staging. Without it, the texture is stored in
+ * rtex->flushed_depth_texture; if one already exists nothing is done, and
+ * the format may be narrowed to the plane that actually needs flushing.
+ *
+ * \return true on success (or if the flushed texture already exists),
+ *         false if the texture could not be created.
+ */
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
+				     struct pipe_resource *texture,
+				     struct r600_texture **staging)
+{
+	struct r600_texture *rtex = (struct r600_texture*)texture;
+	struct r600_texture **dst;
+	struct pipe_resource templ;
+	enum pipe_format format = texture->format;
+
+	if (staging) {
+		dst = staging;
+	} else {
+		dst = &rtex->flushed_depth_texture;
+
+		if (*dst)
+			return true; /* it's ready */
+
+		if (!rtex->can_sample_z && rtex->can_sample_s) {
+			if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+				/* Save memory by not allocating the S plane. */
+				format = PIPE_FORMAT_Z32_FLOAT;
+			} else if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
+				   format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
+				/* Save memory bandwidth by not copying the
+				 * stencil part during flush.
+				 *
+				 * This potentially increases memory bandwidth
+				 * if an application uses both Z and S texturing
+				 * simultaneously (a flushed Z24S8 texture
+				 * would be stored compactly), but how often
+				 * does that really happen?
+				 */
+				format = PIPE_FORMAT_Z24X8_UNORM;
+			}
+		} else if (!rtex->can_sample_s && rtex->can_sample_z) {
+			assert(util_format_has_stencil(util_format_description(format)));
+
+			/* DB->CB copies to an 8bpp surface don't work. */
+			format = PIPE_FORMAT_X24S8_UINT;
+		}
+	}
+
+	/* Same dimensions as the source, minus the depth-stencil binding. */
+	memset(&templ, 0, sizeof(templ));
+	templ.target = texture->target;
+	templ.format = format;
+	templ.width0 = texture->width0;
+	templ.height0 = texture->height0;
+	templ.depth0 = texture->depth0;
+	templ.array_size = texture->array_size;
+	templ.last_level = texture->last_level;
+	templ.nr_samples = texture->nr_samples;
+	templ.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
+	templ.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+
+	if (staging) {
+		templ.usage = PIPE_USAGE_STAGING;
+		templ.flags |= R600_RESOURCE_FLAG_TRANSFER;
+	} else {
+		templ.usage = PIPE_USAGE_DEFAULT;
+	}
+
+	*dst = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &templ);
+	if (!*dst) {
+		R600_ERR("failed to create temporary texture to hold flushed depth\n");
+		return false;
+	}
+
+	(*dst)->non_disp_tiling = false;
+	return true;
+}
+
+/**
+ * Fill in a pipe_resource template sized like "box", meant to hold a
+ * subregion of the texture "orig" at mipmap level "level".
+ *
+ * The template is zeroed first. It becomes a 2D array with box->depth layers
+ * when the box is deeper than one slice and the level has more than one
+ * layer; otherwise it is a plain 2D texture with a single layer.
+ */
+static void r600_init_temp_resource_from_box(struct pipe_resource *res,
+					     struct pipe_resource *orig,
+					     const struct pipe_box *box,
+					     unsigned level, unsigned flags)
+{
+	bool multi_layer;
+
+	memset(res, 0, sizeof(*res));
+	res->format = orig->format;
+	res->width0 = box->width;
+	res->height0 = box->height;
+	res->depth0 = 1;
+	res->array_size = 1;
+	res->flags = flags;
+
+	if (flags & R600_RESOURCE_FLAG_TRANSFER)
+		res->usage = PIPE_USAGE_STAGING;
+	else
+		res->usage = PIPE_USAGE_DEFAULT;
+
+	/* We must set the correct texture target and dimensions for a 3D box. */
+	multi_layer = box->depth > 1 && util_max_layer(orig, level) > 0;
+	res->target = multi_layer ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
+	if (multi_layer)
+		res->array_size = box->depth;
+}
+
+/**
+ * Tell whether a texture's storage may be thrown away for a transfer.
+ *
+ * That is only the case on SI and newer, for a non-shared texture with a
+ * single mip level, when the transfer does not read and the box covers the
+ * whole of level 0.
+ */
+static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex,
+					unsigned transfer_usage,
+					const struct pipe_box *box)
+{
+	/* r600g doesn't react to dirty_tex_descriptor_counter */
+	if (rscreen->chip_class < SI)
+		return false;
+
+	if (rtex->resource.b.is_shared)
+		return false;
+
+	if (transfer_usage & PIPE_TRANSFER_READ)
+		return false;
+
+	if (rtex->resource.b.b.last_level != 0)
+		return false;
+
+	return util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
+						box->x, box->y, box->z,
+						box->width, box->height,
+						box->depth);
+}
+
+/**
+ * Replace the backing storage of a linear, non-depth texture with a fresh
+ * allocation, recompute the CMASK base address, bump the screen's
+ * dirty_tex_counter and account the new allocation in
+ * num_alloc_tex_transfer_bytes.
+ */
+static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
+					    struct r600_texture *rtex)
+{
+	struct r600_resource *res = &rtex->resource;
+
+	/* There is no point in discarding depth and tiled buffers. */
+	assert(!rtex->is_depth);
+	assert(rtex->surface.is_linear);
+
+	/* Reallocate the buffer in the same pipe_resource. */
+	r600_alloc_resource(rctx->screen, res);
+
+	/* Initialize the CMASK base address (needed even without CMASK). */
+	rtex->cmask.base_address_reg =
+		(res->gpu_address + rtex->cmask.offset) >> 8;
+
+	p_atomic_inc(&rctx->screen->dirty_tex_counter);
+
+	rctx->num_alloc_tex_transfer_bytes += rtex->size;
+}
+
+/**
+ * Map a box of a texture level for CPU access.
+ *
+ * Depth textures always go through a flushed/staging depth texture. Color
+ * textures are mapped directly when they are linear and cheap to access;
+ * otherwise a temporary linear staging texture is created and, for reads,
+ * filled from the real texture. On APUs the tenth qualifying level-0
+ * transfer triggers an in-place reallocation of the texture as linear.
+ *
+ * \param ctx        context performing the transfer
+ * \param texture    texture to map (must not itself be a transfer resource)
+ * \param level      mipmap level
+ * \param usage      PIPE_TRANSFER_* flags
+ * \param box        region to map; all dimensions must be non-zero
+ * \param ptransfer  receives the transfer object on success
+ * \return pointer to the mapped region, or NULL on failure. On failure the
+ *         transfer object, its staging resource and its reference to
+ *         "texture" are all released and *ptransfer is left untouched.
+ */
+static void *r600_texture_transfer_map(struct pipe_context *ctx,
+					struct pipe_resource *texture,
+					unsigned level,
+					unsigned usage,
+					const struct pipe_box *box,
+					struct pipe_transfer **ptransfer)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_texture *rtex = (struct r600_texture*)texture;
+	struct r600_transfer *trans;
+	struct r600_resource *buf;
+	unsigned offset = 0;
+	char *map;
+	bool use_staging_texture = false;
+
+	assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
+	assert(box->width && box->height && box->depth);
+
+	/* Depth textures use staging unconditionally. */
+	if (!rtex->is_depth) {
+		/* Degrade the tile mode if we get too many transfers on APUs.
+		 * On dGPUs, the staging texture is always faster.
+		 * Only count uploads that are at least 4x4 pixels large.
+		 */
+		if (!rctx->screen->info.has_dedicated_vram &&
+		    level == 0 &&
+		    box->width >= 4 && box->height >= 4 &&
+		    p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
+			bool can_invalidate =
+				r600_can_invalidate_texture(rctx->screen, rtex,
+							    usage, box);
+
+			r600_reallocate_texture_inplace(rctx, rtex,
+							PIPE_BIND_LINEAR,
+							can_invalidate);
+		}
+
+		/* Tiled textures need to be converted into a linear texture for CPU
+		 * access. The staging texture is always linear and is placed in GART.
+		 *
+		 * Reading from VRAM or GTT WC is slow, always use the staging
+		 * texture in this case.
+		 *
+		 * Use the staging texture for uploads if the underlying BO
+		 * is busy.
+		 */
+		if (!rtex->surface.is_linear)
+			use_staging_texture = true;
+		else if (usage & PIPE_TRANSFER_READ)
+			use_staging_texture =
+				rtex->resource.domains & RADEON_DOMAIN_VRAM ||
+				rtex->resource.flags & RADEON_FLAG_GTT_WC;
+		/* Write & linear only: */
+		else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
+							 RADEON_USAGE_READWRITE) ||
+			 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
+						RADEON_USAGE_READWRITE)) {
+			/* It's busy. */
+			if (r600_can_invalidate_texture(rctx->screen, rtex,
+							usage, box))
+				r600_texture_invalidate_storage(rctx, rtex);
+			else
+				use_staging_texture = true;
+		}
+	}
+
+	trans = CALLOC_STRUCT(r600_transfer);
+	if (!trans)
+		return NULL;
+	/* From here on every failure must go through fail_trans so that this
+	 * reference is dropped again. */
+	pipe_resource_reference(&trans->b.b.resource, texture);
+	trans->b.b.level = level;
+	trans->b.b.usage = usage;
+	trans->b.b.box = *box;
+
+	if (rtex->is_depth) {
+		struct r600_texture *staging_depth;
+
+		if (rtex->resource.b.b.nr_samples > 1) {
+			/* MSAA depth buffers need to be converted to single sample buffers.
+			 *
+			 * Mapping MSAA depth buffers can occur if ReadPixels is called
+			 * with a multisample GLX visual.
+			 *
+			 * First downsample the depth buffer to a temporary texture,
+			 * then decompress the temporary one to staging.
+			 *
+			 * Only the region being mapped is transferred.
+			 */
+			struct pipe_resource resource;
+
+			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
+
+			if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
+				R600_ERR("failed to create temporary texture to hold untiled copy\n");
+				goto fail_trans;
+			}
+
+			if (usage & PIPE_TRANSFER_READ) {
+				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+				if (!temp) {
+					R600_ERR("failed to create a temporary depth texture\n");
+					/* staging_depth is not owned by trans yet. */
+					r600_texture_reference(&staging_depth, NULL);
+					goto fail_trans;
+				}
+
+				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
+				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
+							    0, 0, 0, box->depth, 0, 0);
+				pipe_resource_reference(&temp, NULL);
+			}
+
+			/* Just get the strides. */
+			r600_texture_get_offset(rctx->screen, staging_depth, level, NULL,
+						&trans->b.b.stride,
+						&trans->b.b.layer_stride);
+		} else {
+			/* XXX: only readback the rectangle which is being mapped? */
+			/* XXX: when discard is true, no need to read back from depth texture */
+			if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
+				R600_ERR("failed to create temporary texture to hold untiled copy\n");
+				goto fail_trans;
+			}
+
+			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
+						    level, level,
+						    box->z, box->z + box->depth - 1,
+						    0, 0);
+
+			offset = r600_texture_get_offset(rctx->screen, staging_depth,
+							 level, box,
+							 &trans->b.b.stride,
+							 &trans->b.b.layer_stride);
+		}
+
+		trans->staging = (struct r600_resource*)staging_depth;
+		buf = trans->staging;
+	} else if (use_staging_texture) {
+		struct pipe_resource resource;
+		struct r600_texture *staging;
+
+		r600_init_temp_resource_from_box(&resource, texture, box, level,
+						 R600_RESOURCE_FLAG_TRANSFER);
+		resource.usage = (usage & PIPE_TRANSFER_READ) ?
+			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
+
+		/* Create the temporary texture. */
+		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
+		if (!staging) {
+			R600_ERR("failed to create temporary texture to hold untiled copy\n");
+			goto fail_trans;
+		}
+		trans->staging = &staging->resource;
+
+		/* Just get the strides. */
+		r600_texture_get_offset(rctx->screen, staging, 0, NULL,
+					&trans->b.b.stride,
+					&trans->b.b.layer_stride);
+
+		if (usage & PIPE_TRANSFER_READ)
+			r600_copy_to_staging_texture(ctx, trans);
+		else
+			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+
+		buf = trans->staging;
+	} else {
+		/* the resource is mapped directly */
+		offset = r600_texture_get_offset(rctx->screen, rtex, level, box,
+						 &trans->b.b.stride,
+						 &trans->b.b.layer_stride);
+		buf = &rtex->resource;
+	}
+
+	if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage)))
+		goto fail_trans;
+
+	*ptransfer = &trans->b.b;
+	return map + offset;
+
+fail_trans:
+	/* trans->staging is NULL unless a staging texture was attached. */
+	r600_resource_reference(&trans->staging, NULL);
+	pipe_resource_reference(&trans->b.b.resource, NULL);
+	FREE(trans);
+	return NULL;
+}
+/*
+ * Unmap a texture transfer: copy staged data back into the texture when the
+ * transfer was mapped for writing through a staging resource, drop the
+ * staging and resource references, and free the transfer.
+ */
+static void r600_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer* transfer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+ struct pipe_resource *texture = transfer->resource;
+ struct r600_texture *rtex = (struct r600_texture*)texture;
+ /* Write-back is only needed for write transfers that used a staging resource. */
+ if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
+ if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { /* single-sample depth: copy via resource_copy_region */
+ ctx->resource_copy_region(ctx, texture, transfer->level,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ &rtransfer->staging->b.b, transfer->level,
+ &transfer->box);
+ } else {
+ r600_copy_from_staging_texture(ctx, rtransfer);
+ }
+ }
+
+ if (rtransfer->staging) {
+ rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size; /* feeds the flush heuristic below */
+ r600_resource_reference(&rtransfer->staging, NULL);
+ }
+
+ /* Heuristic for {upload, draw, upload, draw, ..}:
+ *
+ * Flush the gfx IB if we've allocated too much texture storage.
+ *
+ * The idea is that we don't want to build IBs that use too much
+ * memory and put pressure on the kernel memory manager and we also
+ * want to make temporary and invalidated buffers go idle ASAP to
+ * decrease the total memory usage or make them reusable. The memory
+ * usage will be slightly higher than given here because of the buffer
+ * cache in the winsys.
+ *
+ * The result is that the kernel memory manager is never a bottleneck.
+ */
+ if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
+ rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ rctx->num_alloc_tex_transfer_bytes = 0;
+ }
+
+ pipe_resource_reference(&transfer->resource, NULL);
+ FREE(transfer);
+}
+/*
+ * Resource vtable for r600 textures. The initializer is positional; the
+ * trailing comment on each entry names the slot it fills.
+ */
+static const struct u_resource_vtbl r600_texture_vtbl =
+{
+ NULL, /* get_handle */
+ r600_texture_destroy, /* resource_destroy */
+ r600_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ r600_texture_transfer_unmap, /* transfer_unmap */
+};
+
+/*
+ * Allocate an r600_surface for "texture" and fill in its generic
+ * pipe_surface part from "templ".
+ *
+ * width/height are stored in the pipe_surface, width0/height0 in the
+ * r600_surface. The new surface holds its own reference on "texture" and
+ * starts with a reference count of 1. Returns NULL if allocation fails.
+ */
+struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
+		struct pipe_resource *texture,
+		const struct pipe_surface *templ,
+		unsigned width0, unsigned height0,
+		unsigned width, unsigned height)
+{
+	struct r600_surface *rsurf;
+
+	rsurf = CALLOC_STRUCT(r600_surface);
+	if (rsurf == NULL)
+		return NULL;
+
+	/* The requested layer range must lie inside the selected mip level. */
+	assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
+	assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
+
+	/* Driver-private dimensions. */
+	rsurf->width0 = width0;
+	rsurf->height0 = height0;
+
+	/* Generic pipe_surface part. */
+	pipe_reference_init(&rsurf->base.reference, 1);
+	pipe_resource_reference(&rsurf->base.texture, texture);
+	rsurf->base.context = pipe;
+	rsurf->base.format = templ->format;
+	rsurf->base.u = templ->u;
+	rsurf->base.width = width;
+	rsurf->base.height = height;
+
+	return &rsurf->base;
+}
+
+/*
+ * pipe_context::create_surface hook.
+ *
+ * Sizes the surface for mip level templ->u.tex.level of "tex". When a
+ * non-buffer texture is viewed with a different format whose block
+ * dimensions differ, the sizes are recomputed from the texture's block
+ * counts before r600_create_surface_custom() is called.
+ */
+static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
+		struct pipe_resource *tex,
+		const struct pipe_surface *templ)
+{
+	const unsigned level = templ->u.tex.level;
+	unsigned width0 = tex->width0;
+	unsigned height0 = tex->height0;
+	unsigned width = u_minify(tex->width0, level);
+	unsigned height = u_minify(tex->height0, level);
+
+	if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
+		const struct util_format_description *src_desc =
+			util_format_description(tex->format);
+		const struct util_format_description *view_desc =
+			util_format_description(templ->format);
+
+		/* Reinterpretation is only valid between equally sized blocks. */
+		assert(src_desc->block.bits == view_desc->block.bits);
+
+		/* Adjust the sizes if and only if the block width or height
+		 * changes between the two formats. */
+		if (src_desc->block.width != view_desc->block.width ||
+		    src_desc->block.height != view_desc->block.height) {
+			width = util_format_get_nblocksx(tex->format, width) *
+				view_desc->block.width;
+			height = util_format_get_nblocksy(tex->format, height) *
+				 view_desc->block.height;
+
+			width0 = util_format_get_nblocksx(tex->format, width0);
+			height0 = util_format_get_nblocksy(tex->format, height0);
+		}
+	}
+
+	return r600_create_surface_custom(pipe, tex, templ,
+					  width0, height0,
+					  width, height);
+}
+
+/*
+ * pipe_context::surface_destroy hook: drop the FMASK/CMASK buffer
+ * references and the texture reference held by the surface, then free it.
+ */
+static void r600_surface_destroy(struct pipe_context *pipe,
+		struct pipe_surface *surface)
+{
+	struct r600_surface *rsurf = (struct r600_surface*)surface;
+
+	r600_resource_reference(&rsurf->cb_buffer_fmask, NULL);
+	r600_resource_reference(&rsurf->cb_buffer_cmask, NULL);
+	pipe_resource_reference(&surface->texture, NULL);
+
+	FREE(surface);
+}
+
+/*
+ * pipe_context::clear_texture hook.
+ *
+ * Creates a temporary surface covering layers box->z .. box->z+depth-1 of
+ * "level", unpacks one texel of clear data from "data" and clears the
+ * box->x/y/width/height rectangle through the depth-stencil or
+ * render-target clear path. Returns silently if the surface cannot be
+ * created.
+ */
+static void r600_clear_texture(struct pipe_context *pipe,
+		struct pipe_resource *tex,
+		unsigned level,
+		const struct pipe_box *box,
+		const void *data)
+{
+	const struct util_format_description *desc =
+		util_format_description(tex->format);
+	struct r600_texture *rtex = (struct r600_texture*)tex;
+	struct pipe_screen *screen = pipe->screen;
+	struct pipe_surface tmpl = {{0}};
+	struct pipe_surface *sf;
+
+	tmpl.format = tex->format;
+	tmpl.u.tex.level = level;
+	tmpl.u.tex.first_layer = box->z;
+	tmpl.u.tex.last_layer = box->z + box->depth - 1;
+
+	sf = pipe->create_surface(pipe, tex, &tmpl);
+	if (!sf)
+		return;
+
+	if (!rtex->is_depth) {
+		union pipe_color_union color;
+
+		/* pipe_color_union requires the full vec4 representation. */
+		if (util_format_is_pure_uint(tex->format))
+			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
+		else if (util_format_is_pure_sint(tex->format))
+			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
+		else
+			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
+
+		if (!screen->is_format_supported(screen, tex->format,
+						 tex->target, 0,
+						 PIPE_BIND_RENDER_TARGET)) {
+			/* Software fallback - just for R9G9B9E5_FLOAT */
+			util_clear_render_target(pipe, sf, &color,
+						 box->x, box->y,
+						 box->width, box->height);
+		} else {
+			pipe->clear_render_target(pipe, sf, &color,
+						  box->x, box->y,
+						  box->width, box->height, false);
+		}
+	} else {
+		/* Depth is always present; stencil only if the surface has it. */
+		unsigned clear = PIPE_CLEAR_DEPTH;
+		uint8_t stencil = 0;
+		float depth;
+
+		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+
+		if (rtex->surface.has_stencil) {
+			clear |= PIPE_CLEAR_STENCIL;
+			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
+		}
+
+		pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
+					  box->x, box->y,
+					  box->width, box->height, false);
+	}
+
+	pipe_surface_reference(&sf, NULL);
+}
+/*
+ * Map a pipe format to a V_0280A0_SWAP_* value by matching the format's
+ * channel swizzle against the patterns below. Returns ~0U when the layout is
+ * not plain (R11G11B10_FLOAT excepted) or no pattern matches. For some
+ * patterns do_endian_swap selects a different SWAP value.
+ */
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
+
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
+ return V_0280A0_SWAP_STD;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return ~0U; /* only plain layouts are handled */
+
+ switch (desc->nr_channels) {
+ case 1:
+ if (HAS_SWIZZLE(0,X))
+ return V_0280A0_SWAP_STD; /* X___ */
+ else if (HAS_SWIZZLE(3,X))
+ return V_0280A0_SWAP_ALT_REV; /* ___X */
+ break;
+ case 2:
+ if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
+ (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
+ (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
+ return V_0280A0_SWAP_STD; /* XY__ */
+ else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
+ (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
+ (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
+ /* YX__ */
+ return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
+ else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
+ return V_0280A0_SWAP_ALT; /* X__Y */
+ else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
+ return V_0280A0_SWAP_ALT_REV; /* Y__X */
+ break;
+ case 3:
+ if (HAS_SWIZZLE(0,X))
+ return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
+ else if (HAS_SWIZZLE(0,Z))
+ return V_0280A0_SWAP_STD_REV; /* ZYX */
+ break;
+ case 4:
+ /* check the middle channels, the 1st and 4th channel can be NONE */
+ if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
+ return V_0280A0_SWAP_STD; /* XYZW */
+ } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
+ return V_0280A0_SWAP_STD_REV; /* WZYX */
+ } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
+ return V_0280A0_SWAP_ALT; /* ZYXW */
+ } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
+ /* YZWX */
+ if (desc->is_array)
+ return V_0280A0_SWAP_ALT_REV;
+ else
+ return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
+ }
+ break;
+ }
+ return ~0U; /* no swizzle pattern matched */
+}
+
+/* FAST COLOR CLEAR */
+
+/*
+ * Pack "color" for "surface_format" and store the first two 32-bit words of
+ * the packed value in rtex->color_clear_value.
+ */
+static void evergreen_set_clear_color(struct r600_texture *rtex,
+		enum pipe_format surface_format,
+		const union pipe_color_union *color)
+{
+	union util_color packed;
+
+	/* Start from all-zero so that unused bytes are deterministic. */
+	memset(&packed, 0, sizeof(packed));
+
+	if (rtex->surface.bpe == 16) {
+		/* DCC fast clear only:
+		 *   CLEAR_WORD0 = R = G = B
+		 *   CLEAR_WORD1 = A
+		 */
+		assert(color->ui[0] == color->ui[1] &&
+		       color->ui[0] == color->ui[2]);
+		packed.ui[0] = color->ui[0];
+		packed.ui[1] = color->ui[3];
+	} else if (util_format_is_pure_uint(surface_format)) {
+		util_format_write_4ui(surface_format, color->ui, 0, &packed, 0, 0, 0, 1, 1);
+	} else if (util_format_is_pure_sint(surface_format)) {
+		util_format_write_4i(surface_format, color->i, 0, &packed, 0, 0, 0, 1, 1);
+	} else {
+		util_pack_color(color->f, surface_format, &packed);
+	}
+
+	/* Only two dwords of the packed value are kept. */
+	memcpy(rtex->color_clear_value, &packed, 2 * sizeof(uint32_t));
+}
+/*
+ * Try to fast-clear each bound colorbuffer selected in *buffers by clearing
+ * its CMASK instead of the pixel data. For every buffer that is fast-cleared,
+ * the clear color is recorded on the texture, its bit is set in *dirty_cbufs
+ * (if non-NULL), fb_state is marked dirty and its bit is removed from
+ * *buffers. Buffers that fail any of the checks below are left in *buffers.
+ */
+void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+ struct pipe_framebuffer_state *fb,
+ struct r600_atom *fb_state,
+ unsigned *buffers, ubyte *dirty_cbufs,
+ const union pipe_color_union *color)
+{
+ int i;
+
+ /* This function is broken in BE, so just disable this path for now */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ return;
+#endif
+
+ if (rctx->render_cond) /* no fast clear while render_cond is set */
+ return;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ struct r600_texture *tex;
+ unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
+
+ if (!fb->cbufs[i])
+ continue;
+
+ /* if this colorbuffer is not being cleared */
+ if (!(*buffers & clear_bit))
+ continue;
+
+ tex = (struct r600_texture *)fb->cbufs[i]->texture;
+
+ /* the clear is allowed if all layers are bound */
+ if (fb->cbufs[i]->u.tex.first_layer != 0 ||
+ fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
+ continue;
+ }
+
+ /* cannot clear mipmapped textures */
+ if (fb->cbufs[i]->texture->last_level != 0) {
+ continue;
+ }
+
+ /* only supported on tiled surfaces */
+ if (tex->surface.is_linear) {
+ continue;
+ }
+
+ /* shared textures can't use fast clear without an explicit flush,
+ * because there is no way to communicate the clear color among
+ * all clients
+ */
+ if (tex->resource.b.is_shared &&
+ !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
+ continue;
+
+ {
+ /* 128-bit formats are unsupported */
+ if (tex->surface.bpe > 8) {
+ continue;
+ }
+
+ /* ensure CMASK is enabled */
+ r600_texture_alloc_cmask_separate(rctx->screen, tex);
+ if (tex->cmask.size == 0) {
+ continue;
+ }
+
+ /* Do the fast clear. */
+ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
+ tex->cmask.offset, tex->cmask.size, 0,
+ R600_COHERENCY_CB_META);
+
+ bool need_compressed_update = !tex->dirty_level_mask; /* sampled before the mask is updated below */
+
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+
+ if (need_compressed_update)
+ p_atomic_inc(&rctx->screen->compressed_colortex_counter);
+ }
+
+ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
+
+ if (dirty_cbufs)
+ *dirty_cbufs |= 1 << i;
+ rctx->set_atom_dirty(rctx, fb_state, true);
+ *buffers &= ~clear_bit; /* drop this buffer from the caller's clear mask */
+ }
+}
+
+/*
+ * pipe_screen::memobj_create_from_handle hook.
+ *
+ * Imports the buffer behind "whandle" through the winsys and wraps it in a
+ * newly allocated r600_memory_object that records the buffer, the stride and
+ * offset reported by the import, and the "dedicated" flag.
+ *
+ * Returns NULL if the allocation or the import fails.
+ */
+static struct pipe_memory_object *
+r600_memobj_from_handle(struct pipe_screen *screen,
+			struct winsys_handle *whandle,
+			bool dedicated)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
+	struct pb_buffer *buf = NULL;
+	uint32_t stride, offset;
+
+	if (!memobj)
+		return NULL;
+
+	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
+					      &stride, &offset);
+	if (!buf) {
+		/* Memory from CALLOC_STRUCT is released with FREE, as
+		 * everywhere else in this file, not with raw free(). */
+		FREE(memobj);
+		return NULL;
+	}
+
+	memobj->b.dedicated = dedicated;
+	memobj->buf = buf;
+	memobj->stride = stride;
+	memobj->offset = offset;
+
+	return (struct pipe_memory_object *)memobj;
+}
+
+/*
+ * pipe_screen::memobj_destroy hook: drop the memory object's buffer
+ * reference and free the object.
+ */
+static void
+r600_memobj_destroy(struct pipe_screen *screen,
+		    struct pipe_memory_object *_memobj)
+{
+	struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+
+	pb_reference(&memobj->buf, NULL);
+	/* The object was allocated with CALLOC_STRUCT, so release it with
+	 * FREE rather than raw free(). */
+	FREE(memobj);
+}
+/*
+ * pipe_screen::resource_from_memobj hook: create a texture described by
+ * "templ" on top of the buffer held by the memory object, at "offset".
+ * Dedicated memory objects take their tiling from the buffer metadata;
+ * others are treated as linear-aligned and non-scanout. Returns NULL if
+ * surface initialization or texture creation fails.
+ */
+static struct pipe_resource *
+r600_texture_from_memobj(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *_memobj,
+ uint64_t offset)
+{
+ int r;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+ struct r600_texture *rtex;
+ struct radeon_surf surface = {};
+ struct radeon_bo_metadata metadata = {}; /* stays zero-initialized on the non-dedicated path */
+ enum radeon_surf_mode array_mode;
+ bool is_scanout;
+ struct pb_buffer *buf = NULL;
+
+ if (memobj->b.dedicated) {
+ rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
+ r600_surface_import_metadata(rscreen, &surface, &metadata,
+ &array_mode, &is_scanout);
+ } else {
+ /**
+ * The bo metadata is unset for un-dedicated images. So we fall
+ * back to linear. See answer to question 5 of the
+ * VK_KHX_external_memory spec for some details.
+ *
+ * It is possible that this case isn't going to work if the
+ * surface pitch isn't correctly aligned by default.
+ *
+ * In order to support it correctly we require multi-image
+ * metadata to be synchronized between radv and radeonsi. The
+ * semantics of associating multiple image metadata to a memory
+ * object on the vulkan export side are not concretely defined
+ * either.
+ *
+ * All the use cases we are aware of at the moment for memory
+ * objects use dedicated allocations. So let's keep the initial
+ * implementation simple.
+ *
+ * A possible alternative is to attempt to reconstruct the
+ * tiling information when the TexParameter TEXTURE_TILING_EXT
+ * is set.
+ */
+ array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ is_scanout = false;
+
+ }
+
+ r = r600_init_surface(rscreen, &surface, templ,
+ array_mode, memobj->stride,
+ offset, true, is_scanout,
+ false);
+ if (r)
+ return NULL;
+
+ rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
+ if (!rtex)
+ return NULL;
+
+ /* r600_texture_create_object doesn't increment refcount of
+ * memobj->buf, so increment it here.
+ */
+ pb_reference(&buf, memobj->buf);
+
+ rtex->resource.b.is_shared = true;
+ rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
+
+ if (rscreen->apply_opaque_metadata)
+ rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+
+ return &rtex->resource.b.b;
+}
+
+/* Install the texture and memory-object entry points on the screen. */
+void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
+{
+	/* Handle import/export. */
+	rscreen->b.resource_get_handle = r600_texture_get_handle;
+	rscreen->b.resource_from_handle = r600_texture_from_handle;
+
+	/* Memory objects. */
+	rscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
+	rscreen->b.memobj_destroy = r600_memobj_destroy;
+	rscreen->b.resource_from_memobj = r600_texture_from_memobj;
+}
+
+/* Install the surface and clear_texture entry points on the context. */
+void r600_init_context_texture_functions(struct r600_common_context *rctx)
+{
+	rctx->b.clear_texture = r600_clear_texture;
+	rctx->b.surface_destroy = r600_surface_destroy;
+	rctx->b.create_surface = r600_create_surface;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_viewport.c b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c
new file mode 100644
index 000000000..0797f932f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600_viewport.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r600_cs.h"
+#include "util/u_viewport.h"
+#include "tgsi/tgsi_scan.h"
+/*
+ * Register offsets and bit-field helpers used by the viewport and scissor
+ * code in this file. For each field, S_* masks a value and shifts it into
+ * position, G_* shifts and masks it back out, and C_* is a constant mask
+ * with that field's bits cleared.
+ */
+#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C
+#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
+#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C
+
+#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
+#define S_028250_TL_X(x) (((unsigned)(x) & 0x7FFF) << 0)
+#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028250_TL_X 0xFFFF8000
+#define S_028250_TL_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
+#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028250_TL_Y 0x8000FFFF
+#define S_028250_WINDOW_OFFSET_DISABLE(x) (((unsigned)(x) & 0x1) << 31)
+#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1)
+#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF
+#define S_028254_BR_X(x) (((unsigned)(x) & 0x7FFF) << 0)
+#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028254_BR_X 0xFFFF8000
+#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
+#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028254_BR_Y 0x8000FFFF
+#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
+#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
+
+/* Largest scissor coordinate: 16384 when chip_class >= EVERGREEN, else 8192.
+ * The argument is parenthesized so that any pointer expression (e.g. &ctx)
+ * expands correctly. */
+#define GET_MAX_SCISSOR(rctx) ((rctx)->chip_class >= EVERGREEN ? 16384 : 8192)
+
+/*
+ * pipe_context::set_scissor_states hook.
+ *
+ * Stores num_scissors scissor rectangles starting at start_slot. The
+ * scissor atom is only marked dirty when scissoring is currently enabled.
+ */
+static void r600_set_scissor_states(struct pipe_context *ctx,
+		unsigned start_slot,
+		unsigned num_scissors,
+		const struct pipe_scissor_state *state)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct pipe_scissor_state *dst = &rctx->scissors.states[start_slot];
+	unsigned n;
+
+	for (n = 0; n < num_scissors; n++)
+		dst[n] = state[n];
+
+	if (rctx->scissor_enabled) {
+		rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+	}
+}
+
+/* Since the guard band disables clipping, clipping has to be done per-pixel
+ * with a scissor. This derives that scissor rectangle from a viewport.
+ */
+static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
+		const struct pipe_viewport_state *vp,
+		struct r600_signed_scissor *scissor)
+{
+	/* Clip-space (-1, -1) and (1, 1) converted into window space. */
+	const float x_at_neg1 = -vp->scale[0] + vp->translate[0];
+	const float y_at_neg1 = -vp->scale[1] + vp->translate[1];
+	const float x_at_pos1 = vp->scale[0] + vp->translate[0];
+	const float y_at_pos1 = vp->scale[1] + vp->translate[1];
+	float minx, miny, maxx, maxy;
+
+	/* r600_draw_rectangle sets this. Disable the scissor. */
+	if (x_at_neg1 == -1 && y_at_neg1 == -1 &&
+	    x_at_pos1 == 1 && y_at_pos1 == 1) {
+		scissor->minx = scissor->miny = 0;
+		scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
+		return;
+	}
+
+	/* Order the bounds so that inverted viewports are handled too. */
+	if (x_at_neg1 > x_at_pos1) {
+		minx = x_at_pos1;
+		maxx = x_at_neg1;
+	} else {
+		minx = x_at_neg1;
+		maxx = x_at_pos1;
+	}
+	if (y_at_neg1 > y_at_pos1) {
+		miny = y_at_pos1;
+		maxy = y_at_neg1;
+	} else {
+		miny = y_at_neg1;
+		maxy = y_at_pos1;
+	}
+
+	/* Convert to integer and round up the max bounds. */
+	scissor->minx = minx;
+	scissor->miny = miny;
+	scissor->maxx = ceilf(maxx);
+	scissor->maxy = ceilf(maxy);
+}
+
+/*
+ * Clamp each bound of the signed scissor into [0, GET_MAX_SCISSOR(rctx)]
+ * and store the result in "out".
+ */
+static void r600_clamp_scissor(struct r600_common_context *rctx,
+		struct pipe_scissor_state *out,
+		struct r600_signed_scissor *scissor)
+{
+	unsigned limit = GET_MAX_SCISSOR(rctx);
+
+	/* Top-left corner. */
+	out->minx = CLAMP(scissor->minx, 0, limit);
+	out->miny = CLAMP(scissor->miny, 0, limit);
+
+	/* Bottom-right corner. */
+	out->maxx = CLAMP(scissor->maxx, 0, limit);
+	out->maxy = CLAMP(scissor->maxy, 0, limit);
+}
+
+/* Shrink "out" in place to its intersection with "clip". */
+static void r600_clip_scissor(struct pipe_scissor_state *out,
+		struct pipe_scissor_state *clip)
+{
+	/* Raise the minimum corner to the larger of the two. */
+	if (clip->minx > out->minx)
+		out->minx = clip->minx;
+	if (clip->miny > out->miny)
+		out->miny = clip->miny;
+
+	/* Lower the maximum corner to the smaller of the two. */
+	if (clip->maxx < out->maxx)
+		out->maxx = clip->maxx;
+	if (clip->maxy < out->maxy)
+		out->maxy = clip->maxy;
+}
+
+/* Grow "out" in place to the bounding rectangle of "out" and "in". */
+static void r600_scissor_make_union(struct r600_signed_scissor *out,
+		struct r600_signed_scissor *in)
+{
+	/* Lower the minimum corner to the smaller of the two. */
+	if (in->minx < out->minx)
+		out->minx = in->minx;
+	if (in->miny < out->miny)
+		out->miny = in->miny;
+
+	/* Raise the maximum corner to the larger of the two. */
+	if (in->maxx > out->maxx)
+		out->maxx = in->maxx;
+	if (in->maxy > out->maxy)
+		out->maxy = in->maxy;
+}
+
+/*
+ * Adjust "scissor" in place on EVERGREEN and CAYMAN chip classes:
+ * a zero maxx/maxy forces the matching min to 1, and on CAYMAN a scissor
+ * with maxx == 1 and maxy == 1 gets maxx bumped to 2. Other chip classes
+ * are left untouched.
+ */
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+		struct pipe_scissor_state *scissor)
+{
+	if (rctx->chip_class != EVERGREEN && rctx->chip_class != CAYMAN)
+		return;
+
+	if (scissor->maxx == 0)
+		scissor->minx = 1;
+	if (scissor->maxy == 0)
+		scissor->miny = 1;
+
+	if (rctx->chip_class != CAYMAN)
+		return;
+
+	if (scissor->maxx == 1 && scissor->maxy == 1)
+		scissor->maxx = 2;
+}
+
+/*
+ * Emit the two dwords (top-left, bottom-right) of one viewport scissor.
+ *
+ * The rectangle is the clamped viewport scissor, or the maximum scissor when
+ * the VS disables clipping/viewport, intersected with "scissor" when that is
+ * non-NULL, then passed through the Evergreen scissor bug workaround.
+ * The caller is expected to have emitted the register header already.
+ */
+static void r600_emit_one_scissor(struct r600_common_context *rctx,
+		struct radeon_winsys_cs *cs,
+		struct r600_signed_scissor *vp_scissor,
+		struct pipe_scissor_state *scissor)
+{
+	struct pipe_scissor_state rect;
+	unsigned top_left, bottom_right;
+
+	if (!rctx->vs_disables_clipping_viewport) {
+		r600_clamp_scissor(rctx, &rect, vp_scissor);
+	} else {
+		rect.minx = rect.miny = 0;
+		rect.maxx = rect.maxy = GET_MAX_SCISSOR(rctx);
+	}
+
+	if (scissor)
+		r600_clip_scissor(&rect, scissor);
+
+	evergreen_apply_scissor_bug_workaround(rctx, &rect);
+
+	top_left = S_028250_TL_X(rect.minx) |
+		   S_028250_TL_Y(rect.miny) |
+		   S_028250_WINDOW_OFFSET_DISABLE(1);
+	bottom_right = S_028254_BR_X(rect.maxx) |
+		       S_028254_BR_Y(rect.maxy);
+
+	radeon_emit(cs, top_left);
+	radeon_emit(cs, bottom_right);
+}
+
+/* The viewport range is [-MAX, MAX], where MAX is 32768 when
+ * chip_class >= EVERGREEN and 16384 otherwise. The argument is
+ * parenthesized so that any pointer expression expands correctly. */
+#define GET_MAX_VIEWPORT_RANGE(rctx) ((rctx)->chip_class >= EVERGREEN ? 32768 : 16384)
+/*
+ * Compute the clip guard band for the viewport described by vp_as_scissor
+ * and emit the four PA_CL_GB_* registers. The discard adjusts are always
+ * emitted as 1.0; the clip adjusts are the largest guard band that stays
+ * inside the supported viewport range.
+ */
+static void r600_emit_guardband(struct r600_common_context *rctx,
+ struct r600_signed_scissor *vp_as_scissor)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor->minx == vp_as_scissor->maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor->miny == vp_as_scissor->maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * Use a limit one pixel smaller to allow for some precision error.
+ */
+ max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = ( max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = ( max_range - vp.translate[1]) / vp.scale[1];
+
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
+ /* Use the smaller distance on each axis so the band is symmetric around 0. */
+ guardband_x = MIN2(-left, right);
+ guardband_y = MIN2(-top, bottom);
+
+ /* If any of the GB registers is updated, all of them must be updated. */
+ if (rctx->chip_class >= CAYMAN)
+ radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ else
+ radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
+
+ radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+}
+/*
+ * Emit callback of the scissor atom. Emits the dirty viewport scissors and
+ * then the guard band. When no shader writes the viewport index only slot 0
+ * is emitted and only its dirty bit is cleared; otherwise every dirty range
+ * is emitted and the guard band is computed from the union of all viewports.
+ */
+static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_scissor_state *states = rctx->scissors.states;
+ unsigned mask = rctx->scissors.dirty_mask;
+ bool scissor_enabled = rctx->scissor_enabled;
+ struct r600_signed_scissor max_vp_scissor;
+ int i;
+
+ /* The simple case: Only 1 viewport is active. */
+ if (!rctx->vs_writes_viewport_index) {
+ struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
+
+ if (!(mask & 1))
+ return;
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+ r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
+ r600_emit_guardband(rctx, vp);
+ rctx->scissors.dirty_mask &= ~1; /* clear one bit */
+ return;
+ }
+
+ /* Shaders can draw to any viewport. Make a union of all viewports. */
+ max_vp_scissor = rctx->viewports.as_scissor[0];
+ for (i = 1; i < R600_MAX_VIEWPORTS; i++)
+ r600_scissor_make_union(&max_vp_scissor,
+ &rctx->viewports.as_scissor[i]);
+
+ while (mask) {
+ int start, count, i; /* NOTE(review): this i shadows the outer i */
+
+ u_bit_scan_consecutive_range(&mask, &start, &count);
+ /* Each scissor is two dwords, hence the 4 * 2 byte stride and count * 2 dwords. */
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+ start * 4 * 2, count * 2);
+ for (i = start; i < start+count; i++) {
+ r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
+ scissor_enabled ? &states[i] : NULL);
+ }
+ }
+ r600_emit_guardband(rctx, &max_vp_scissor);
+ rctx->scissors.dirty_mask = 0;
+}
+
+/*
+ * pipe_context::set_viewport_states hook.
+ *
+ * Stores num_viewports viewports starting at start_slot, derives the
+ * matching scissor rectangle for each, and marks the viewport, depth-range
+ * and scissor state of those slots dirty.
+ */
+static void r600_set_viewport_states(struct pipe_context *ctx,
+		unsigned start_slot,
+		unsigned num_viewports,
+		const struct pipe_viewport_state *state)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	const unsigned touched = ((1 << num_viewports) - 1) << start_slot;
+	unsigned n;
+
+	for (n = 0; n < num_viewports; n++) {
+		const unsigned slot = start_slot + n;
+
+		rctx->viewports.states[slot] = state[n];
+		r600_get_scissor_from_viewport(rctx, &state[n],
+					       &rctx->viewports.as_scissor[slot]);
+	}
+
+	rctx->viewports.dirty_mask |= touched;
+	rctx->viewports.depth_range_dirty_mask |= touched;
+	rctx->scissors.dirty_mask |= touched;
+
+	rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+	rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+}
+
+/*
+ * Emit the six dwords of one viewport transform: scale then translate for
+ * each of the three components, in component order.
+ */
+static void r600_emit_one_viewport(struct r600_common_context *rctx,
+		struct pipe_viewport_state *state)
+{
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
+	unsigned axis;
+
+	for (axis = 0; axis < 3; axis++) {
+		radeon_emit(cs, fui(state->scale[axis]));
+		radeon_emit(cs, fui(state->translate[axis]));
+	}
+}
+
+/*
+ * Emit the viewport transforms whose dirty bit is set.
+ *
+ * When no shader writes the viewport index, only viewport 0 is emitted and
+ * only its dirty bit is cleared. Otherwise every dirty range is emitted and
+ * the whole dirty mask is reset.
+ */
+static void r600_emit_viewports(struct r600_common_context *rctx)
+{
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
+	struct pipe_viewport_state *vports = rctx->viewports.states;
+	unsigned pending = rctx->viewports.dirty_mask;
+
+	if (rctx->vs_writes_viewport_index) {
+		while (pending) {
+			int first, num, vp;
+
+			u_bit_scan_consecutive_range(&pending, &first, &num);
+
+			/* Six dwords per viewport. */
+			radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+						   first * 4 * 6, num * 6);
+			for (vp = first; vp < first + num; vp++)
+				r600_emit_one_viewport(rctx, &vports[vp]);
+		}
+		rctx->viewports.dirty_mask = 0;
+		return;
+	}
+
+	/* The simple case: Only 1 viewport is active. */
+	if (pending & 1) {
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+		r600_emit_one_viewport(rctx, &vports[0]);
+		rctx->viewports.dirty_mask &= ~1; /* clear one bit */
+	}
+}
+
+/*
+ * Emit the PA_SC_VPORT_ZMIN/ZMAX pairs of the viewports whose depth-range
+ * dirty bit is set.
+ *
+ * When no shader writes the viewport index, only viewport 0 is emitted and
+ * only its dirty bit is cleared. Otherwise every dirty range is emitted and
+ * the whole depth-range dirty mask is reset.
+ */
+static void r600_emit_depth_ranges(struct r600_common_context *rctx)
+{
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
+	struct pipe_viewport_state *vports = rctx->viewports.states;
+	unsigned pending = rctx->viewports.depth_range_dirty_mask;
+	float zmin, zmax;
+
+	if (rctx->vs_writes_viewport_index) {
+		while (pending) {
+			int first, num, vp;
+
+			u_bit_scan_consecutive_range(&pending, &first, &num);
+
+			/* Two dwords (zmin, zmax) per viewport. */
+			radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+						   first * 4 * 2, num * 2);
+			for (vp = first; vp < first + num; vp++) {
+				util_viewport_zmin_zmax(&vports[vp], rctx->clip_halfz, &zmin, &zmax);
+				radeon_emit(cs, fui(zmin));
+				radeon_emit(cs, fui(zmax));
+			}
+		}
+		rctx->viewports.depth_range_dirty_mask = 0;
+		return;
+	}
+
+	/* The simple case: Only 1 viewport is active. */
+	if (!(pending & 1))
+		return;
+
+	util_viewport_zmin_zmax(&vports[0], rctx->clip_halfz, &zmin, &zmax);
+
+	radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+	radeon_emit(cs, fui(zmin));
+	radeon_emit(cs, fui(zmax));
+	rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
+}
+
+/*
+ * Emit callback of the viewport atom: viewport transforms first, then
+ * depth ranges. The atom argument is not used.
+ */
+static void r600_emit_viewport_states(struct r600_common_context *rctx,
+		struct r600_atom *atom)
+{
+	(void)atom;
+
+	r600_emit_viewports(rctx);
+	r600_emit_depth_ranges(rctx);
+}
+
+/*
+ * Set viewport dependencies on pipe_rasterizer_state.
+ *
+ * A change of scissor_enable re-dirties every scissor slot; a change of
+ * clip_halfz re-dirties every viewport depth range.
+ */
+void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
+		bool scissor_enable, bool clip_halfz)
+{
+	const bool scissor_changed = rctx->scissor_enabled != scissor_enable;
+	const bool halfz_changed = rctx->clip_halfz != clip_halfz;
+
+	if (scissor_changed) {
+		rctx->scissor_enabled = scissor_enable;
+		rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+	}
+
+	if (halfz_changed) {
+		rctx->clip_halfz = clip_halfz;
+		rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+		rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+	}
+}
+
+/**
+ * Normally only 1 viewport and 1 scissor are emitted while no shader uses
+ * the VIEWPORT_INDEX output, and emitting the remaining viewports and
+ * scissors is delayed. Call this when a shader is bound: once a shader with
+ * VIEWPORT_INDEX appears, any still-dirty state is scheduled for emission.
+ *
+ * It also tracks whether the VS disables clipping and the viewport
+ * transformation (window-space position). A NULL info is ignored.
+ */
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+		struct tgsi_shader_info *info)
+{
+	bool window_space;
+
+	if (info == NULL)
+		return;
+
+	/* When the VS disables clipping and viewport transformation. */
+	window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+	if (rctx->vs_disables_clipping_viewport != window_space) {
+		rctx->vs_disables_clipping_viewport = window_space;
+		rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+		rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+	}
+
+	/* Viewport index handling. */
+	rctx->vs_writes_viewport_index = info->writes_viewport_index;
+	if (rctx->vs_writes_viewport_index) {
+		const bool viewports_pending =
+			rctx->viewports.dirty_mask ||
+			rctx->viewports.depth_range_dirty_mask;
+
+		if (rctx->scissors.dirty_mask)
+			rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+
+		if (viewports_pending)
+			rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+	}
+}
+
+/*
+ * Hook up the viewport and scissor state atoms (emit callback and dword
+ * budget) and install the matching pipe_context entry points.
+ */
+void r600_init_viewport_functions(struct r600_common_context *rctx)
+{
+	/* Scissor atom. */
+	rctx->scissors.atom.emit = r600_emit_scissors;
+	rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
+
+	/* Viewport atom. */
+	rctx->viewports.atom.emit = r600_emit_viewport_states;
+	rctx->viewports.atom.num_dw = 2 + 16 * 6;
+
+	/* Context entry points. */
+	rctx->b.set_viewport_states = r600_set_viewport_states;
+	rctx->b.set_scissor_states = r600_set_scissor_states;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/r600d_common.h b/lib/mesa/src/gallium/drivers/r600/r600d_common.h
new file mode 100644
index 000000000..ed1d46076
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/r600d_common.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ */
+
+#ifndef R600D_COMMON_H
+#define R600D_COMMON_H
+
+/* Register range offsets (start, and end where one is given). */
+#define R600_CONFIG_REG_OFFSET 0x08000
+#define R600_CONTEXT_REG_OFFSET 0x28000
+#define SI_SH_REG_OFFSET 0x0000B000
+#define SI_SH_REG_END 0x0000C000
+#define CIK_UCONFIG_REG_OFFSET 0x00030000
+#define CIK_UCONFIG_REG_END 0x00038000
+
+/* Packet header fields: type in bits 31:30, count in bits 29:16,
+ * opcode in bits 15:8 and the predicate flag in bit 0.
+ * PKT3() assembles a type-3 header from those pieces.
+ */
+#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
+#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
+#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
+
+/* PKT3 opcodes, each followed by the field helpers/values of that packet. */
+#define PKT3_NOP 0x10
+#define PKT3_SET_PREDICATION 0x20
+#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
+#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
+#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
+#define STRMOUT_OFFSET_FROM_PACKET 0
+#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
+#define STRMOUT_OFFSET_FROM_MEM 2
+#define STRMOUT_OFFSET_NONE 3
+#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
+#define PKT3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_EQUAL 3
+#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
+#define PKT3_COPY_DATA 0x40
+#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
+#define COPY_DATA_REG 0
+#define COPY_DATA_MEM 1
+#define COPY_DATA_PERF 4
+#define COPY_DATA_IMM 5
+#define COPY_DATA_TIMESTAMP 9
+#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
+#define COPY_DATA_MEM_ASYNC 5
+#define COPY_DATA_COUNT_SEL (1 << 16)
+#define COPY_DATA_WR_CONFIRM (1 << 20)
+#define PKT3_EVENT_WRITE 0x46
+#define PKT3_EVENT_WRITE_EOP 0x47
+#define EOP_INT_SEL(x) ((x) << 24)
+#define EOP_INT_SEL_NONE 0
+#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
+#define EOP_DATA_SEL(x) ((x) << 29)
+#define EOP_DATA_SEL_DISCARD 0
+#define EOP_DATA_SEL_VALUE_32BIT 1
+#define EOP_DATA_SEL_VALUE_64BIT 2
+#define EOP_DATA_SEL_TIMESTAMP 3
+#define PKT3_RELEASE_MEM 0x49 /* GFX9+ */
+#define PKT3_SET_CONFIG_REG 0x68
+#define PKT3_SET_CONTEXT_REG 0x69
+#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */
+#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */
+#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
+#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
+/* the argument is parenthesized so that any expression can be passed */
+#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << (x)) - 1) << 1)
+#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
+#define PKT3_SET_SH_REG 0x76 /* SI and later */
+#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
+
+/* Event types for the EVENT_TYPE() field. */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x3 /* EG and later */
+#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
+#define EVENT_TYPE_ZPASS_DONE 0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
+#define EVENT_TYPE_PERFCOUNTER_START 0x17
+#define EVENT_TYPE_PERFCOUNTER_STOP 0x18
+#define EVENT_TYPE_PIPELINESTAT_START 25
+#define EVENT_TYPE_PIPELINESTAT_STOP 26
+#define EVENT_TYPE_PERFCOUNTER_SAMPLE 0x1B
+#define EVENT_TYPE_SAMPLE_PIPELINESTAT 30
+#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
+#define EVENT_TYPE_BOTTOM_OF_PIPE_TS 40
+#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
+#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - TS events
+ */
+
+/* Predication operations and flags. */
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+#define PREDICATION_OP_BOOL64 0x3
+#define PRED_OP(x) ((x) << 16)
+/* unsigned constant: shifting a signed 1 into bit 31 is not representable in int */
+#define PREDICATION_CONTINUE (1u << 31)
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
+#define V_0280A0_SWAP_STD 0x00000000
+#define V_0280A0_SWAP_ALT 0x00000001
+#define V_0280A0_SWAP_STD_REV 0x00000002
+#define V_0280A0_SWAP_ALT_REV 0x00000003
+
+/* FAST_CLEAR bit of register 028C70: bit 17 in the EG_ variant, bit 13 in the SI_ variant. */
+#define EG_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 17)
+#define SI_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 13)
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c
new file mode 100644
index 000000000..b0551d7e1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.c
@@ -0,0 +1,1492 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_memory.h"
+#include "util/u_video.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_mpeg12_decoder.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_uvd.h"
+
+#define NUM_BUFFERS 4 /* entries in the msg_fb_it_buffers and bs_buffers arrays; cur_buffer wraps at this value */
+
+#define NUM_MPEG2_REFS 6 /* frame count used for the MPEG2 DPB size and the ref index window */
+#define NUM_H264_REFS 17 /* reference count bound used when sizing the H264 DPB */
+#define NUM_VC1_REFS 5 /* minimum reference count used when sizing the VC1 DPB */
+
+#define FB_BUFFER_OFFSET 0x1000 /* byte offset of the feedback area inside a mapped msg/fb/it buffer */
+#define FB_BUFFER_SIZE 2048 /* presumably the feedback area size; its use is outside this chunk */
+#define FB_BUFFER_SIZE_TONGA (2048 * 64) /* presumably the Tonga variant of the above — confirm at the use site */
+#define IT_SCALING_TABLE_SIZE 992 /* equals the 96 + 384 + 384 + 128 bytes get_h265_msg copies into the IT area */
+#define UVD_SESSION_CONTEXT_SIZE (128 * 1024) /* presumably the size of the sessionctx buffer — confirm at the allocation */
+
+/* UVD decoder representation
+ *
+ * Holds the winsys and command-stream handles, the array of
+ * message/feedback/IT buffers together with pointers into the one that is
+ * currently mapped, the bitstream buffers, and the DPB/context buffers.
+ */
+struct ruvd_decoder {
+ struct pipe_video_codec base; /* generic codec state: profile, level, width, height, max_references, ... */
+
+ ruvd_set_dtb set_dtb;
+
+ unsigned stream_handle;
+ unsigned stream_type; /* compared against RUVD_CODEC_* values */
+ unsigned frame_number; /* used as decoded_pic_idx in MPEG2 messages */
+
+ struct pipe_screen *screen;
+ struct radeon_winsys* ws; /* winsys used for mapping, relocation and flushing */
+ struct radeon_winsys_cs* cs; /* command stream the register writes are emitted into */
+
+ unsigned cur_buffer; /* index of the buffer set in use, advanced by next_buffer() */
+
+ struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
+ struct ruvd_msg *msg; /* start of the mapped buffer, NULL while unmapped */
+ uint32_t *fb; /* mapped buffer + FB_BUFFER_OFFSET, NULL while unmapped */
+ unsigned fb_size; /* distance from the feedback area to the IT area */
+ uint8_t *it; /* IT area, only set for codecs where have_it() is true */
+
+ struct rvid_buffer bs_buffers[NUM_BUFFERS];
+ void* bs_ptr;
+ unsigned bs_size;
+
+ struct rvid_buffer dpb;
+ bool use_legacy; /* selects relocation-based addressing in send_cmd() */
+ struct rvid_buffer ctx;
+ struct rvid_buffer sessionctx; /* sent ahead of the message when its res is set */
+ struct { /* register offsets used on the non-legacy path */
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned cntl;
+ } reg;
+};
+
+/* hand the decoder's IB to the winsys and return what cs_flush() reports */
+static int flush(struct ruvd_decoder *dec, unsigned flags)
+{
+	struct radeon_winsys *winsys = dec->ws;
+
+	return winsys->cs_flush(dec->cs, flags, NULL);
+}
+
+/* append a register write to the IB: RUVD_PKT0 header word, then the value */
+static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
+{
+	struct radeon_winsys_cs *ib = dec->cs;
+
+	/* the header takes the register offset in dwords */
+	radeon_emit(ib, RUVD_PKT0(reg >> 2, 0));
+	radeon_emit(ib, val);
+}
+
+/* send a command to the VCPU through the GPCOM registers
+ *
+ * The buffer is added to the command stream first. On the legacy path the
+ * data registers receive the relocation offset and index, otherwise the
+ * 64-bit virtual address split into two halves. The command register is
+ * written last.
+ */
+static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
+		     struct pb_buffer* buf, uint32_t off,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
+{
+	const int reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf,
+						   usage | RADEON_USAGE_SYNCHRONIZED,
+						   domain,
+						   RADEON_PRIO_UVD);
+
+	if (dec->use_legacy) {
+		off += dec->ws->buffer_get_reloc_offset(buf);
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+	} else {
+		uint64_t va = dec->ws->buffer_get_virtual_address(buf);
+
+		va += off;
+		set_reg(dec, dec->reg.data0, va);
+		set_reg(dec, dec->reg.data1, va >> 32);
+	}
+
+	set_reg(dec, dec->reg.cmd, cmd << 1);
+}
+
+/* does the codec need an IT buffer? */
+static bool have_it(struct ruvd_decoder *dec)
+{
+	const unsigned codec = dec->stream_type;
+
+	if (codec == RUVD_CODEC_H264_PERF)
+		return true;
+
+	return codec == RUVD_CODEC_H265;
+}
+
+/* map the current message/feedback/itscaling buffer and set up the
+ * msg, fb and (for codecs that use one) it pointers into it
+ */
+static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
+{
+	struct rvid_buffer *cur = &dec->msg_fb_it_buffers[dec->cur_buffer];
+	uint8_t *base;
+
+	/* CPU mapping for writing */
+	base = dec->ws->buffer_map(cur->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+
+	/* the message sits at the start of the mapping and starts out zeroed */
+	dec->msg = (struct ruvd_msg *)base;
+	memset(dec->msg, 0, sizeof(*dec->msg));
+
+	/* feedback follows at a fixed offset */
+	dec->fb = (uint32_t *)(base + FB_BUFFER_OFFSET);
+
+	if (!have_it(dec))
+		return;
+
+	/* the IT area comes right after the feedback area */
+	dec->it = (uint8_t *)(base + FB_BUFFER_OFFSET + dec->fb_size);
+}
+
+/* unmap the current message buffer and queue it for the VCPU */
+static void send_msg_buf(struct ruvd_decoder *dec)
+{
+	struct rvid_buffer *cur;
+
+	/* nothing to do unless both message and feedback are mapped */
+	if (!(dec->msg && dec->fb))
+		return;
+
+	cur = &dec->msg_fb_it_buffers[dec->cur_buffer];
+
+	/* drop the CPU mapping and forget the pointers into it */
+	dec->ws->buffer_unmap(cur->res->buf);
+	dec->it = NULL;
+	dec->fb = NULL;
+	dec->msg = NULL;
+
+	/* a session context buffer, when present, is sent ahead of the message */
+	if (dec->sessionctx.res) {
+		send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER,
+			 dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE,
+			 RADEON_DOMAIN_VRAM);
+	}
+
+	/* finally the message itself */
+	send_cmd(dec, RUVD_CMD_MSG_BUFFER, cur->res->buf, 0,
+		 RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+}
+
+/* advance cur_buffer to the next buffer set, wrapping at NUM_BUFFERS */
+static void next_buffer(struct ruvd_decoder *dec)
+{
+	dec->cur_buffer = (dec->cur_buffer + 1) % NUM_BUFFERS;
+}
+
+/* translate the decoder's profile into the matching RUVD_CODEC_* value
+ *
+ * The family argument is not consulted. Formats without a mapping trip
+ * the assert and yield 0.
+ */
+static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
+{
+	uint32_t codec = 0;
+
+	switch (u_reduce_video_profile(dec->base.profile)) {
+	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+		codec = RUVD_CODEC_H264;
+		break;
+	case PIPE_VIDEO_FORMAT_VC1:
+		codec = RUVD_CODEC_VC1;
+		break;
+	case PIPE_VIDEO_FORMAT_MPEG12:
+		codec = RUVD_CODEC_MPEG2;
+		break;
+	case PIPE_VIDEO_FORMAT_MPEG4:
+		codec = RUVD_CODEC_MPEG4;
+		break;
+	case PIPE_VIDEO_FORMAT_HEVC:
+		codec = RUVD_CODEC_H265;
+		break;
+	case PIPE_VIDEO_FORMAT_JPEG:
+		codec = RUVD_CODEC_MJPEG;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	return codec;
+}
+
+/* context buffer size for H265 main profile streams
+ *
+ * Scales with the aligned picture dimensions and the reference count,
+ * which is raised to at least 8 for pictures of 4096*2000 pixels or more
+ * and to at least 17 otherwise, plus a fixed 52 KiB.
+ */
+static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)
+{
+	const bool large_picture = dec->base.width * dec->base.height >= 4096*2000;
+	const unsigned min_refs = large_picture ? 8 : 17;
+	unsigned refs = dec->base.max_references + 1;
+	unsigned w = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+	unsigned h = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+	refs = MAX2(refs, min_refs);
+
+	w = align (w, 16);
+	h = align (h, 16);
+
+	return ((w + 255) / 16) * ((h + 255) / 16) * 16 * refs + 52 * 1024;
+}
+/* context buffer size for H265 main10 streams
+ *
+ * Sum of three parts: the per-CTB-row context memory for every reference
+ * (cm_buffer_size), a fixed left-tile context area and a left-tile pixel
+ * area that is doubled when either bit depth exceeds 8 bits.
+ */
+static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic)
+{
+ unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
+ unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
+ unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
+
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+ unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
+
+ unsigned max_references = dec->base.max_references + 1;
+
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
+ /* NOTE(review): block_size is a size in samples, yet it is added to a
+ * log2 delta to form log2_ctb_size; the HEVC CtbLog2SizeY would be
+ * (log2_min_luma_coding_block_size_minus3 + 3) plus the delta. With a
+ * minimum block of 16 or more the shifts below overflow. Confirm the
+ * size the firmware expects before changing this.
+ */
+ block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3));
+ log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+
+ width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+ height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+
+ num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
+ context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
+ max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
+
+ cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
+ db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
+
+ return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
+}
+
+/* pitch alignment applied when sizing reference pictures; always 16 here,
+ * the decoder argument is not consulted
+ */
+static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
+{
+	const unsigned pitch_alignment = 16;
+
+	return pitch_alignment;
+}
+
+/* calculate size of reference picture buffer
+ *
+ * The size is derived from the macroblock-aligned picture dimensions and
+ * the number of references (max_references plus one for the picture being
+ * decoded), with codec specific extra surfaces added on top. JPEG needs
+ * no DPB (0); an unknown format asserts and falls back to 32 MiB.
+ */
+static unsigned calc_dpb_size(struct ruvd_decoder *dec)
+{
+ unsigned width_in_mb, height_in_mb, image_size, dpb_size;
+
+ // always align them to MB size for dpb calculation
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+ // always one more for currently decoded picture
+ unsigned max_references = dec->base.max_references + 1;
+
+ // aligned size of a single frame
+ image_size = align(width, get_db_pitch_alignment(dec)) * height;
+ image_size += image_size / 2;
+ image_size = align(image_size, 1024);
+
+ // picture width & height in 16 pixel units
+ width_in_mb = width / VL_MACROBLOCK_WIDTH;
+ height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
+
+ switch (u_reduce_video_profile(dec->base.profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ if (!dec->use_legacy) {
+ unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned alignment = 64, num_dpb_buffer;
+
+ if (dec->stream_type == RUVD_CODEC_H264_PERF)
+ alignment = 256;
+ switch(dec->base.level) { // per-level budget divided by the frame size in macroblocks
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
+ dpb_size = image_size * max_references;
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
+ dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
+ dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
+ }
+ } else {
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_H264_REFS, max_references);
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
+ // macroblock context buffer
+ dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ // IT surface buffer
+ dpb_size += width_in_mb * height_in_mb * 32;
+ }
+ }
+ break;
+ }
+
+ case PIPE_VIDEO_FORMAT_HEVC:
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
+
+ width = align (width, 16);
+ height = align (height, 16);
+ if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references;
+ else
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references;
+ break;
+
+ case PIPE_VIDEO_FORMAT_VC1:
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_VC1_REFS, max_references);
+
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+
+ // CONTEXT_BUFFER
+ dpb_size += width_in_mb * height_in_mb * 128;
+
+ // IT surface buffer
+ dpb_size += width_in_mb * 64;
+
+ // DB surface buffer
+ dpb_size += width_in_mb * 128;
+
+ // BP
+ dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
+ break;
+
+ case PIPE_VIDEO_FORMAT_MPEG12:
+ // reference picture buffer, must be big enough for all frames
+ dpb_size = image_size * NUM_MPEG2_REFS;
+ break;
+
+ case PIPE_VIDEO_FORMAT_MPEG4:
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+
+ // CM
+ dpb_size += width_in_mb * height_in_mb * 64;
+
+ // IT surface buffer
+ dpb_size += align(width_in_mb * height_in_mb * 32, 64);
+
+ dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
+ break;
+
+ case PIPE_VIDEO_FORMAT_JPEG:
+ dpb_size = 0;
+ break;
+
+ default:
+ // something is missing here
+ assert(0);
+
+ // at least use a sane default value
+ dpb_size = 32 * 1024 * 1024;
+ break;
+ }
+ return dpb_size;
+}
+
+/* free associated data in the video buffer callback
+ *
+ * Passed to vl_video_buffer_set_associated_data() as the destroy hook.
+ */
+static void ruvd_destroy_associated_data(void *data)
+{
+ /* NOOP, since we only use an intptr: the associated "pointer" is an
+ * integer cast to void *, so there is nothing to release
+ */
+}
+
+/* get h264 specific message bits
+ *
+ * Builds a zero-initialised ruvd_h264 from the picture description: the
+ * profile/level, the packed SPS and PPS flag words, scaling lists and the
+ * reference frame lists. For RUVD_CODEC_H264_PERF streams the scaling
+ * lists are additionally copied into the mapped IT area (dec->it).
+ */
+static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
+{
+ struct ruvd_h264 result;
+
+ memset(&result, 0, sizeof(result));
+ switch (pic->base.profile) {
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
+ result.profile = RUVD_H264_PROFILE_BASELINE;
+ break;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ result.profile = RUVD_H264_PROFILE_MAIN;
+ break;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ result.profile = RUVD_H264_PROFILE_HIGH;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ result.level = dec->base.level;
+
+ result.sps_info_flags = 0; /* SPS flags, one bit each starting at bit 0 */
+ result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
+ result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
+ result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
+ result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
+
+ result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+ result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+ result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
+ result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
+ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+
+ switch (dec->base.chroma_format) {
+ case PIPE_VIDEO_CHROMA_FORMAT_NONE:
+ /* TODO: assert? */
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_400:
+ result.chroma_format = 0;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_420:
+ result.chroma_format = 1;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_422:
+ result.chroma_format = 2;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_444:
+ result.chroma_format = 3;
+ break;
+ }
+
+ result.pps_info_flags = 0; /* PPS flags; weighted_bipred_idc occupies bits 4-5 */
+ result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
+ result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
+ result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
+ result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
+ result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
+ result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
+ result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
+ result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
+
+ result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
+ result.slice_group_map_type = pic->pps->slice_group_map_type;
+ result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
+ result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
+ result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
+ result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
+
+ memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
+ memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
+
+ if (dec->stream_type == RUVD_CODEC_H264_PERF) { /* same condition under which have_it() maps dec->it for H264 */
+ memcpy(dec->it, result.scaling_list_4x4, 6*16);
+ memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); /* 8x8 lists follow the 96 bytes of 4x4 lists */
+ }
+
+ result.num_ref_frames = pic->num_ref_frames;
+
+ result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
+ result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
+
+ result.frame_num = pic->frame_num;
+ memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
+ result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
+ result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
+ memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
+
+ result.decoded_pic_idx = pic->frame_num;
+
+ return result;
+}
+
+/* get h265 specific message bits
+ *
+ * Builds a zero-initialised ruvd_h265 from the picture description: the
+ * packed SPS/PPS flag words, tile layout, POC and reference lists. The
+ * current picture order count is attached to the target buffer as its
+ * associated data so later pictures can look it up as a reference, and the
+ * scaling lists are copied into the mapped IT area (dec->it).
+ */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+ struct pipe_h265_picture_desc *pic)
+{
+ struct ruvd_h265 result;
+ unsigned i;
+
+ memset(&result, 0, sizeof(result));
+
+ result.sps_info_flags = 0;
+ result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+ result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+ result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+ result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+ result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+ result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+ result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+ result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+ result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+ if (pic->UseRefPicList == true)
+ result.sps_info_flags |= 1 << 10; /* bit 10 signals that direct_reflist below is to be used */
+
+ result.chroma_format = pic->pps->sps->chroma_format_idc;
+ result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+ result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+ result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+ result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+ result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+ result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+ result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+ result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+ result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+ result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+ result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+ result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+ result.pps_info_flags = 0;
+ result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+ result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+ result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+ result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+ result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+ result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+ result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+ result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+ result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+ result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+ result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+ result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+ result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+ result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+ result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+ result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+ result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+ result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+ result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+ result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+ //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+ result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+ result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+ result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+ result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+ result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+ result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+ result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+ result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+ result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+ result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+ result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+ result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+ result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+ for (i = 0; i < 19; ++i)
+ result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+ for (i = 0; i < 21; ++i)
+ result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+ result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+ result.curr_idx = pic->CurrPicOrderCntVal;
+ result.curr_poc = pic->CurrPicOrderCntVal;
+
+ vl_video_buffer_set_associated_data(target, &dec->base, /* remember this picture's POC on its buffer */
+ (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+ &ruvd_destroy_associated_data);
+
+ for (i = 0; i < 16; ++i) {
+ struct pipe_video_buffer *ref = pic->ref[i];
+ uintptr_t ref_pic = 0;
+
+ result.poc_list[i] = pic->PicOrderCntVal[i];
+
+ if (ref)
+ ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+ else
+ ref_pic = 0x7F; /* value written for slots without a reference buffer */
+ result.ref_pic_list[i] = ref_pic;
+ }
+
+ for (i = 0; i < 8; ++i) { /* preset the first 8 entries of each set to 0xFF */
+ result.ref_pic_set_st_curr_before[i] = 0xFF;
+ result.ref_pic_set_st_curr_after[i] = 0xFF;
+ result.ref_pic_set_lt_curr[i] = 0xFF;
+ }
+
+ /* NOTE(review): the three counts below are not checked against the
+ * destination array sizes (at least 8 entries per the loop above);
+ * confirm that callers bound them.
+ */
+ for (i = 0; i < pic->NumPocStCurrBefore; ++i)
+ result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+ for (i = 0; i < pic->NumPocStCurrAfter; ++i)
+ result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+ for (i = 0; i < pic->NumPocLtCurr; ++i)
+ result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+ for (i = 0; i < 6; ++i)
+ result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+ for (i = 0; i < 2; ++i)
+ result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+ memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); /* IT area offset 0, 96 bytes */
+ memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); /* offset 96, 384 bytes */
+ memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); /* offset 480, 384 bytes */
+ memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); /* offset 864, 128 bytes; 992 in total */
+
+ for (i = 0 ; i < 2 ; i++) {
+ for (int j = 0 ; j < 15 ; j++)
+ result.direct_reflist[i][j] = pic->RefPicList[i][j];
+ }
+
+ if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
+ if (target->buffer_format == PIPE_FORMAT_P016) {
+ result.p010_mode = 1;
+ result.msb_mode = 1;
+ } else {
+ result.luma_10to8 = 5;
+ result.chroma_10to8 = 5;
+ result.sclr_luma10to8 = 4;
+ result.sclr_chroma10to8 = 4;
+ }
+ }
+
+ /* TODO
+ result.highestTid;
+ result.isNonRef;
+
+ IDRPicFlag;
+ RAPPicFlag;
+ NumPocTotalCurr;
+ NumShortTermPictureSliceHeaderBits;
+ NumLongTermPictureSliceHeaderBits;
+
+ IsLongTerm[16];
+ */
+
+ return result;
+}
+
+/* get vc1 specific message bits
+ *
+ * Builds a zero-initialised ruvd_vc1 from the picture description: the
+ * profile with a fixed level per profile, and the packed SPS/PPS flag
+ * words. Some PPS bits are only filled in for main/advanced profile.
+ */
+static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
+{
+ struct ruvd_vc1 result;
+
+ memset(&result, 0, sizeof(result));
+
+ switch(pic->base.profile) {
+ case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+ result.profile = RUVD_VC1_PROFILE_SIMPLE;
+ result.level = 1;
+ break;
+
+ case PIPE_VIDEO_PROFILE_VC1_MAIN:
+ result.profile = RUVD_VC1_PROFILE_MAIN;
+ result.level = 2;
+ break;
+
+ case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+ result.profile = RUVD_VC1_PROFILE_ADVANCED;
+ result.level = 4;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ /* fields common for all profiles */
+ result.sps_info_flags |= pic->postprocflag << 7;
+ result.sps_info_flags |= pic->pulldown << 6;
+ result.sps_info_flags |= pic->interlace << 5;
+ result.sps_info_flags |= pic->tfcntrflag << 4;
+ result.sps_info_flags |= pic->finterpflag << 3;
+ result.sps_info_flags |= pic->psf << 1;
+
+ result.pps_info_flags |= pic->range_mapy_flag << 31; /* NOTE(review): if the field promotes to int this shifts into the sign bit — confirm its type */
+ result.pps_info_flags |= pic->range_mapy << 28;
+ result.pps_info_flags |= pic->range_mapuv_flag << 27;
+ result.pps_info_flags |= pic->range_mapuv << 24;
+ result.pps_info_flags |= pic->multires << 21;
+ result.pps_info_flags |= pic->maxbframes << 16;
+ result.pps_info_flags |= pic->overlap << 11;
+ result.pps_info_flags |= pic->quantizer << 9;
+ result.pps_info_flags |= pic->panscan_flag << 7;
+ result.pps_info_flags |= pic->refdist_flag << 6;
+ result.pps_info_flags |= pic->vstransform << 0;
+
+ /* some fields only apply to main/advanced profile */
+ if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
+ result.pps_info_flags |= pic->syncmarker << 20;
+ result.pps_info_flags |= pic->rangered << 19;
+ result.pps_info_flags |= pic->loopfilter << 5;
+ result.pps_info_flags |= pic->fastuvmc << 4;
+ result.pps_info_flags |= pic->extended_mv << 3;
+ result.pps_info_flags |= pic->extended_dmv << 8;
+ result.pps_info_flags |= pic->dquant << 1;
+ }
+
+ result.chroma_format = 1;
+
+#if 0
+//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
+uint32_t slice_count
+uint8_t picture_type
+uint8_t frame_coding_mode
+uint8_t deblockEnable
+uint8_t pquant
+#endif
+
+ return result;
+}
+
+/* frame number of a referenced video buffer, clamped to the window of the
+ * last NUM_MPEG2_REFS frames before the current one; a missing reference
+ * yields the newest frame of that window
+ */
+static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
+{
+	const uint32_t oldest = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
+	const uint32_t newest = MAX2(dec->frame_number, 1) - 1;
+	uintptr_t stored;
+
+	/* seems to be the most sane fallback */
+	if (!ref)
+		return newest;
+
+	/* the frame number travels as the buffer's associated data */
+	stored = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+
+	/* keep the result inside [oldest, newest] */
+	if (stored > newest)
+		return newest;
+	if (stored < oldest)
+		return oldest;
+	return stored;
+}
+
+/* build the MPEG-2 part of the decode message from the picture description */
+static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
+				       struct pipe_mpeg12_picture_desc *pic)
+{
+	struct ruvd_mpeg2 msg;
+	const int *scan_order;
+	unsigned n, k;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.decoded_pic_idx = dec->frame_number;
+	msg.ref_pic_idx[0] = get_ref_pic_idx(dec, pic->ref[0]);
+	msg.ref_pic_idx[1] = get_ref_pic_idx(dec, pic->ref[1]);
+	/* both matrices are always uploaded, reordered through the active scan table */
+	msg.load_intra_quantiser_matrix = 1;
+	msg.load_nonintra_quantiser_matrix = 1;
+	scan_order = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
+	for (n = 0; n < 64; ++n) {
+		msg.intra_quantiser_matrix[n] = pic->intra_matrix[scan_order[n]];
+		msg.nonintra_quantiser_matrix[n] = pic->non_intra_matrix[scan_order[n]];
+	}
+
+	msg.profile_and_level_indication = 0;
+	msg.chroma_format = 0x1;
+
+	/* every f_code is stored one higher than in the picture description */
+	for (n = 0; n < 2; ++n)
+		for (k = 0; k < 2; ++k)
+			msg.f_code[n][k] = pic->f_code[n][k] + 1;
+	msg.picture_coding_type = pic->picture_coding_type;
+	msg.intra_dc_precision = pic->intra_dc_precision;
+	msg.pic_structure = pic->picture_structure;
+	msg.top_field_first = pic->top_field_first;
+	msg.frame_pred_frame_dct = pic->frame_pred_frame_dct;
+	msg.concealment_motion_vectors = pic->concealment_motion_vectors;
+	msg.q_scale_type = pic->q_scale_type;
+	msg.intra_vlc_format = pic->intra_vlc_format;
+	msg.alternate_scan = pic->alternate_scan;
+
+	return msg;
+}
+
+/* build the MPEG-4 part of the decode message from the picture description */
+static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
+				       struct pipe_mpeg4_picture_desc *pic)
+{
+	struct ruvd_mpeg4 msg;
+	uint32_t flags = 0;
+	unsigned n;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.decoded_pic_idx = dec->frame_number;
+	msg.ref_pic_idx[0] = get_ref_pic_idx(dec, pic->ref[0]);
+	msg.ref_pic_idx[1] = get_ref_pic_idx(dec, pic->ref[1]);
+
+	msg.variant_type = 0;
+	msg.profile_and_level_indication = 0xF0; // ASP Level0
+
+	msg.video_object_layer_verid = 0x5; // advanced simple
+	msg.video_object_layer_shape = 0x0; // rectangular
+
+	/* the layer size comes from the decoder, not from the picture description */
+	msg.video_object_layer_width = dec->base.width;
+	msg.video_object_layer_height = dec->base.height;
+
+	msg.vop_time_increment_resolution = pic->vop_time_increment_resolution;
+	msg.quant_type = pic->quant_type;
+
+	/*
+	 * Flag word, lowest bit first. Bits that are not set below keep the
+	 * zero they start with:
+	 *   1 obmc_disable, 8 data_partitioned, 9 reversible_vlc,
+	 *   10 newpred_enable, 11 reduced_resolution_vop_enable,
+	 *   12 scalability, 13 is_object_layer_identifier,
+	 *   14 fixed_vop_rate, 15 newpred_segment_type
+	 */
+	flags |= pic->short_video_header << 0;
+	flags |= pic->interlaced << 2;
+	flags |= 1 << 3; // load_intra_quant_mat
+	flags |= 1 << 4; // load_nonintra_quant_mat
+	flags |= pic->quarter_sample << 5;
+	flags |= 1 << 6; // complexity_estimation_disable
+	flags |= pic->resync_marker_disable << 7;
+	msg.flags = flags;
+
+	/* both matrices are always sent, reordered through vl_zscan_normal */
+	for (n = 0; n < 64; ++n) {
+		const int pos = vl_zscan_normal[n];
+
+		msg.intra_quant_mat[n] = pic->intra_matrix[pos];
+		msg.nonintra_quant_mat[n] = pic->non_intra_matrix[pos];
+	}
+
+	/*
+	 * Listed by the original author without further explanation,
+	 * presumably values that still have to be wired up:
+	 * int32_t trd[2], int32_t trb[2], uint8_t vop_coding_type,
+	 * uint8_t vop_fcode_forward, uint8_t vop_fcode_backward, uint8_t rounding_control,
+	 * uint8_t alternate_vertical_scan_flag, uint8_t top_field_first
+	 */
+	return msg;
+}
+
+static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
+{
+	/* emit SOI, DQT, DHT, optional DRI, SOF and SOS built from pic at dec->bs_ptr, then advance dec->bs_ptr and dec->bs_size past them */
+	int size = 0, saved_size, len_pos, i;
+	uint8_t *buf = dec->bs_ptr;
+
+	/* SOI */
+	buf[size++] = 0xff;
+	buf[size++] = 0xd8;
+
+	/* DQT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xdb;
+
+	len_pos = size++;
+	size++;
+
+	for (i = 0; i < 4; ++i) {
+		if (pic->quantization_table.load_quantiser_table[i] == 0)
+			continue;
+
+		buf[size++] = i;
+		memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
+		size += 64;
+	}
+
+	buf[len_pos] = ((size - 4) >> 8) & 0xff; /* 16-bit fields go out high byte first, bytewise: no host-endian or alignment dependence */
+	buf[len_pos + 1] = (size - 4) & 0xff; /* size - 4 leaves out SOI and the DQT marker */
+
+	saved_size = size;
+
+	/* DHT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc4;
+
+	len_pos = size++;
+	size++;
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x00 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
+		size += 12;
+	}
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x10 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
+		size += 162;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff; /* segment length without the two marker bytes */
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* DRI */
+	if (pic->slice_parameter.restart_interval) {
+		buf[size++] = 0xff;
+		buf[size++] = 0xdd;
+		buf[size++] = 0x00;
+		buf[size++] = 0x04;
+		buf[size++] = (pic->slice_parameter.restart_interval >> 8) & 0xff;
+		buf[size++] = pic->slice_parameter.restart_interval & 0xff;
+		saved_size = size;
+	}
+
+	/* SOF */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc0;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = 0x08;
+
+	/* picture height */
+	buf[size++] = (pic->picture_parameter.picture_height >> 8) & 0xff;
+	buf[size++] = pic->picture_parameter.picture_height & 0xff;
+
+	/* picture width */
+	buf[size++] = (pic->picture_parameter.picture_width >> 8) & 0xff;
+	buf[size++] = pic->picture_parameter.picture_width & 0xff;
+
+	buf[size++] = pic->picture_parameter.num_components;
+
+	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
+		buf[size++] = pic->picture_parameter.components[i].component_id;
+		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
+			pic->picture_parameter.components[i].v_sampling_factor;
+		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* SOS */
+	buf[size++] = 0xff;
+	buf[size++] = 0xda;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = pic->slice_parameter.num_components;
+
+	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
+		buf[size++] = pic->slice_parameter.components[i].component_selector;
+		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
+			pic->slice_parameter.components[i].ac_table_selector;
+	}
+
+	buf[size++] = 0x00;
+	buf[size++] = 0x3f;
+	buf[size++] = 0x00;
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	dec->bs_ptr += size;
+	dec->bs_size += size;
+}
+
+/**
+ * destroy this video decoder: send a RUVD_MSG_DESTROY message for its stream, then release the command stream, every buffer and the decoder itself
+ */
+static void ruvd_destroy(struct pipe_video_codec *decoder)
+{
+ struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder; /* ruvd_create_decoder hands out &dec->base; the cast presumably relies on base being the first member */
+ unsigned i;
+
+ assert(decoder);
+
+ map_msg_fb_it_buf(dec); /* dec->msg is written right below; presumably this call makes it point into the current message buffer */
+ dec->msg->size = sizeof(*dec->msg);
+ dec->msg->msg_type = RUVD_MSG_DESTROY;
+ dec->msg->stream_handle = dec->stream_handle;
+ send_msg_buf(dec);
+
+ flush(dec, 0); /* the return value is ignored: teardown continues either way */
+
+ dec->ws->cs_destroy(dec->cs);
+
+ for (i = 0; i < NUM_BUFFERS; ++i) {
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ rvid_destroy_buffer(&dec->bs_buffers[i]);
+ }
+
+ rvid_destroy_buffer(&dec->dpb);
+ rvid_destroy_buffer(&dec->ctx);
+ rvid_destroy_buffer(&dec->sessionctx);
+
+ FREE(dec);
+}
+
+/**
+ * start decoding of a new frame: number the frame, tag target with that number and map the current bitstream buffer for writing (picture is not used)
+ */
+static void ruvd_begin_frame(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+ uintptr_t frame;
+
+ assert(decoder);
+
+ frame = ++dec->frame_number; /* the counter is advanced before it is used as this frame's number */
+ vl_video_buffer_set_associated_data(target, decoder, (void *)frame, /* get_ref_pic_idx() reads this number back from reference buffers */
+ &ruvd_destroy_associated_data);
+
+ dec->bs_size = 0;
+ dec->bs_ptr = dec->ws->buffer_map( /* a NULL result is tolerated: ruvd_decode_bitstream and ruvd_end_frame return early on !dec->bs_ptr */
+ dec->bs_buffers[dec->cur_buffer].res->buf,
+ dec->cs, PIPE_TRANSFER_WRITE);
+}
+
+/**
+ * decode a macroblock: not implemented, every parameter is ignored and the body only asserts
+ */
+static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture,
+ const struct pipe_macroblock *macroblocks,
+ unsigned num_macroblocks)
+{
+ /* not supported (yet) */
+ assert(0);
+}
+
+/**
+ * decode a bitstream: append the num_buffers chunks (buffers[i], sizes[i] bytes) to the mapped bitstream buffer, growing it on demand; target is unused
+ */
+static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
+				  struct pipe_video_buffer *target,
+				  struct pipe_picture_desc *picture,
+				  unsigned num_buffers,
+				  const void * const *buffers,
+				  const unsigned *sizes)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	enum pipe_video_format format = u_reduce_video_profile(picture->profile);
+	unsigned i;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr) /* no usable mapping for this frame: drop the data */
+		return;
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG) /* JPEG input gets its header segments generated in front of the payload */
+		get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
+
+	for (i = 0; i < num_buffers; ++i) {
+		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
+		unsigned new_size = dec->bs_size + sizes[i];
+
+		if (format == PIPE_VIDEO_FORMAT_JPEG)
+			new_size += 2; /* save for EOI */
+
+		if (new_size > buf->res->buf->size) {
+			dec->ws->buffer_unmap(buf->res->buf);
+			dec->bs_ptr = NULL; /* the old mapping is gone: a failure below must not leave a stale pointer for later calls */
+			if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+				RVID_ERR("Can't resize bitstream buffer!");
+				return;
+			}
+			dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+							  PIPE_TRANSFER_WRITE);
+			if (!dec->bs_ptr)
+				return;
+
+			dec->bs_ptr += dec->bs_size; /* continue behind the bytes already written */
+		}
+
+		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
+		dec->bs_size += sizes[i];
+		dec->bs_ptr += sizes[i];
+	}
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG) {
+		((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */
+		((uint8_t *)dec->bs_ptr)[1] = 0xd9;
+		dec->bs_size += 2;
+		dec->bs_ptr += 2;
+	}
+}
+
+/**
+ * end decoding of the current frame: pad the bitstream, fill in the RUVD_MSG_DECODE message for picture and target, queue all buffers and flush
+ */
+static void ruvd_end_frame(struct pipe_video_codec *decoder,
+			   struct pipe_video_buffer *target,
+			   struct pipe_picture_desc *picture)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	struct pb_buffer *dt;
+	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
+	unsigned bs_size;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr) /* no usable bitstream mapping for this frame: nothing is submitted */
+		return;
+
+	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+	bs_buf = &dec->bs_buffers[dec->cur_buffer];
+
+	bs_size = align(dec->bs_size, 128);
+	memset(dec->bs_ptr, 0, bs_size - dec->bs_size); /* zero the padding behind the payload */
+	dec->ws->buffer_unmap(bs_buf->res->buf);
+
+	map_msg_fb_it_buf(dec);
+	dec->msg->size = sizeof(*dec->msg);
+	dec->msg->msg_type = RUVD_MSG_DECODE;
+	dec->msg->stream_handle = dec->stream_handle;
+	dec->msg->status_report_feedback_number = dec->frame_number;
+
+	dec->msg->body.decode.stream_type = dec->stream_type;
+	dec->msg->body.decode.decode_flags = 0x1;
+	dec->msg->body.decode.width_in_samples = dec->base.width;
+	dec->msg->body.decode.height_in_samples = dec->base.height;
+
+	/* for VC-1 simple/main the two size fields are given in units of 16 samples, rounded up */
+	if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
+	    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
+		dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
+		dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
+	}
+	if (dec->dpb.res)
+		dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
+	dec->msg->body.decode.bsd_size = bs_size;
+	dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
+
+	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); /* driver callback fills the dt_* fields and returns the target buffer */
+
+	switch (u_reduce_video_profile(picture->profile)) {
+	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_HEVC:
+		dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+		if (dec->ctx.res == NULL) { /* the context buffer is created lazily, on the first HEVC frame */
+			unsigned ctx_size;
+			if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+				ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
+			else
+				ctx_size = calc_ctx_size_h265_main(dec);
+			if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+				RVID_ERR("Can't allocated context buffer.\n");
+			} else { /* clear only a buffer that was really created; dec->ctx.res is tested again below */
+				rvid_clear_buffer(decoder->context, &dec->ctx);
+			}
+		}
+		if (dec->ctx.res)
+			dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
+		break;
+
+	case PIPE_VIDEO_FORMAT_VC1:
+		dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_MPEG12:
+		dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_MPEG4:
+		dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_JPEG: /* no codec-specific message part is filled in for JPEG */
+		break;
+
+	default:
+		assert(0);
+		return;
+	}
+
+	dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
+	dec->msg->body.decode.extension_support = 0x1;
+
+	/* set at least the feedback buffer size */
+	dec->fb[0] = dec->fb_size;
+
+	send_msg_buf(dec);
+
+	if (dec->dpb.res)
+		send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
+			 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+
+	if (dec->ctx.res)
+		send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
+			 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
+		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
+		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
+	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
+		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+	if (have_it(dec))
+		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
+			 FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+	set_reg(dec, dec->reg.cntl, 1);
+
+	flush(dec, RADEON_FLUSH_ASYNC);
+	next_buffer(dec);
+}
+
+/**
+ * flush any outstanding command buffers to the hardware; intentionally empty (ruvd_end_frame already calls flush() for every frame it submits)
+ */
+static void ruvd_flush(struct pipe_video_codec *decoder)
+{
+}
+
+/**
+ * create a UVD decoder from templ; returns the new codec, NULL on failure, or the result of vl_create_mpeg12_decoder() when MPEG12 is not given to UVD
+ */
+struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ ruvd_set_dtb set_dtb)
+{
+ struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
+ unsigned dpb_size;
+ unsigned width = templ->width, height = templ->height;
+ unsigned bs_buf_size;
+ struct radeon_info info;
+ struct ruvd_decoder *dec;
+ int r, i;
+
+ ws->query_info(ws, &info); /* info.family and info.drm_major are consulted below */
+
+ switch(u_reduce_video_profile(templ->profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG12:
+ if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM) /* these cases are delegated to the vl MPEG12 decoder */
+ return vl_create_mpeg12_decoder(context, templ);
+
+ /* fall through */
+ case PIPE_VIDEO_FORMAT_MPEG4:
+ width = align(width, VL_MACROBLOCK_WIDTH);
+ height = align(height, VL_MACROBLOCK_HEIGHT);
+ break;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: /* same macroblock alignment as the case above */
+ width = align(width, VL_MACROBLOCK_WIDTH);
+ height = align(height, VL_MACROBLOCK_HEIGHT);
+ break;
+
+ default: /* all other formats keep the template's size */
+ break;
+ }
+
+
+ dec = CALLOC_STRUCT(ruvd_decoder);
+
+ if (!dec)
+ return NULL;
+
+ if (info.drm_major < 3)
+ dec->use_legacy = true;
+
+ dec->base = *templ; /* start from the caller's template, then override context, size and entry points */
+ dec->base.context = context;
+ dec->base.width = width;
+ dec->base.height = height;
+
+ dec->base.destroy = ruvd_destroy;
+ dec->base.begin_frame = ruvd_begin_frame;
+ dec->base.decode_macroblock = ruvd_decode_macroblock;
+ dec->base.decode_bitstream = ruvd_decode_bitstream;
+ dec->base.end_frame = ruvd_end_frame;
+ dec->base.flush = ruvd_flush;
+
+ dec->stream_type = profile2stream_type(dec, info.family);
+ dec->set_dtb = set_dtb;
+ dec->stream_handle = rvid_alloc_stream_handle();
+ dec->screen = context->screen;
+ dec->ws = ws; /* set before the first goto error: the error path calls through dec->ws */
+ dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
+ if (!dec->cs) {
+ RVID_ERR("Can't get command submission context.\n");
+ goto error;
+ }
+
+ dec->fb_size = FB_BUFFER_SIZE;
+ bs_buf_size = width * height * (512 / (16 * 16)); /* 512 / (16 * 16) is 2, so 2 * width * height bytes to start with; ruvd_decode_bitstream grows the buffer when needed */
+ for (i = 0; i < NUM_BUFFERS; ++i) {
+ unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
+ STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); /* the feedback area is addressed at FB_BUFFER_OFFSET in the same buffer, so the message must fit in front of it */
+ if (have_it(dec))
+ msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+ if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+ msg_fb_it_size, PIPE_USAGE_STAGING)) {
+ RVID_ERR("Can't allocated message buffers.\n");
+ goto error;
+ }
+
+ if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
+ bs_buf_size, PIPE_USAGE_STAGING)) {
+ RVID_ERR("Can't allocated bitstream buffers.\n");
+ goto error;
+ }
+
+ rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
+ rvid_clear_buffer(context, &dec->bs_buffers[i]);
+ }
+
+ dpb_size = calc_dpb_size(dec);
+ if (dpb_size) { /* a size of 0 means no dpb buffer is created */
+ if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't allocated dpb.\n");
+ goto error;
+ }
+ rvid_clear_buffer(context, &dec->dpb);
+ }
+
+ dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
+ dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
+ dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
+ dec->reg.cntl = RUVD_ENGINE_CNTL;
+
+ map_msg_fb_it_buf(dec);
+ dec->msg->size = sizeof(*dec->msg);
+ dec->msg->msg_type = RUVD_MSG_CREATE;
+ dec->msg->stream_handle = dec->stream_handle;
+ dec->msg->body.create.stream_type = dec->stream_type;
+ dec->msg->body.create.width_in_samples = dec->base.width;
+ dec->msg->body.create.height_in_samples = dec->base.height;
+ dec->msg->body.create.dpb_size = dpb_size;
+ send_msg_buf(dec);
+ r = flush(dec, 0);
+ if (r) /* a non-zero flush result is treated as failure of the create message */
+ goto error;
+
+ next_buffer(dec);
+
+ return &dec->base;
+
+error: /* shared cleanup; it also runs rvid_destroy_buffer on buffers that were never created */
+ if (dec->cs) dec->ws->cs_destroy(dec->cs);
+
+ for (i = 0; i < NUM_BUFFERS; ++i) {
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ rvid_destroy_buffer(&dec->bs_buffers[i]);
+ }
+
+ rvid_destroy_buffer(&dec->dpb);
+ rvid_destroy_buffer(&dec->ctx);
+ rvid_destroy_buffer(&dec->sessionctx);
+
+ FREE(dec);
+
+ return NULL;
+}
+
+/* byte offset of one layer: start of level 0 plus one level-0 slice per preceding layer */
+static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
+{
+	return layer * surface->u.legacy.level[0].slice_size +
+		surface->u.legacy.level[0].offset;
+}
+
+/* hw encoding of the macro tile aspect: 1, 2, 4, 8 become 0, 1, 2, 3 */
+static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
+{
+	/* every value other than 2, 4 and 8 is encoded like 1 */
+	if (macro_tile_aspect == 2)
+		return 1;
+	if (macro_tile_aspect == 4)
+		return 2;
+	if (macro_tile_aspect == 8)
+		return 3;
+	return 0;
+}
+
+/* hw encoding of a bank width or height: 1, 2, 4, 8 become 0, 1, 2, 3 */
+static unsigned bank_wh(unsigned bankwh)
+{
+	unsigned code;
+
+	/* 2, 4 and 8 map to their log2; anything else, 1 included, maps to 0 */
+	for (code = 1; code <= 3; ++code)
+		if (bankwh == (1u << code))
+			return code;
+
+	return 0;
+}
+
+/**
+ * fill the decoding target (dt_*) fields of msg from the luma surface and the chroma surface; chroma may be NULL, luma may not
+ */
+void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+ struct radeon_surf *chroma)
+{
+ msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; /* level-0 block count in x times the block width */
+ switch (luma->u.legacy.level[0].mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+ break;
+ case RADEON_SURF_MODE_1D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
+ break;
+ case RADEON_SURF_MODE_2D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
+ break;
+ default: /* any other surface mode leaves both mode fields untouched */
+ assert(0);
+ break;
+ }
+
+ msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
+ if (chroma)
+ msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
+ if (msg->body.decode.dt_field_mode) { /* read here, never written here: the caller must have set it beforehand */
+ msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
+ if (chroma)
+ msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
+ } else {
+ msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
+ msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; /* NOTE(review): with chroma == NULL this copies a value this function did not write */
+ }
+
+ if (chroma) {
+ assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
+ assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
+ assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
+ }
+
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); /* OR-ed in: bits already present in the field are kept */
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
+ msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h
new file mode 100644
index 000000000..c371b1441
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_uvd.h
@@ -0,0 +1,442 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef RADEON_UVD_H
+#define RADEON_UVD_H
+
+#include "radeon/radeon_winsys.h"
+#include "vl/vl_video_buffer.h"
+
+/* UVD uses PM4 packet type 0 and 2; per field, _S(x) shifts a value into place, _G(x) extracts it from a header and _C is the mask that clears it */
+#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
+#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
+#define RUVD_PKT_TYPE_C 0x3FFFFFFF
+#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
+#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
+#define RUVD_PKT_COUNT_C 0xC000FFFF
+#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0)
+#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
+#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
+#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
+#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
+
+/* registers involved with UVD */
+#define RUVD_GPCOM_VCPU_CMD 0xEF0C
+#define RUVD_GPCOM_VCPU_DATA0 0xEF10
+#define RUVD_GPCOM_VCPU_DATA1 0xEF14
+#define RUVD_ENGINE_CNTL 0xEF18
+
+#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c
+#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710
+#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714
+#define RUVD_ENGINE_CNTL_SOC15 0x20718
+
+/* UVD commands to VCPU */
+#define RUVD_CMD_MSG_BUFFER 0x00000000
+#define RUVD_CMD_DPB_BUFFER 0x00000001
+#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
+#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
+#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005
+#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
+#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
+#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
+
+/* UVD message types (values of ruvd_msg.msg_type) */
+#define RUVD_MSG_CREATE 0
+#define RUVD_MSG_DECODE 1
+#define RUVD_MSG_DESTROY 2
+
+/* UVD stream types */
+#define RUVD_CODEC_H264 0x00000000
+#define RUVD_CODEC_VC1 0x00000001
+#define RUVD_CODEC_MPEG2 0x00000003
+#define RUVD_CODEC_MPEG4 0x00000004
+#define RUVD_CODEC_H264_PERF 0x00000007
+#define RUVD_CODEC_MJPEG 0x00000008
+#define RUVD_CODEC_H265 0x00000010
+
+/* UVD decode target buffer tiling mode */
+#define RUVD_TILE_LINEAR 0x00000000
+#define RUVD_TILE_8X4 0x00000001
+#define RUVD_TILE_8X8 0x00000002
+#define RUVD_TILE_32AS8 0x00000003
+
+/* UVD decode target buffer array mode; NOTE(review): 2D_THIN and MACRO_TILED_MICRO_LINEAR share the value 0x4 - confirm this is intended */
+#define RUVD_ARRAY_MODE_LINEAR 0x00000000
+#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
+#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
+#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
+#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
+#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
+
+/* UVD tile config: shifts a value to its field position (bit 0, 3, 6 or 9) of a surf_tile_config word */
+#define RUVD_BANK_WIDTH(x) ((x) << 0)
+#define RUVD_BANK_HEIGHT(x) ((x) << 3)
+#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
+#define RUVD_NUM_BANKS(x) ((x) << 9)
+
+/* H.264 profile definitions */
+#define RUVD_H264_PROFILE_BASELINE 0x00000000
+#define RUVD_H264_PROFILE_MAIN 0x00000001
+#define RUVD_H264_PROFILE_HIGH 0x00000002
+#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
+#define RUVD_H264_PROFILE_MVC 0x00000004
+
+/* VC-1 profile definitions */
+#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
+#define RUVD_VC1_PROFILE_MAIN 0x00000001
+#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
+
+struct ruvd_mvc_element { /* element type of the mvc.mvcElements[] array inside struct ruvd_h264 */
+ uint16_t viewOrderIndex;
+ uint16_t viewId;
+ uint16_t numOfAnchorRefsInL0;
+ uint16_t viewIdOfAnchorRefsInL0[15];
+ uint16_t numOfAnchorRefsInL1;
+ uint16_t viewIdOfAnchorRefsInL1[15];
+ uint16_t numOfNonAnchorRefsInL0;
+ uint16_t viewIdOfNonAnchorRefsInL0[15];
+ uint16_t numOfNonAnchorRefsInL1;
+ uint16_t viewIdOfNonAnchorRefsInL1[15];
+};
+
+struct ruvd_h264 { /* H.264 member of the codec union in ruvd_msg; presumably mirrors a fixed firmware layout, so keep field order and sizes - TODO confirm */
+ uint32_t profile;
+ uint32_t level;
+
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+ uint8_t chroma_format;
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_frame_num_minus4;
+
+ uint8_t pic_order_cnt_type;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+ uint8_t num_ref_frames;
+ uint8_t reserved_8bit;
+
+ int8_t pic_init_qp_minus26;
+ int8_t pic_init_qs_minus26;
+ int8_t chroma_qp_index_offset;
+ int8_t second_chroma_qp_index_offset;
+
+ uint8_t num_slice_groups_minus1;
+ uint8_t slice_group_map_type;
+ uint8_t num_ref_idx_l0_active_minus1;
+ uint8_t num_ref_idx_l1_active_minus1;
+
+ uint16_t slice_group_change_rate_minus1;
+ uint16_t reserved_16bit_1;
+
+ uint8_t scaling_list_4x4[6][16];
+ uint8_t scaling_list_8x8[2][64];
+
+ uint32_t frame_num;
+ uint32_t frame_num_list[16];
+ int32_t curr_field_order_cnt_list[2];
+ int32_t field_order_cnt_list[16][2];
+
+ uint32_t decoded_pic_idx;
+
+ uint32_t curr_pic_ref_frame_num;
+
+ uint8_t ref_frame_list[16];
+
+ uint32_t reserved[122];
+
+ struct { /* multiview (MVC) description; the element array is declared with a single entry */
+ uint32_t numViews;
+ uint32_t viewId0;
+ struct ruvd_mvc_element mvcElements[1];
+ } mvc;
+};
+
+struct ruvd_h265 { /* H.265 member of the codec union in ruvd_msg; presumably mirrors a fixed firmware layout, so keep field order and sizes - TODO confirm */
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+
+ uint8_t chroma_format;
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+
+ uint8_t sps_max_dec_pic_buffering_minus1;
+ uint8_t log2_min_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_luma_coding_block_size;
+ uint8_t log2_min_transform_block_size_minus2;
+
+ uint8_t log2_diff_max_min_transform_block_size;
+ uint8_t max_transform_hierarchy_depth_inter;
+ uint8_t max_transform_hierarchy_depth_intra;
+ uint8_t pcm_sample_bit_depth_luma_minus1;
+
+ uint8_t pcm_sample_bit_depth_chroma_minus1;
+ uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+ uint8_t num_extra_slice_header_bits;
+
+ uint8_t num_short_term_ref_pic_sets;
+ uint8_t num_long_term_ref_pic_sps;
+ uint8_t num_ref_idx_l0_default_active_minus1;
+ uint8_t num_ref_idx_l1_default_active_minus1;
+
+ int8_t pps_cb_qp_offset;
+ int8_t pps_cr_qp_offset;
+ int8_t pps_beta_offset_div2;
+ int8_t pps_tc_offset_div2;
+
+ uint8_t diff_cu_qp_delta_depth;
+ uint8_t num_tile_columns_minus1;
+ uint8_t num_tile_rows_minus1;
+ uint8_t log2_parallel_merge_level_minus2;
+
+ uint16_t column_width_minus1[19];
+ uint16_t row_height_minus1[21];
+
+ int8_t init_qp_minus26;
+ uint8_t num_delta_pocs_ref_rps_idx;
+ uint8_t curr_idx;
+ uint8_t reserved1;
+ int32_t curr_poc;
+ uint8_t ref_pic_list[16];
+ int32_t poc_list[16];
+ uint8_t ref_pic_set_st_curr_before[8];
+ uint8_t ref_pic_set_st_curr_after[8];
+ uint8_t ref_pic_set_lt_curr[8];
+
+ uint8_t ucScalingListDCCoefSizeID2[6];
+ uint8_t ucScalingListDCCoefSizeID3[2];
+
+ uint8_t highestTid;
+ uint8_t isNonRef;
+
+ uint8_t p010_mode;
+ uint8_t msb_mode;
+ uint8_t luma_10to8;
+ uint8_t chroma_10to8;
+ uint8_t sclr_luma10to8;
+ uint8_t sclr_chroma10to8;
+
+ uint8_t direct_reflist[2][15];
+};
+
+struct ruvd_vc1 { /* VC-1 member of the codec union in ruvd_msg */
+ uint32_t profile; /* one of the RUVD_VC1_PROFILE_* values */
+ uint32_t level;
+ uint32_t sps_info_flags; /* bit field; presumably sequence-level flags - TODO confirm the bit layout against the code that fills it */
+ uint32_t pps_info_flags; /* bit field; presumably picture-level flags - TODO confirm the bit layout against the code that fills it */
+ uint32_t pic_structure;
+ uint32_t chroma_format;
+};
+
+struct ruvd_mpeg2 { /* MPEG-2 member of the codec union in ruvd_msg */
+ uint32_t decoded_pic_idx;
+ uint32_t ref_pic_idx[2]; /* one index per reference picture */
+
+ uint8_t load_intra_quantiser_matrix; /* presumably tells the firmware to load the matrix below - TODO confirm */
+ uint8_t load_nonintra_quantiser_matrix;
+ uint8_t reserved_quantiser_alignement[2]; /* padding; the misspelt name is kept as it is part of the interface */
+ uint8_t intra_quantiser_matrix[64];
+ uint8_t nonintra_quantiser_matrix[64];
+
+ uint8_t profile_and_level_indication;
+ uint8_t chroma_format;
+
+ uint8_t picture_coding_type;
+
+ uint8_t reserved_1;
+
+ uint8_t f_code[2][2];
+ uint8_t intra_dc_precision;
+ uint8_t pic_structure;
+ uint8_t top_field_first;
+ uint8_t frame_pred_frame_dct;
+ uint8_t concealment_motion_vectors;
+ uint8_t q_scale_type;
+ uint8_t intra_vlc_format;
+ uint8_t alternate_scan;
+};
+
+struct ruvd_mpeg4 /* MPEG-4 member of the codec union in ruvd_msg */
+{
+ uint32_t decoded_pic_idx;
+ uint32_t ref_pic_idx[2]; /* one index per reference picture */
+
+ uint32_t variant_type;
+ uint8_t profile_and_level_indication;
+
+ uint8_t video_object_layer_verid;
+ uint8_t video_object_layer_shape;
+
+ uint8_t reserved_1;
+
+ uint16_t video_object_layer_width;
+ uint16_t video_object_layer_height;
+
+ uint16_t vop_time_increment_resolution;
+
+ uint16_t reserved_2;
+
+ uint32_t flags; /* bit field of stream and picture flags */
+
+ uint8_t quant_type;
+
+ uint8_t reserved_3[3];
+
+ uint8_t intra_quant_mat[64];
+ uint8_t nonintra_quant_mat[64];
+
+ struct { /* sprite parameters */
+ uint8_t sprite_enable;
+
+ uint8_t reserved_4[3];
+
+ uint16_t sprite_width;
+ uint16_t sprite_height;
+ int16_t sprite_left_coordinate;
+ int16_t sprite_top_coordinate;
+
+ uint8_t no_of_sprite_warping_points;
+ uint8_t sprite_warping_accuracy;
+ uint8_t sprite_brightness_change;
+ uint8_t low_latency_sprite_enable;
+ } sprite_config;
+
+ struct { /* presumably settings for the DivX 3.11 variant - TODO confirm */
+ uint32_t flags;
+ uint8_t vol_mode;
+ uint8_t reserved_5[3];
+ } divx_311_config;
+};
+
+/* message between driver and hardware: a common four-word header followed by a body whose active member depends on msg_type */
+struct ruvd_msg {
+
+ uint32_t size; /* size of the message in bytes */
+ uint32_t msg_type; /* one of the RUVD_MSG_* values */
+ uint32_t stream_handle;
+ uint32_t status_report_feedback_number;
+
+ union {
+ struct { /* body of a RUVD_MSG_CREATE message */
+ uint32_t stream_type;
+ uint32_t session_flags;
+ uint32_t asic_id;
+ uint32_t width_in_samples;
+ uint32_t height_in_samples;
+ uint32_t dpb_buffer;
+ uint32_t dpb_size;
+ uint32_t dpb_model;
+ uint32_t version_info;
+ } create;
+
+ struct { /* body of a RUVD_MSG_DECODE message */
+ uint32_t stream_type;
+ uint32_t decode_flags;
+ uint32_t width_in_samples;
+ uint32_t height_in_samples;
+
+ uint32_t dpb_buffer;
+ uint32_t dpb_size;
+ uint32_t dpb_model;
+ uint32_t dpb_reserved;
+
+ uint32_t db_offset_alignment;
+ uint32_t db_pitch;
+ uint32_t db_tiling_mode;
+ uint32_t db_array_mode;
+ uint32_t db_field_mode;
+ uint32_t db_surf_tile_config;
+ uint32_t db_aligned_height;
+ uint32_t db_reserved;
+
+ uint32_t use_addr_macro;
+
+ uint32_t bsd_buffer;
+ uint32_t bsd_size;
+
+ uint32_t pic_param_buffer;
+ uint32_t pic_param_size;
+ uint32_t mb_cntl_buffer;
+ uint32_t mb_cntl_size;
+
+ uint32_t dt_buffer;
+ uint32_t dt_pitch;
+ uint32_t dt_tiling_mode; /* one of the RUVD_TILE_* values */
+ uint32_t dt_array_mode; /* one of the RUVD_ARRAY_MODE_* values */
+ uint32_t dt_field_mode;
+ uint32_t dt_luma_top_offset;
+ uint32_t dt_luma_bottom_offset;
+ uint32_t dt_chroma_top_offset;
+ uint32_t dt_chroma_bottom_offset;
+ uint32_t dt_surf_tile_config; /* built from RUVD_BANK_WIDTH / RUVD_BANK_HEIGHT / RUVD_MACRO_TILE_ASPECT_RATIO */
+ uint32_t dt_uv_surf_tile_config;
+ // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
+ uint32_t dt_wa_chroma_top_offset;
+ uint32_t dt_wa_chroma_bottom_offset;
+
+ uint32_t reserved[16];
+
+ union { /* codec specific part; which member is valid follows from the stream's codec */
+ struct ruvd_h264 h264;
+ struct ruvd_h265 h265;
+ struct ruvd_vc1 vc1;
+ struct ruvd_mpeg2 mpeg2;
+ struct ruvd_mpeg4 mpeg4;
+
+ uint32_t info[768]; /* presumably there to pin the size of the union - TODO confirm */
+ } codec;
+
+ uint8_t extension_support;
+ uint8_t reserved_8bit_1;
+ uint8_t reserved_8bit_2;
+ uint8_t reserved_8bit_3;
+ uint32_t extension_reserved[64];
+ } decode;
+ } body;
+};
+
+/* driver dependent callback: receives the decode message and the target video buffer and returns the pb_buffer to use as decoding target */
+typedef struct pb_buffer* (*ruvd_set_dtb)
+(struct ruvd_msg* msg, struct vl_video_buffer *vb);
+
+/* create a UVD decoder from the template templat; set_dtb is the callback stored for use when a frame is submitted */
+struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ ruvd_set_dtb set_dtb);
+
+/* fill the decoding target (dt_*) fields of msg from the luma and chroma surfaces */
+void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+ struct radeon_surf *chroma);
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.c b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c
new file mode 100644
index 000000000..16a0127f3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.c
@@ -0,0 +1,533 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) /* three version numbers, one per byte, starting at the top byte */
+#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
+#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
+#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
+#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
+#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
+#define FW_53 (53 << 24) /* top byte only; rvce_is_fw_version_supported() compares it against a masked version */
+
+/**
+ * flush commands to the hardware
+ */
+static void flush(struct rvce_encoder *enc)
+{
+	enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
+	/* both per-submission indices are reset once the flush was issued */
+	enc->task_info_idx = enc->bs_idx = 0;
+}
+
+#if 0 /* debugging aid, compiled out; its call sites in this file are commented out as well */
+static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
+{
+ uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ unsigned i = 0; /* index of the next 32 bit feedback word to print */
+ fprintf(stderr, "\n");
+ fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
+ fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "\n");
+ enc->ws->buffer_unmap(fb->res->buf);
+}
+#endif /* dump_feedback */
+
+/**
+ * reset the CPB handling
+ */
+static void reset_cpb(struct rvce_encoder *enc)
+{
+	unsigned idx = 0;
+
+	LIST_INITHEAD(&enc->cpb_slots);
+	while (idx < enc->cpb_num) {
+		struct rvce_cpb_slot *entry = enc->cpb_array + idx;
+		entry->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
+		entry->pic_order_cnt = 0;
+		entry->frame_num = 0;
+		entry->index = idx++;
+		LIST_ADDTAIL(&entry->list, &enc->cpb_slots);
+	}
+}
+
+/**
+ * sort l0 and l1 to the top of the list
+ */
+static void sort_cpb(struct rvce_encoder *enc)
+{
+	struct rvce_cpb_slot *slot, *ref0 = NULL, *ref1 = NULL;
+	const bool is_p = enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P;
+	const bool is_b = enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B;
+
+	LIST_FOR_EACH_ENTRY(slot, &enc->cpb_slots, list) {
+		if (slot->frame_num == enc->pic.ref_idx_l0)
+			ref0 = slot;
+		if (slot->frame_num == enc->pic.ref_idx_l1)
+			ref1 = slot;
+
+		/* a P picture only needs l0, a B picture needs both l0 and l1 */
+		if (ref0 && (is_p || (is_b && ref1)))
+			break;
+	}
+
+	/* l1 is re-inserted at the head first and l0 afterwards,
+	 * which leaves l0 in front of l1 */
+	if (ref1) {
+		LIST_DEL(&ref1->list);
+		LIST_ADD(&ref1->list, &enc->cpb_slots);
+	}
+
+	if (ref0) {
+		LIST_DEL(&ref0->list);
+		LIST_ADD(&ref0->list, &enc->cpb_slots);
+	}
+}
+
+/**
+ * get number of cpbs based on dpb (at most 16); returns 0 for a zero sized frame, which the caller rejects
+ */
+static unsigned get_cpb_num(struct rvce_encoder *enc)
+{
+	unsigned w = align(enc->base.width, 16) / 16;
+	unsigned h = align(enc->base.height, 16) / 16;
+	unsigned dpb;
+
+	switch (enc->base.level) {
+	case 10:
+		dpb = 396;
+		break;
+	case 11:
+		dpb = 900;
+		break;
+	case 12:
+	case 13:
+	case 20:
+		dpb = 2376;
+		break;
+	case 21:
+		dpb = 4752;
+		break;
+	case 22:
+	case 30:
+		dpb = 8100;
+		break;
+	case 31:
+		dpb = 18000;
+		break;
+	case 32:
+		dpb = 20480;
+		break;
+	case 40:
+	case 41:
+		dpb = 32768;
+		break;
+	case 42:
+		dpb = 34816;
+		break;
+	case 50:
+		dpb = 110400;
+		break;
+	default: /* also covers 51 and 52, which used to be listed explicitly */
+		dpb = 184320;
+		break;
+	}
+
+	if (!w || !h) /* width or height of 0: avoid dividing by zero below */
+		return 0;
+	return MIN2(dpb / (w * h), 16);
+}
+
+/**
+ * Get the slot for the currently encoded frame: the entry reached through cpb_slots.prev (the list is not checked for being empty)
+ */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
+}
+
+/**
+ * Get the slot for L0: the first entry of cpb_slots, where sort_cpb() puts the l0 reference (the list is not checked for being empty)
+ */
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
+}
+
+/**
+ * Get the slot for L1: the second entry of cpb_slots (no check that the list actually holds two entries)
+ */
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
+{
+ return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
+}
+
+/**
+ * Calculate the offsets into the CPB
+ */
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+		       signed *luma_offset, signed *chroma_offset)
+{
+	/* a slot is stride * lines of luma followed by half as much again for chroma */
+	const unsigned stride = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
+	const unsigned lines = align(enc->luma->u.legacy.level[0].nblk_y, 16);
+	const unsigned slot_size = stride * (lines + lines / 2);
+	const unsigned slot_start = slot->index * slot_size;
+
+	*luma_offset = slot_start;
+	*chroma_offset = slot_start + stride * lines;
+}
+
+/**
+ * destroy this video encoder: an opened stream first gets its session/feedback/destroy packets flushed, then the CPB, the cs and the encoder are released
+ */
+static void rvce_destroy(struct pipe_video_codec *encoder)
+{
+ struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+ if (enc->stream_handle) { /* non-zero once rvce_begin_frame() opened a stream */
+ struct rvid_buffer fb; /* temporary feedback buffer, released again right after the flush */
+ rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); /* NOTE(review): result is not checked here, unlike in rvce_encode_bitstream() */
+ enc->fb = &fb;
+ enc->session(enc);
+ enc->feedback(enc);
+ enc->destroy(enc);
+ flush(enc);
+ rvid_destroy_buffer(&fb);
+ }
+ rvid_destroy_buffer(&enc->cpb);
+ enc->ws->cs_destroy(enc->cs);
+ FREE(enc->cpb_array);
+ FREE(enc);
+}
+
+static void rvce_begin_frame(struct pipe_video_codec *encoder,
+ struct pipe_video_buffer *source,
+ struct pipe_picture_desc *picture)
+{
+ struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+ struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
+ struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
+ /* the config is re-sent when the rate control method or one of the quantizers differs from the previous picture */
+ bool need_rate_control =
+ enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
+ enc->pic.quant_i_frames != pic->quant_i_frames ||
+ enc->pic.quant_p_frames != pic->quant_p_frames ||
+ enc->pic.quant_b_frames != pic->quant_b_frames;
+ /* keep a copy of the picture description; the comparison above has to happen before it is overwritten */
+ enc->pic = *pic;
+ get_pic_param(enc, pic); /* function pointer from radeon_vce.h; NOTE(review): nothing in this file assigns it */
+ /* fetch the handle and surfaces of the source planes: resources[0] gives luma, resources[1] chroma */
+ enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+ enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+ /* an IDR picture restarts the CPB bookkeeping, P and B pictures move their references to the front */
+ if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ reset_cpb(enc);
+ else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+ pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
+ sort_cpb(enc);
+ /* first frame of a stream: allocate a handle and submit session/create/config/feedback right away */
+ if (!enc->stream_handle) {
+ struct rvid_buffer fb; /* temporary feedback buffer for this initial submission */
+ enc->stream_handle = rvid_alloc_stream_handle();
+ rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING); /* NOTE(review): result is not checked */
+ enc->fb = &fb; /* NOTE(review): enc->fb keeps pointing at this stack variable once the block ends */
+ enc->session(enc);
+ enc->create(enc);
+ enc->config(enc);
+ enc->feedback(enc);
+ flush(enc);
+ //dump_feedback(enc, &fb);
+ rvid_destroy_buffer(&fb);
+ need_rate_control = false; /* the config packet was just sent above */
+ }
+
+ if (need_rate_control) {
+ enc->session(enc);
+ enc->config(enc);
+ flush(enc);
+ }
+}
+
+static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
+				  struct pipe_video_buffer *source,
+				  struct pipe_resource *destination,
+				  void **fb)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	enc->get_buffer(destination, &enc->bs_handle, NULL);
+	enc->bs_size = destination->width0;
+	/* the feedback buffer goes back to the caller through *fb; presumably it is the pointer later given to rvce_get_feedback() */
+	*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+	if (!enc->fb || !rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
+		RVID_ERR("Can't create feedback buffer.\n"); /* also reached when the allocation above failed */
+		return;
+	}
+	if (!radeon_emitted(enc->cs, 0))
+		enc->session(enc);
+	enc->encode(enc);
+	enc->feedback(enc);
+}
+
+static void rvce_end_frame(struct pipe_video_codec *encoder,
+			   struct pipe_video_buffer *source,
+			   struct pipe_picture_desc *picture)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	/* slot of the frame that was being encoded, looked up before any flush */
+	struct rvce_cpb_slot *encoded = current_slot(enc);
+
+	if (!(enc->dual_inst && enc->bs_idx <= 1))
+		flush(enc);
+
+	/* record the just encoded frame in its CPB slot */
+	encoded->picture_type = enc->pic.picture_type;
+	encoded->frame_num = enc->pic.frame_num;
+	encoded->pic_order_cnt = enc->pic.pic_order_cnt;
+	if (enc->pic.not_referenced)
+		return;
+	LIST_DEL(&encoded->list); /* a referenced frame moves to the head of the list */
+	LIST_ADD(&encoded->list, &enc->cpb_slots);
+}
+
+static void rvce_get_feedback(struct pipe_video_codec *encoder,
+			      void *feedback, unsigned *size)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	struct rvid_buffer *buf = feedback;
+
+	if (size) {
+		uint32_t *words = enc->ws->buffer_map(buf->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+
+		/* Going by the labels in the disabled dump_feedback() above, word 1 is
+		 * encHasBitstream, word 4 encBitstreamSize and word 9 videoTimeStamp.
+		 * NOTE(review): subtracting word 9 from the size looks odd given that
+		 * label; confirm the feedback layout before touching this. */
+		*size = words[1] ? words[4] - words[9] : 0;
+
+		enc->ws->buffer_unmap(buf->res->buf);
+	}
+	//dump_feedback(enc, buf);
+	rvid_destroy_buffer(buf);
+	FREE(buf);
+}
+
+/**
+ * flush any outstanding command buffers to the hardware
+ */
+static void rvce_flush(struct pipe_video_codec *encoder)
+{
+	/* installed as the codec's flush hook; the codec is the first member
+	 * of struct rvce_encoder, so the cast gives back the encoder */
+	flush((struct rvce_encoder*)encoder);
+}
+
+static void rvce_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ // flush callback handed to cs_create() in rvce_create_encoder(); requests are just ignored
+}
+
+struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+					     const struct pipe_video_codec *templ,
+					     struct radeon_winsys* ws,
+					     rvce_get_buffer get_buffer)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	struct rvce_encoder *enc;
+	struct pipe_video_buffer *tmp_buf, templat = {};
+	struct radeon_surf *tmp_surf;
+	unsigned cpb_size;
+	/* Build an encoder from templ; every failure path returns NULL after releasing what was set up so far. */
+	if (!rscreen->info.vce_fw_version) {
+		RVID_ERR("Kernel doesn't supports VCE!\n");
+		return NULL;
+	} else if (!rvce_is_fw_version_supported(rscreen)) {
+		RVID_ERR("Unsupported VCE fw version loaded!\n");
+		return NULL;
+	}
+
+	enc = CALLOC_STRUCT(rvce_encoder);
+	if (!enc)
+		return NULL;
+
+	if (rscreen->info.drm_major == 3)
+		enc->use_vm = true;
+	if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
+	    rscreen->info.drm_major == 3)
+		enc->use_vui = true;
+
+	enc->base = *templ;
+	enc->base.context = context;
+
+	enc->base.destroy = rvce_destroy;
+	enc->base.begin_frame = rvce_begin_frame;
+	enc->base.encode_bitstream = rvce_encode_bitstream;
+	enc->base.end_frame = rvce_end_frame;
+	enc->base.flush = rvce_flush;
+	enc->base.get_feedback = rvce_get_feedback;
+	enc->get_buffer = get_buffer;
+
+	enc->screen = context->screen;
+	enc->ws = ws;
+	enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
+	if (!enc->cs) {
+		RVID_ERR("Can't get command submission context.\n");
+		goto error;
+	}
+
+	/* determined before the scratch video buffer exists, so bailing out here cannot leak that buffer */
+	enc->cpb_num = get_cpb_num(enc);
+	if (!enc->cpb_num)
+		goto error;
+
+	templat.buffer_format = PIPE_FORMAT_NV12;
+	templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+	templat.width = enc->base.width;
+	templat.height = enc->base.height;
+	templat.interlaced = false;
+	if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+		RVID_ERR("Can't create video buffer.\n");
+		goto error;
+	}
+
+	get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
+
+	cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+		align(tmp_surf->u.legacy.level[0].nblk_y, 32);
+
+	cpb_size = cpb_size * 3 / 2;
+	cpb_size = cpb_size * enc->cpb_num;
+	if (enc->dual_pipe)
+		cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
+			RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+	tmp_buf->destroy(tmp_buf);
+	if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+		RVID_ERR("Can't create CPB buffer.\n");
+		goto error;
+	}
+
+	enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
+	if (!enc->cpb_array)
+		goto error;
+
+	reset_cpb(enc);
+	/* NOTE(review): unconditional bail-out, the return below is unreachable; no packet callbacks (session, encode, ...) are installed in this function, so presumably VCE is not wired up for this driver - confirm */
+	goto error;
+
+	return &enc->base;
+
+error:
+	if (enc->cs)
+		enc->ws->cs_destroy(enc->cs);
+
+	rvid_destroy_buffer(&enc->cpb);
+
+	FREE(enc->cpb_array);
+	FREE(enc);
+	return NULL;
+}
+
+/**
+ * check if kernel has the right fw version loaded
+ */
+bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
+{
+	switch (rscreen->info.vce_fw_version) {
+	case FW_40_2_2:
+	case FW_50_0_1:
+	case FW_50_1_2:
+	case FW_50_10_2:
+	case FW_50_17_3:
+	case FW_52_0_3:
+	case FW_52_4_3:
+	case FW_52_8_3:
+		return true;
+	default:
+		/* Besides the exact versions above, anything whose top byte is 53
+		 * is accepted. The mask is built from an unsigned constant because
+		 * 0xff << 24 does not fit a signed int (undefined behaviour). */
+		return (rscreen->info.vce_fw_version & (0xffu << 24)) == FW_53;
+	}
+}
+
+/**
+ * Add the buffer as relocation to the current command submission
+ */
+void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+		     signed offset)
+{
+	const int reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf,
+						     usage | RADEON_USAGE_SYNCHRONIZED,
+						     domain, RADEON_PRIO_VCE);
+	if (!enc->use_vm) {
+		/* without VM: emit reloc_idx * 4, then the offset including the buffer's reloc offset */
+		offset += enc->ws->buffer_get_reloc_offset(buf);
+		RVCE_CS(reloc_idx * 4);
+		RVCE_CS(offset);
+	} else {
+		/* with VM: emit the 64 bit virtual address plus offset, upper dword first */
+		uint64_t va = enc->ws->buffer_get_virtual_address(buf);
+		va += offset;
+		RVCE_CS(va >> 32);
+		RVCE_CS(va);
+	}
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_vce.h b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h
new file mode 100644
index 000000000..f79e65c9a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_vce.h
@@ -0,0 +1,462 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef RADEON_VCE_H
+#define RADEON_VCE_H
+
+#include "util/list.h"
+
+#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) /* append one dword to the cs; expects a variable named enc in scope */
+#define RVCE_BEGIN(cmd) { \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
+ RVCE_CS(cmd) /* opens a scope, reserves one dword (begin) and emits cmd after it; must be paired with RVCE_END() */
+#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } /* stores the dword count since RVCE_BEGIN times 4 into the reserved dword and closes the scope */
+
+#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
+#define RVCE_MAX_AUX_BUFFER_NUM 4
+
+struct r600_common_screen;
+
+/* driver dependent callback; the callers in radeon_vce.c pass NULL for whichever of handle/surface they do not need */
+typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
+ struct pb_buffer **handle,
+ struct radeon_surf **surface);
+
+/* Coded picture buffer slot; reset_cpb() initialises every field, rvce_end_frame() updates the picture fields */
+struct rvce_cpb_slot {
+ struct list_head list; /* link in rvce_encoder::cpb_slots */
+
+ unsigned index; /* position in rvce_encoder::cpb_array; rvce_frame_offset() multiplies it by the frame size */
+ enum pipe_h264_enc_picture_type picture_type;
+ unsigned frame_num; /* compared against ref_idx_l0/ref_idx_l1 in sort_cpb() */
+ unsigned pic_order_cnt;
+};
+
+struct rvce_rate_control {
+ uint32_t rc_method;
+ uint32_t target_bitrate;
+ uint32_t peak_bitrate;
+ uint32_t frame_rate_num;
+ uint32_t gop_size;
+ uint32_t quant_i_frames;
+ uint32_t quant_p_frames;
+ uint32_t quant_b_frames;
+ uint32_t vbv_buffer_size;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buf_lv;
+ uint32_t max_au_size;
+ uint32_t qp_initial_mode;
+ uint32_t target_bits_picture;
+ uint32_t peak_bits_picture_integer;
+ uint32_t peak_bits_picture_fraction;
+ uint32_t min_qp;
+ uint32_t max_qp;
+ uint32_t skip_frame_enable;
+ uint32_t fill_data_enable;
+ uint32_t enforce_hrd;
+ uint32_t b_pics_delta_qp;
+ uint32_t ref_b_pics_delta_qp;
+ uint32_t rc_reinit_disable;
+ uint32_t enc_lcvbr_init_qp_flag;
+ uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag;
+};
+
+struct rvce_motion_estimation {
+ uint32_t enc_ime_decimation_search;
+ uint32_t motion_est_half_pixel;
+ uint32_t motion_est_quarter_pixel;
+ uint32_t disable_favor_pmv_point;
+ uint32_t force_zero_point_center;
+ uint32_t lsmvert;
+ uint32_t enc_search_range_x;
+ uint32_t enc_search_range_y;
+ uint32_t enc_search1_range_x;
+ uint32_t enc_search1_range_y;
+ uint32_t disable_16x16_frame1;
+ uint32_t disable_satd;
+ uint32_t enable_amd;
+ uint32_t enc_disable_sub_mode;
+ uint32_t enc_ime_skip_x;
+ uint32_t enc_ime_skip_y;
+ uint32_t enc_en_ime_overw_dis_subm;
+ uint32_t enc_ime_overw_dis_subm_no;
+ uint32_t enc_ime2_search_range_x;
+ uint32_t enc_ime2_search_range_y;
+ uint32_t parallel_mode_speedup_enable;
+ uint32_t fme0_enc_disable_sub_mode;
+ uint32_t fme1_enc_disable_sub_mode;
+ uint32_t ime_sw_speedup_enable;
+};
+
+struct rvce_pic_control {
+ uint32_t enc_use_constrained_intra_pred;
+ uint32_t enc_cabac_enable;
+ uint32_t enc_cabac_idc;
+ uint32_t enc_loop_filter_disable;
+ int32_t enc_lf_beta_offset;
+ int32_t enc_lf_alpha_c0_offset;
+ uint32_t enc_crop_left_offset;
+ uint32_t enc_crop_right_offset;
+ uint32_t enc_crop_top_offset;
+ uint32_t enc_crop_bottom_offset;
+ uint32_t enc_num_mbs_per_slice;
+ uint32_t enc_intra_refresh_num_mbs_per_slot;
+ uint32_t enc_force_intra_refresh;
+ uint32_t enc_force_imb_period;
+ uint32_t enc_pic_order_cnt_type;
+ uint32_t log2_max_pic_order_cnt_lsb_minus4;
+ uint32_t enc_sps_id;
+ uint32_t enc_pps_id;
+ uint32_t enc_constraint_set_flags;
+ uint32_t enc_b_pic_pattern;
+ uint32_t weight_pred_mode_b_picture;
+ uint32_t enc_number_of_reference_frames;
+ uint32_t enc_max_num_ref_frames;
+ uint32_t enc_num_default_active_ref_l0;
+ uint32_t enc_num_default_active_ref_l1;
+ uint32_t enc_slice_mode;
+ uint32_t enc_max_slice_size;
+};
+
+struct rvce_task_info {
+ uint32_t offset_of_next_task_info;
+ uint32_t task_operation;
+ uint32_t reference_picture_dependency;
+ uint32_t collocate_flag_dependency;
+ uint32_t feedback_index;
+ uint32_t video_bitstream_ring_index;
+};
+
+struct rvce_feedback_buf_pkg {
+ uint32_t feedback_ring_address_hi;
+ uint32_t feedback_ring_address_lo;
+ uint32_t feedback_ring_size;
+};
+
+struct rvce_rdo {
+ uint32_t enc_disable_tbe_pred_i_frame;
+ uint32_t enc_disable_tbe_pred_p_frame;
+ uint32_t use_fme_interpol_y;
+ uint32_t use_fme_interpol_uv;
+ uint32_t use_fme_intrapol_y;
+ uint32_t use_fme_intrapol_uv;
+ uint32_t use_fme_interpol_y_1;
+ uint32_t use_fme_interpol_uv_1;
+ uint32_t use_fme_intrapol_y_1;
+ uint32_t use_fme_intrapol_uv_1;
+ uint32_t enc_16x16_cost_adj;
+ uint32_t enc_skip_cost_adj;
+ uint32_t enc_force_16x16_skip;
+ uint32_t enc_disable_threshold_calc_a;
+ uint32_t enc_luma_coeff_cost;
+ uint32_t enc_luma_mb_coeff_cost;
+ uint32_t enc_chroma_coeff_cost;
+};
+
+struct rvce_vui {
+ uint32_t aspect_ratio_info_present_flag;
+ uint32_t aspect_ratio_idc;
+ uint32_t sar_width;
+ uint32_t sar_height;
+ uint32_t overscan_info_present_flag;
+ uint32_t overscan_Approp_flag;
+ uint32_t video_signal_type_present_flag;
+ uint32_t video_format;
+ uint32_t video_full_range_flag;
+ uint32_t color_description_present_flag;
+ uint32_t color_prim;
+ uint32_t transfer_char;
+ uint32_t matrix_coef;
+ uint32_t chroma_loc_info_present_flag;
+ uint32_t chroma_loc_top;
+ uint32_t chroma_loc_bottom;
+ uint32_t timing_info_present_flag;
+ uint32_t num_units_in_tick;
+ uint32_t time_scale;
+ uint32_t fixed_frame_rate_flag;
+ uint32_t nal_hrd_parameters_present_flag;
+ uint32_t cpb_cnt_minus1;
+ uint32_t bit_rate_scale;
+ uint32_t cpb_size_scale;
+ uint32_t bit_rate_value_minus;
+ uint32_t cpb_size_value_minus;
+ uint32_t cbr_flag;
+ uint32_t initial_cpb_removal_delay_length_minus1;
+ uint32_t cpb_removal_delay_length_minus1;
+ uint32_t dpb_output_delay_length_minus1;
+ uint32_t time_offset_length;
+ uint32_t low_delay_hrd_flag;
+ uint32_t pic_struct_present_flag;
+ uint32_t bitstream_restriction_present_flag;
+ uint32_t motion_vectors_over_pic_boundaries_flag;
+ uint32_t max_bytes_per_pic_denom;
+ uint32_t max_bits_per_mb_denom;
+ uint32_t log2_max_mv_length_hori;
+ uint32_t log2_max_mv_length_vert;
+ uint32_t num_reorder_frames;
+ uint32_t max_dec_frame_buffering;
+};
+
+struct rvce_enc_operation { /* embedded in struct rvce_h264_enc_pic as member eo; every member is a 32 bit word */
+	uint32_t insert_headers;
+	uint32_t picture_structure;
+	uint32_t allowed_max_bitstream_size;
+	uint32_t force_refresh_map;
+	uint32_t insert_aud;
+	uint32_t end_of_sequence;
+	uint32_t end_of_stream;
+	uint32_t input_picture_luma_address_hi;
+	uint32_t input_picture_luma_address_lo;
+	uint32_t input_picture_chroma_address_hi;
+	uint32_t input_picture_chroma_address_lo;
+	uint32_t enc_input_frame_y_pitch;
+	uint32_t enc_input_pic_luma_pitch;
+	uint32_t enc_input_pic_chroma_pitch;
+	uint32_t enc_input_pic_addr_array;
+	uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload;
+	uint32_t enc_input_pic_tile_config;
+	uint32_t enc_pic_type;
+	uint32_t enc_idr_flag;
+	uint32_t enc_idr_pic_id;
+	uint32_t enc_mgs_key_pic;
+	uint32_t enc_reference_flag;
+	uint32_t enc_temporal_layer_index;
+	uint32_t num_ref_idx_active_override_flag;
+	uint32_t num_ref_idx_l0_active_minus1;
+	uint32_t num_ref_idx_l1_active_minus1;
+	uint32_t enc_ref_list_modification_op;
+	uint32_t enc_ref_list_modification_num;
+	uint32_t enc_decoded_picture_marking_op;
+	uint32_t enc_decoded_picture_marking_num;
+	uint32_t enc_decoded_picture_marking_idx;
+	uint32_t enc_decoded_ref_base_picture_marking_op;
+	uint32_t enc_decoded_ref_base_picture_marking_num;
+	uint32_t l0_picture_structure;
+	uint32_t l0_enc_pic_type;
+	uint32_t l0_frame_number;
+	uint32_t l0_picture_order_count;
+	uint32_t l0_luma_offset;
+	uint32_t l0_chroma_offset;
+	uint32_t l1_picture_structure;
+	uint32_t l1_enc_pic_type;
+	uint32_t l1_frame_number;
+	uint32_t l1_picture_order_count;
+	uint32_t l1_luma_offset;
+	uint32_t l1_chroma_offset;
+	uint32_t enc_reconstructed_luma_offset;
+	uint32_t enc_reconstructed_chroma_offset;
+	uint32_t enc_coloc_buffer_offset;
+	uint32_t enc_reconstructed_ref_base_picture_luma_offset;
+	uint32_t enc_reconstructed_ref_base_picture_chroma_offset;
+	uint32_t enc_reference_ref_base_picture_luma_offset;
+	uint32_t enc_reference_ref_base_picture_chroma_offset;
+	uint32_t picture_count;
+	uint32_t frame_number;
+	uint32_t picture_order_count;
+	uint32_t num_i_pic_remain_in_rcgop;
+	uint32_t num_p_pic_remain_in_rcgop;
+	uint32_t num_b_pic_remain_in_rcgop;
+	uint32_t num_ir_pic_remain_in_rcgop;
+	uint32_t enable_intra_refresh;
+	uint32_t aq_variance_en;
+	uint32_t aq_block_size;
+	uint32_t aq_mb_variance_sel;
+	uint32_t aq_frame_variance_sel;
+	uint32_t aq_param_a;
+	uint32_t aq_param_b;
+	uint32_t aq_param_c;
+	uint32_t aq_param_d;
+	uint32_t aq_param_e;
+	uint32_t context_in_sfb;
+};
+
+struct rvce_enc_create {
+ uint32_t enc_use_circular_buffer;
+ uint32_t enc_profile;
+ uint32_t enc_level;
+ uint32_t enc_pic_struct_restriction;
+ uint32_t enc_image_width;
+ uint32_t enc_image_height;
+ uint32_t enc_ref_pic_luma_pitch;
+ uint32_t enc_ref_pic_chroma_pitch;
+ uint32_t enc_ref_y_height_in_qw;
+ uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo;
+ uint32_t enc_pre_encode_context_buffer_offset;
+ uint32_t enc_pre_encode_input_luma_buffer_offset;
+ uint32_t enc_pre_encode_input_chroma_buffer_offset;
+ uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity;
+};
+
+struct rvce_config_ext {
+ uint32_t enc_enable_perf_logging;
+};
+
+struct rvce_h264_enc_pic {
+ struct rvce_rate_control rc;
+ struct rvce_motion_estimation me;
+ struct rvce_pic_control pc;
+ struct rvce_task_info ti;
+ struct rvce_feedback_buf_pkg fb;
+ struct rvce_rdo rdo;
+ struct rvce_vui vui;
+ struct rvce_enc_operation eo;
+ struct rvce_enc_create ec;
+ struct rvce_config_ext ce;
+
+ unsigned quant_i_frames;
+ unsigned quant_p_frames;
+ unsigned quant_b_frames;
+
+ enum pipe_h264_enc_picture_type picture_type;
+ unsigned frame_num;
+ unsigned frame_num_cnt;
+ unsigned p_remain;
+ unsigned i_remain;
+ unsigned idr_pic_id;
+ unsigned gop_cnt;
+ unsigned gop_size;
+ unsigned pic_order_cnt;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned addrmode_arraymode_disrdo_distwoinstants;
+
+ bool not_referenced;
+ bool is_idr;
+ bool has_ref_pic_list;
+ bool enable_vui;
+ unsigned int ref_pic_list_0[32];
+ unsigned int ref_pic_list_1[32];
+ unsigned int frame_idx[32];
+};
+
+/* VCE encoder representation */
+struct rvce_encoder {
+ struct pipe_video_codec base; /* first member: radeon_vce.c casts the pipe_video_codec pointer back to rvce_encoder */
+
+ /* version specific packets */
+ void (*session)(struct rvce_encoder *enc);
+ void (*create)(struct rvce_encoder *enc);
+ void (*feedback)(struct rvce_encoder *enc);
+ void (*rate_control)(struct rvce_encoder *enc);
+ void (*config_extension)(struct rvce_encoder *enc);
+ void (*pic_control)(struct rvce_encoder *enc);
+ void (*motion_estimation)(struct rvce_encoder *enc);
+ void (*rdo)(struct rvce_encoder *enc);
+ void (*vui)(struct rvce_encoder *enc);
+ void (*config)(struct rvce_encoder *enc);
+ void (*encode)(struct rvce_encoder *enc);
+ void (*destroy)(struct rvce_encoder *enc);
+ void (*task_info)(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx,
+ uint32_t ring_idx);
+
+ unsigned stream_handle; /* stays 0 until rvce_begin_frame() allocates one for the first frame */
+
+ struct pipe_screen *screen;
+ struct radeon_winsys* ws;
+ struct radeon_winsys_cs* cs;
+
+ rvce_get_buffer get_buffer; /* driver callback passed to rvce_create_encoder() */
+ /* source picture, filled in by rvce_begin_frame() */
+ struct pb_buffer* handle;
+ struct radeon_surf* luma;
+ struct radeon_surf* chroma;
+ /* destination bitstream, filled in by rvce_encode_bitstream() */
+ struct pb_buffer* bs_handle;
+ unsigned bs_size;
+
+ struct rvce_cpb_slot *cpb_array; /* storage for cpb_num slots */
+ struct list_head cpb_slots; /* the same slots linked as a list, reordered by sort_cpb() and rvce_end_frame() */
+ unsigned cpb_num;
+
+ struct rvid_buffer *fb; /* feedback buffer of the operation being submitted */
+ struct rvid_buffer cpb; /* buffer sized in rvce_create_encoder() from the frame size and cpb_num */
+ struct pipe_h264_enc_picture_desc pic; /* copy of the description given to the last rvce_begin_frame() */
+ struct rvce_h264_enc_pic enc_pic;
+ /* both indices are reset to 0 by flush() in radeon_vce.c */
+ unsigned task_info_idx;
+ unsigned bs_idx;
+
+ bool use_vm; /* set for drm_major 3; rvce_add_buffer() then emits virtual addresses instead of relocations */
+ bool use_vui; /* set for drm 2.42 and later, and for drm_major 3 */
+ bool dual_pipe; /* adds auxiliary buffer space to the CPB size in rvce_create_encoder() */
+ bool dual_inst; /* when set, rvce_end_frame() only flushes once bs_idx exceeds 1 */
+};
+
+/* CPB handling functions */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ signed *luma_offset, signed *chroma_offset);
+
+struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ struct radeon_winsys* ws,
+ rvce_get_buffer get_buffer);
+
+bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+
+void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
+
+/* init vce fw 40.2.2 specific callbacks */
+void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+
+/* init vce fw 50 specific callbacks */
+void radeon_vce_50_init(struct rvce_encoder *enc);
+
+/* init vce fw 52 specific callbacks */
+void radeon_vce_52_init(struct rvce_encoder *enc);
+
+/* version specific function for getting parameters; NOTE(review): this defines a global function pointer in a header, so every file including it carries a tentative definition and linking fails with -fno-common - consider extern here plus one definition in a .c file */
+void (*get_pic_param)(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 40.2.2 */
+void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 50 */
+void radeon_vce_50_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 52 */
+void radeon_vce_52_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.c b/lib/mesa/src/gallium/drivers/r600/radeon_video.c
new file mode 100644
index 000000000..c7acc3d6e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.c
@@ -0,0 +1,349 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <unistd.h>
+
+#include "util/u_memory.h"
+#include "util/u_video.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
+/* generate a stream handle
+ *
+ * The handle is the bit-reversed process id XORed with a counter that is
+ * incremented on every call. The counter is a plain static variable and is
+ * updated without any locking.
+ *
+ * Defined with an explicit (void) parameter list so that the definition is
+ * a prototype matching the declaration in radeon_video.h.
+ */
+unsigned rvid_alloc_stream_handle(void)
+{
+ static unsigned counter = 0;
+ unsigned stream_handle = 0;
+ unsigned pid = getpid();
+ int i;
+
+ /* mirror the low 32 bits of the pid: bit i moves to bit 31 - i */
+ for (i = 0; i < 32; ++i)
+ stream_handle |= ((pid >> i) & 1) << (31 - i);
+
+ stream_handle ^= ++counter;
+ return stream_handle;
+}
+
+/* create a buffer in the winsys
+ *
+ * Zeroes *buffer, records the requested usage in it and allocates the
+ * backing resource with pipe_buffer_create().
+ *
+ * Returns true if a resource was obtained, false if buffer->res is NULL.
+ */
+bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+ unsigned size, unsigned usage)
+{
+ memset(buffer, 0, sizeof(*buffer));
+ buffer->usage = usage;
+
+ /* Hardware buffer placement restrictions require the kernel to be
+ * able to move buffers around individually, so request a
+ * non-sub-allocated buffer.
+ */
+ buffer->res = (struct r600_resource *)
+ pipe_buffer_create(screen, PIPE_BIND_SHARED,
+ usage, size);
+
+ return buffer->res != NULL;
+}
+
+/* destroy a buffer
+ *
+ * Hands buffer->res to r600_resource_reference() with a NULL replacement,
+ * which presumably drops the reference and clears the pointer (helper not
+ * visible here -- confirm).
+ */
+void rvid_destroy_buffer(struct rvid_buffer *buffer)
+{
+ r600_resource_reference(&buffer->res, NULL);
+}
+
+/* reallocate a buffer, preserving its content
+ *
+ * On entry new_buf describes the existing buffer. A replacement of new_size
+ * bytes is created with the same usage, the first MIN2(old size, new_size)
+ * bytes are copied into it and the rest of the replacement, if any, is
+ * zeroed.
+ *
+ * Returns true on success; the old buffer has then been destroyed and
+ * *new_buf describes the replacement. Returns false if the replacement could
+ * not be created or either buffer could not be mapped; the replacement is
+ * then destroyed and *new_buf is restored to the old buffer.
+ */
+bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+ struct rvid_buffer *new_buf, unsigned new_size)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ struct radeon_winsys* ws = rscreen->ws;
+ unsigned bytes = MIN2(new_buf->res->buf->size, new_size); /* amount of old content to carry over */
+ struct rvid_buffer old_buf = *new_buf; /* saved so it can be restored on failure */
+ void *src = NULL, *dst = NULL;
+
+ if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
+ goto error;
+
+ src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+ if (!src)
+ goto error;
+
+ dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+ if (!dst)
+ goto error;
+
+ memcpy(dst, src, bytes);
+ if (new_size > bytes) {
+ new_size -= bytes;
+ dst += bytes; /* arithmetic on void *: relies on a compiler extension */
+ memset(dst, 0, new_size);
+ }
+ ws->buffer_unmap(new_buf->res->buf);
+ ws->buffer_unmap(old_buf.res->buf);
+ rvid_destroy_buffer(&old_buf);
+ return true;
+
+error:
+ if (src) /* only the old buffer can be mapped when we get here */
+ ws->buffer_unmap(old_buf.res->buf);
+ rvid_destroy_buffer(new_buf);
+ *new_buf = old_buf;
+ return false;
+}
+
+/* clear the buffer with zeros
+ *
+ * Calls the context's dma_clear_buffer() hook on the buffer's resource with
+ * the arguments 0, buffer->res->buf->size, 0 (presumably offset, length and
+ * clear value -- confirm against the hook's definition) and then flushes
+ * the context.
+ */
+void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
+
+ rctx->dma_clear_buffer(context, &buffer->res->b.b, 0,
+ buffer->res->buf->size, 0);
+ context->flush(context, NULL, 0);
+}
+
+/**
+ * join surfaces into the same buffer with identical tiling params
+ * sum up their sizes and replace the backend buffers with a single bo
+ *
+ * Steps performed:
+ *  1. pick the non-NULL surface with the smallest bankw * bankh product;
+ *  2. copy its bankw, bankh, mtilea and tile_split to every non-NULL
+ *     surface and shift each surface's level offsets so that the surfaces
+ *     follow each other, each start aligned to its surf_alignment;
+ *  3. add up the (aligned) sizes of the existing buffers, create one buffer
+ *     of that size and make every non-NULL entry of buffers[] reference it.
+ *
+ * Returns without touching buffers[] when no buffer is present or the new
+ * buffer cannot be created; the surfaces have already been updated by then.
+ */
+void rvid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
+{
+ struct radeon_winsys* ws;
+ unsigned best_tiling, best_wh, off;
+ unsigned size, alignment;
+ struct pb_buffer *pb;
+ unsigned i, j;
+
+ ws = rctx->ws;
+
+ for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
+ unsigned wh;
+
+ if (!surfaces[i])
+ continue;
+
+ /* choose the smallest bank w/h for now */
+ wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
+ if (wh < best_wh) {
+ best_wh = wh;
+ best_tiling = i;
+ }
+ }
+
+ for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
+ if (!surfaces[i])
+ continue;
+
+ /* adjust the texture layer offsets */
+ off = align(off, surfaces[i]->surf_alignment);
+
+ /* copy the tiling parameters */
+ surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
+ surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
+ surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
+ surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
+
+ for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
+ surfaces[i]->u.legacy.level[j].offset += off;
+
+ off += surfaces[i]->surf_size;
+ }
+
+ /* total size and largest alignment of the buffers being replaced */
+ for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
+ if (!buffers[i] || !*buffers[i])
+ continue;
+
+ size = align(size, (*buffers[i])->alignment);
+ size += (*buffers[i])->size;
+ alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
+ }
+
+ if (!size)
+ return;
+
+ /* TODO: 2D tiling workaround */
+ alignment *= 2;
+
+ pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_GTT_WC);
+ if (!pb)
+ return;
+
+ for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
+ if (!buffers[i] || !*buffers[i])
+ continue;
+
+ pb_reference(buffers[i], pb);
+ }
+
+ pb_reference(&pb, NULL); /* release the local reference taken at creation */
+}
+/*
+ * Report the value of a video capability for the given profile and
+ * entrypoint.
+ *
+ * For PIPE_VIDEO_ENTRYPOINT_ENCODE the first switch answers: only
+ * PIPE_VIDEO_FORMAT_MPEG4_AVC is reported as supported, and only when
+ * rvce_is_fw_version_supported() accepts the screen. Every other entrypoint
+ * is answered by the second switch. A capability that is not listed in the
+ * switch that applies yields 0.
+ */
+int rvid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ enum pipe_video_format codec = u_reduce_video_profile(profile);
+ struct radeon_info info; /* NOTE(review): filled below but never read in this function */
+
+ rscreen->ws->query_info(rscreen->ws, &info);
+
+ if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+ rvce_is_fw_version_supported(rscreen);
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ return 2048;
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 1152;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ return PIPE_FORMAT_NV12;
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return true;
+ case PIPE_VIDEO_CAP_STACKED_FRAMES:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12:
+ return profile != PIPE_VIDEO_PROFILE_MPEG1;
+ case PIPE_VIDEO_FORMAT_MPEG4:
+ /* no support for MPEG4 on older hw */
+ return rscreen->family >= CHIP_PALM;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+ return true;
+ case PIPE_VIDEO_FORMAT_VC1:
+ return true;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ return false;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ return false;
+ default:
+ return false;
+ }
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ return 2048;
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 1152;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ return PIPE_FORMAT_P016;
+ else
+ return PIPE_FORMAT_NV12;
+
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED: /* answered exactly like SUPPORTS_INTERLACED */
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ if (rscreen->family < CHIP_PALM) {
+ /* MPEG2 only with shaders and no support for
+ interlacing on R6xx style UVD */
+ return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+ rscreen->family > CHIP_RV770;
+ } else {
+ enum pipe_video_format format = u_reduce_video_profile(profile); /* same value as codec above */
+
+ if (format == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The firmware doesn't support interlaced HEVC.
+ else if (format == PIPE_VIDEO_FORMAT_JPEG)
+ return false;
+ return true;
+ }
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return true;
+ case PIPE_VIDEO_CAP_MAX_LEVEL:
+ switch (profile) {
+ case PIPE_VIDEO_PROFILE_MPEG1:
+ return 0;
+ case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
+ return 5;
+ case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+ return 1;
+ case PIPE_VIDEO_PROFILE_VC1_MAIN:
+ return 2;
+ case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+ return 4;
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ return 41;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+ return 186;
+ default:
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+/*
+ * Tell whether a pipe format can be used with the given profile.
+ *
+ * PIPE_VIDEO_PROFILE_HEVC_MAIN_10 accepts NV12 and P016, every other
+ * profile except PIPE_VIDEO_PROFILE_UNKNOWN accepts only NV12, and the
+ * unknown profile is delegated to vl_video_buffer_is_format_supported().
+ */
+boolean rvid_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint)
+{
+ /* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
+ if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ return (format == PIPE_FORMAT_NV12) ||
+ (format == PIPE_FORMAT_P016);
+
+ /* we can only handle this one with UVD */
+ if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
+ return format == PIPE_FORMAT_NV12;
+
+ return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/radeon_video.h b/lib/mesa/src/gallium/drivers/r600/radeon_video.h
new file mode 100644
index 000000000..3347c4ebc
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/radeon_video.h
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef RADEON_VIDEO_H
+#define RADEON_VIDEO_H
+
+#include "radeon/radeon_winsys.h"
+#include "vl/vl_video_buffer.h"
+
+#define RVID_ERR(fmt, args...) \
+ fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
+
+/* video buffer representation */
+struct rvid_buffer
+{
+ unsigned usage; /* usage value the buffer was created with */
+ struct r600_resource *res; /* backing resource; NULL when creation failed */
+};
+
+/* generate a stream handle */
+unsigned rvid_alloc_stream_handle(void);
+
+/* create a buffer in the winsys */
+bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+ unsigned size, unsigned usage);
+
+/* destroy a buffer */
+void rvid_destroy_buffer(struct rvid_buffer *buffer);
+
+/* reallocate a buffer, preserving its content */
+bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+ struct rvid_buffer *new_buf, unsigned new_size);
+
+/* clear the buffer with zeros */
+void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
+
+/* join surfaces into the same buffer with identical tiling params,
+ sum up their sizes and replace the backend buffers with a single bo */
+void rvid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
+
+/* returns supported codecs and other parameters */
+int rvid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param);
+
+/* check whether a format can be used with a profile: NV12, plus P016 for HEVC Main 10 */
+boolean rvid_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint);
+
+#endif // RADEON_VIDEO_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index ae92a767b..09a326ef6 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -516,7 +516,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
n->src.push_back(get_cf_index_value(1));
}
- if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
+ if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
ctx.is_cayman())
// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
diff --git a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h
index 28bdf15b8..5bfce1ca7 100644
--- a/lib/mesa/src/gallium/drivers/radeon/r600_cs.h
+++ b/lib/mesa/src/gallium/drivers/radeon/r600_cs.h
@@ -31,7 +31,7 @@
#define R600_CS_H
#include "r600_pipe_common.h"
-#include "amd/common/r600d_common.h"
+#include "amd/common/sid.h"
/**
* Return true if there is enough memory in VRAM and GTT for the buffers
@@ -113,27 +113,12 @@ radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
}
-static inline void r600_emit_reloc(struct r600_common_context *rctx,
- struct r600_ring *ring, struct r600_resource *rbo,
- enum radeon_bo_usage usage,
- enum radeon_bo_priority priority)
-{
- struct radeon_winsys_cs *cs = ring->cs;
- bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
- unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
-
- if (!has_vm) {
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, reloc);
- }
-}
-
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg < R600_CONTEXT_REG_OFFSET);
+ assert(reg < SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+ radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -144,10 +129,10 @@ static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned r
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -160,10 +145,10 @@ static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
unsigned reg, unsigned idx,
unsigned value)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 3 <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
diff --git a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h
index f79e65c9a..f34a8eaf8 100644
--- a/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h
+++ b/lib/mesa/src/gallium/drivers/radeon/radeon_vce.h
@@ -40,9 +40,9 @@
#define RVCE_BEGIN(cmd) { \
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
RVCE_CS(cmd)
-#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
-#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
-#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
@@ -417,46 +417,46 @@ struct rvce_encoder {
};
/* CPB handling functions */
-struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
-struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
-struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
-void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- signed *luma_offset, signed *chroma_offset);
+struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc);
+void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ signed *luma_offset, signed *chroma_offset);
-struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
- const struct pipe_video_codec *templat,
- struct radeon_winsys* ws,
- rvce_get_buffer get_buffer);
+struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ struct radeon_winsys* ws,
+ rvce_get_buffer get_buffer);
-bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen);
-void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
- enum radeon_bo_usage usage, enum radeon_bo_domain domain,
- signed offset);
+void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
/* init vce fw 40.2.2 specific callbacks */
-void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+void si_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
-void radeon_vce_50_init(struct rvce_encoder *enc);
+void si_vce_50_init(struct rvce_encoder *enc);
/* init vce fw 52 specific callbacks */
-void radeon_vce_52_init(struct rvce_encoder *enc);
+void si_vce_52_init(struct rvce_encoder *enc);
/* version specific function for getting parameters */
-void (*get_pic_param)(struct rvce_encoder *enc,
+void (*si_get_pic_param)(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 40.2.2 */
-void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_40_2_2_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 50 */
-void radeon_vce_50_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_50_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 52 */
-void radeon_vce_52_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_52_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
#endif
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
new file mode 100644
index 000000000..7f57b4ea8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
@@ -0,0 +1,10 @@
+// DriConf options specific to radeonsi
+DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
+ DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
+ DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_DEBUG
+ DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
+DRI_CONF_SECTION_END
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h b/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h
new file mode 100644
index 000000000..532151125
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_driinfo.h
@@ -0,0 +1,49 @@
+// DO NOT EDIT - this file is automatically generated by merge_driinfo.py
+
+/*
+Use as:
+
+#include "xmlpool.h"
+
+static const char driinfo_xml[] =
+#include "this_file"
+;
+*/
+
+DRI_CONF_BEGIN
+ DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_MESA_GLTHREAD("false")
+ DRI_CONF_MESA_NO_ERROR("false")
+ DRI_CONF_DISABLE_EXT_BUFFER_AGE("false")
+ DRI_CONF_DISABLE_OML_SYNC_CONTROL("false")
+ DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
+ DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
+ DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false")
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_QUALITY
+ DRI_CONF_PP_CELSHADE(0)
+ DRI_CONF_PP_NORED(0)
+ DRI_CONF_PP_NOGREEN(0)
+ DRI_CONF_PP_NOBLUE(0)
+ DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+ DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_DEBUG
+ DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
+ DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
+ DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
+ DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false")
+ DRI_CONF_FORCE_GLSL_VERSION(0)
+ DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+ DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false")
+ DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false")
+ DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
+ DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+ DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
+ DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_MISCELLANEOUS
+ DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
+ DRI_CONF_GLSL_ZERO_INIT("false")
+ DRI_CONF_SECTION_END
+DRI_CONF_END
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c
new file mode 100644
index 000000000..7a8822738
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader.h"
+#include "si_shader_internal.h"
+
+#include "ac_nir_to_llvm.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir_types.h"
+
+/* Number of attribute slots occupied by a GLSL type, as reported by
+ * glsl_count_attribute_slots() with its second argument set to false.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+/*
+ * Record in *info the shader properties implied by one NIR instruction:
+ *  - ALU ddx/ddy variants and the tex, txb and lod texture ops set
+ *    uses_derivatives;
+ *  - selected system-value load intrinsics set the matching uses_* flag;
+ *  - image stores/atomics and SSBO stores/atomics set writes_memory.
+ * Any other instruction leaves *info untouched.
+ */
+static void scan_instruction(struct tgsi_shader_info *info,
+ nir_instr *instr)
+{
+ if (instr->type == nir_instr_type_alu) {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ switch (alu->op) {
+ case nir_op_fddx:
+ case nir_op_fddy:
+ case nir_op_fddx_fine:
+ case nir_op_fddy_fine:
+ case nir_op_fddx_coarse:
+ case nir_op_fddy_coarse:
+ info->uses_derivatives = true;
+ break;
+ default:
+ break;
+ }
+ } else if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_lod:
+ info->uses_derivatives = true;
+ break;
+ default:
+ break;
+ }
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_front_face:
+ info->uses_frontface = 1;
+ break;
+ case nir_intrinsic_load_instance_id:
+ info->uses_instanceid = 1;
+ break;
+ case nir_intrinsic_load_vertex_id:
+ info->uses_vertexid = 1;
+ break;
+ case nir_intrinsic_load_vertex_id_zero_base:
+ info->uses_vertexid_nobase = 1;
+ break;
+ case nir_intrinsic_load_base_vertex:
+ info->uses_basevertex = 1;
+ break;
+ case nir_intrinsic_load_primitive_id:
+ info->uses_primid = 1;
+ break;
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ info->writes_memory = true;
+ break;
+ default:
+ break;
+ }
+ }
+}
+/*
+ * Fill a tgsi_shader_info from a NIR shader.
+ *
+ * Only vertex and fragment shaders are accepted (asserted). The function
+ * walks the input, output and uniform variable lists of the shader, copies
+ * the clip/cull distance array sizes, and finally runs scan_instruction()
+ * on every instruction of the function at the head of nir->functions.
+ */
+void si_nir_scan_shader(const struct nir_shader *nir,
+ struct tgsi_shader_info *info)
+{
+ nir_function *func;
+ unsigned i;
+
+ assert(nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_FRAGMENT);
+
+ info->processor = pipe_shader_type_from_mesa(nir->info.stage);
+ info->num_tokens = 2; /* indicate that the shader is non-empty */
+ info->num_instructions = 2;
+
+ info->num_inputs = nir->num_inputs;
+ info->num_outputs = nir->num_outputs;
+
+ i = 0; /* next info->input_* slot; not advanced for inputs skipped below */
+ nir_foreach_variable(variable, &nir->inputs) {
+ unsigned semantic_name, semantic_index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type,
+ nir->info.stage == MESA_SHADER_VERTEX);
+
+ assert(attrib_count == 1 && "not implemented");
+
+ /* Vertex shader inputs don't have semantics. The state
+ * tracker has already mapped them to attributes via
+ * variable->data.driver_location.
+ */
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ continue;
+
+ /* Fragment shader position is a system value. */
+ if (nir->info.stage == MESA_SHADER_FRAGMENT &&
+ variable->data.location == VARYING_SLOT_POS) {
+ if (variable->data.pixel_center_integer)
+ info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
+ TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
+ continue;
+ }
+
+ tgsi_get_gl_varying_semantic(variable->data.location, true,
+ &semantic_name, &semantic_index);
+
+ info->input_semantic_name[i] = semantic_name;
+ info->input_semantic_index[i] = semantic_index;
+
+ if (variable->data.sample)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
+ else if (variable->data.centroid)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
+ else
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
+
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(variable->type));
+
+ switch (variable->data.interpolation) {
+ case INTERP_MODE_NONE:
+ if (glsl_base_type_is_integer(base_type)) {
+ info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
+ goto persp_locations;
+ }
+ /* fall-through */
+ case INTERP_MODE_SMOOTH:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
+
+ persp_locations: /* shared by the COLOR and PERSPECTIVE cases */
+ if (variable->data.sample)
+ info->uses_persp_sample = true;
+ else if (variable->data.centroid)
+ info->uses_persp_centroid = true;
+ else
+ info->uses_persp_center = true;
+ break;
+
+ case INTERP_MODE_NOPERSPECTIVE:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
+
+ if (variable->data.sample)
+ info->uses_linear_sample = true;
+ else if (variable->data.centroid)
+ info->uses_linear_centroid = true;
+ else
+ info->uses_linear_center = true;
+ break;
+
+ case INTERP_MODE_FLAT:
+ info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ /* TODO make this more precise */
+ if (variable->data.location == VARYING_SLOT_COL0)
+ info->colors_read |= 0x0f;
+ else if (variable->data.location == VARYING_SLOT_COL1)
+ info->colors_read |= 0xf0;
+
+ i++;
+ }
+
+ i = 0; /* next info->output_* slot */
+ nir_foreach_variable(variable, &nir->outputs) {
+ unsigned semantic_name, semantic_index;
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ tgsi_get_gl_frag_result_semantic(variable->data.location,
+ &semantic_name, &semantic_index);
+ } else {
+ tgsi_get_gl_varying_semantic(variable->data.location, true,
+ &semantic_name, &semantic_index);
+ }
+
+ info->output_semantic_name[i] = semantic_name;
+ info->output_semantic_index[i] = semantic_index;
+ info->output_usagemask[i] = TGSI_WRITEMASK_XYZW;
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_PRIMID:
+ info->writes_primid = true;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ info->writes_viewport_index = true;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ info->writes_layer = true;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ info->writes_psize = true;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ info->writes_clipvertex = true;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ info->colors_written |= 1 << semantic_index;
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ info->writes_stencil = true;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->writes_samplemask = true;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ info->writes_edgeflag = true;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ if (info->processor == PIPE_SHADER_FRAGMENT)
+ info->writes_z = true;
+ else
+ info->writes_position = true;
+ break;
+ }
+
+ i++;
+ }
+
+ nir_foreach_variable(variable, &nir->uniforms) {
+ const struct glsl_type *type = variable->type;
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(type));
+ unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
+
+ /* We rely on the fact that nir_lower_samplers_as_deref has
+ * eliminated struct dereferences.
+ */
+ if (base_type == GLSL_TYPE_SAMPLER)
+ info->samplers_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
+ else if (base_type == GLSL_TYPE_IMAGE)
+ info->images_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
+ }
+
+ info->num_written_clipdistance = nir->info.clip_distance_array_size;
+ info->num_written_culldistance = nir->info.cull_distance_array_size;
+ info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
+ info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
+
+ if (info->processor == PIPE_SHADER_FRAGMENT)
+ info->uses_kill = nir->info.fs.uses_discard;
+
+ /* TODO make this more accurate */
+ info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
+ info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
+
+ func = (struct nir_function *)exec_list_get_head_const(&nir->functions); /* only the head of the list is scanned */
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block)
+ scan_instruction(info, instr);
+ }
+}
+
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created.
+ *
+ * The function first rescales the driver locations of all input and output
+ * variables of sel->nir, then runs a fixed sequence of lowering passes and
+ * finally repeats a set of optimization passes until none of them reports
+ * progress.
+ *
+ * \param sel  shader selector whose NIR shader (sel->nir) is modified in place
+ */
+void
+si_lower_nir(struct si_shader_selector* sel)
+{
+ /* Adjust the driver location of inputs and outputs. The state tracker
+ * interprets them as slots, while the ac/nir backend interprets them
+ * as individual components.
+ */
+ nir_foreach_variable(variable, &sel->nir->inputs)
+ variable->data.driver_location *= 4;
+
+ nir_foreach_variable(variable, &sel->nir->outputs) {
+ variable->data.driver_location *= 4;
+
+ /* Fragment depth and stencil results get an extra component offset
+ * inside their slot: +2 for depth, +1 for stencil.
+ */
+ if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+ if (variable->data.location == FRAG_RESULT_DEPTH)
+ variable->data.driver_location += 2;
+ else if (variable->data.location == FRAG_RESULT_STENCIL)
+ variable->data.driver_location += 1;
+ }
+ }
+
+ /* Perform lowerings (and optimizations) of code.
+ *
+ * Performance considerations aside, we must:
+ * - lower certain ALU operations
+ * - ensure constant offsets for texture instructions are folded
+ * and copy-propagated
+ */
+ NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size,
+ (nir_lower_io_options)0);
+ NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo);
+
+ NIR_PASS_V(sel->nir, nir_lower_returns);
+ NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+
+ /* NOTE(review): lower_txp is set to all ones; presumably it is a mask of
+ * sampler dimensions, so this requests txp lowering for all of them -
+ * confirm against nir_lower_tex.
+ */
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_txp = ~0u,
+ };
+ NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+
+ /* Optimization loop: run until a full iteration makes no progress. */
+ bool progress;
+ do {
+ progress = false;
+
+ /* (Constant) copy propagation is needed for txf with offsets. */
+ NIR_PASS(progress, sel->nir, nir_copy_prop);
+ NIR_PASS(progress, sel->nir, nir_opt_remove_phis);
+ NIR_PASS(progress, sel->nir, nir_opt_dce);
+ /* This pass is called directly rather than through NIR_PASS; when it
+ * changes something, copy propagation and DCE are rerun right away.
+ */
+ if (nir_opt_trivial_continues(sel->nir)) {
+ progress = true;
+ NIR_PASS(progress, sel->nir, nir_copy_prop);
+ NIR_PASS(progress, sel->nir, nir_opt_dce);
+ }
+ NIR_PASS(progress, sel->nir, nir_opt_if);
+ NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
+ NIR_PASS(progress, sel->nir, nir_opt_cse);
+ NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8);
+
+ /* Needed for algebraic lowering */
+ NIR_PASS(progress, sel->nir, nir_opt_algebraic);
+ NIR_PASS(progress, sel->nir, nir_opt_constant_folding);
+
+ NIR_PASS(progress, sel->nir, nir_opt_undef);
+ NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
+ /* Loop unrolling only runs when the compiler options set a
+ * non-zero max_unroll_iterations.
+ */
+ if (sel->nir->options->max_unroll_iterations) {
+ NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
+ }
+ } while (progress);
+}
+
+/**
+ * Load one vertex shader input slot that belongs to a NIR variable.
+ *
+ * \param ctx       shader context handed on to si_llvm_load_input_vs
+ * \param variable  input variable; its driver_location divided by 4 is the
+ *                  index of its first slot
+ * \param rel       slot offset relative to the variable's first slot
+ * \param out       receives the four values of the slot
+ */
+static void declare_nir_input_vs(struct si_shader_context *ctx,
+				 struct nir_variable *variable, unsigned rel,
+				 LLVMValueRef out[4])
+{
+	unsigned first_slot = variable->data.driver_location / 4;
+
+	si_llvm_load_input_vs(ctx, first_slot + rel, out);
+}
+
+/**
+ * Load one fragment shader input slot that belongs to a NIR variable.
+ *
+ * The position slot is built from the SI_PARAM_POS_* function parameters
+ * (with the fourth value being 1 divided by the W parameter) and does not
+ * consume an attribute index. Every other slot is loaded through
+ * si_llvm_load_input_fs and advances *fs_attr_idx by one.
+ *
+ * \param ctx          shader context
+ * \param variable     input variable
+ * \param rel          slot offset relative to the variable's location
+ * \param fs_attr_idx  in/out: running attribute index
+ * \param out          receives the four values of the slot
+ */
+static void declare_nir_input_fs(struct si_shader_context *ctx,
+				 struct nir_variable *variable, unsigned rel,
+				 unsigned *fs_attr_idx,
+				 LLVMValueRef out[4])
+{
+	unsigned slot = variable->data.location + rel;
+
+	assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
+
+	if (slot != VARYING_SLOT_POS) {
+		si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
+		(*fs_attr_idx)++;
+		return;
+	}
+
+	LLVMValueRef pos_w = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT);
+
+	out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
+	out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
+	out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
+	out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, pos_w);
+}
+
+/**
+ * ABI callback that loads a sampler or image descriptor.
+ *
+ * The descriptor index is dynamic_index (or 0 when it is NULL) plus
+ * base_index + constant_index. Dynamic indices are bounded with
+ * si_llvm_bound_index. Image descriptors are then addressed as
+ * SI_NUM_IMAGES - 1 - index, sampler descriptors as
+ * index + SI_NUM_IMAGES / 2, both within the list passed in the
+ * param_samplers_and_images function parameter.
+ *
+ * \param abi             shader ABI; converted back to the si_shader_context
+ * \param descriptor_set  must be 0 (asserted)
+ * \param base_index      base slot of the resource
+ * \param constant_index  constant offset added to base_index
+ * \param dynamic_index   optional run-time offset, may be NULL
+ * \param desc_type       kind of descriptor to load
+ * \param image           true for an image, false for a sampler
+ * \param write           only forwarded to si_load_image_desc
+ * \return the loaded descriptor
+ */
+static LLVMValueRef
+si_nir_load_sampler_desc(struct ac_shader_abi *abi,
+			 unsigned descriptor_set, unsigned base_index,
+			 unsigned constant_index, LLVMValueRef dynamic_index,
+			 enum ac_descriptor_type desc_type, bool image,
+			 bool write)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
+	LLVMValueRef const_part;
+	LLVMValueRef index;
+
+	assert(!descriptor_set);
+
+	index = dynamic_index ? dynamic_index : ctx->ac.i32_0;
+	const_part = LLVMConstInt(ctx->ac.i32, base_index + constant_index, false);
+	index = LLVMBuildAdd(ctx->ac.builder, index, const_part, "");
+
+	if (!image) {
+		assert(base_index + constant_index < ctx->num_samplers);
+
+		if (dynamic_index)
+			index = si_llvm_bound_index(ctx, index, ctx->num_samplers);
+
+		/* Sampler slots start at SI_NUM_IMAGES / 2 in the list. */
+		index = LLVMBuildAdd(ctx->gallivm.builder, index,
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+
+		return si_load_sampler_desc(ctx, list, index, desc_type);
+	}
+
+	assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
+	assert(base_index + constant_index < ctx->num_images);
+
+	if (dynamic_index)
+		index = si_llvm_bound_index(ctx, index, ctx->num_images);
+
+	/* Image slots are addressed in reverse: SI_NUM_IMAGES - 1 - index. */
+	index = LLVMBuildSub(ctx->gallivm.builder,
+			     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+			     index, "");
+
+	/* TODO: be smarter about when we use dcc_off */
+	return si_load_image_desc(ctx, list, index, desc_type, write);
+}
+
+/**
+ * Translate a NIR shader to LLVM IR.
+ *
+ * All input slots of vertex and fragment shaders are loaded up front and
+ * stored, bitcast to i32, in ctx->inputs. The ABI callbacks and the
+ * sampler/image counts are then set up and the shader is handed to
+ * ac_nir_translate.
+ *
+ * \param ctx  shader context that receives the inputs and ABI setup
+ * \param nir  NIR shader to translate
+ * \return always true
+ */
+bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
+{
+	struct tgsi_shader_info *info = &ctx->shader->selector->info;
+
+	unsigned fs_attr_idx = 0;
+	nir_foreach_variable(variable, &nir->inputs) {
+		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
+							   nir->info.stage == MESA_SHADER_VERTEX);
+		/* driver_location counts components (4 per slot), so every
+		 * further slot of the same variable starts 4 entries later.
+		 * Without the "+= 4" all slots of an array or matrix input
+		 * would overwrite the entries of the first slot.
+		 */
+		unsigned input_idx = variable->data.driver_location;
+
+		for (unsigned i = 0; i < attrib_count; ++i, input_idx += 4) {
+			LLVMValueRef data[4];
+
+			if (nir->info.stage == MESA_SHADER_VERTEX) {
+				declare_nir_input_vs(ctx, variable, i, data);
+			} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+				declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data);
+			} else {
+				/* No loader for this stage: "data" holds nothing
+				 * valid, so do not read it.
+				 */
+				assert(!"unsupported shader stage for NIR inputs");
+				continue;
+			}
+
+			for (unsigned chan = 0; chan < 4; chan++) {
+				ctx->inputs[input_idx + chan] =
+					LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
+			}
+		}
+	}
+
+	ctx->abi.inputs = &ctx->inputs[0];
+	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
+	ctx->abi.clamp_shadow_reference = true;
+
+	/* One past the highest declared sampler / image binding. */
+	ctx->num_samplers = util_last_bit(info->samplers_declared);
+	ctx->num_images = util_last_bit(info->images_declared);
+
+	ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
+
+	return true;
+}
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c
new file mode 100644
index 000000000..8d98d6d0d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -0,0 +1,448 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file handles register programming of primitive binning. */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "gfx9d.h"
+#include "radeon/r600_cs.h"
+
+/* A pair of unsigned values; this file uses it for bin sizes (x, y). */
+struct uvec2 {
+	unsigned x;
+	unsigned y;
+};
+
+/* One row of a bin size lookup table. A row applies to sums from its own
+ * "start" up to, but not including, the "start" of the following row.
+ * A row whose start is UINT_MAX terminates the table.
+ */
+struct si_bin_size_map {
+ unsigned start; /* lowest sum this row applies to */
+ unsigned bin_size_x; /* bin size in X; 0 makes si_emit_dpbb_state disable binning */
+ unsigned bin_size_y; /* bin size in Y; 0 makes si_emit_dpbb_state disable binning */
+};
+
+/* Rows for one "RB per SE" configuration: the first index is the log2 of the
+ * shader engine count, the second index walks up to 9 rows.
+ */
+typedef struct si_bin_size_map si_bin_size_subtable[3][9];
+
+/**
+ * Look up the bin size for "sum" in the chip-specific part of a table.
+ *
+ * The subtable is selected by ceil(log2(render backends per shader engine))
+ * and ceil(log2(shader engines)). The result is the row i with
+ * row[i].start <= sum < row[i + 1].start, or the terminating row
+ * (start == UINT_MAX) when no such row exists.
+ *
+ * \param sscreen  screen providing num_render_backends and max_se
+ * \param table    array of subtables, indexed by log2(RB per SE)
+ * \param sum      value to look up
+ * \return the bin size of the matching row
+ */
+static struct uvec2 si_find_bin_size(struct si_screen *sscreen,
+				     const si_bin_size_subtable table[],
+				     unsigned sum)
+{
+	unsigned log_num_rb_per_se =
+		util_logbase2_ceil(sscreen->b.info.num_render_backends /
+				   sscreen->b.info.max_se);
+	unsigned log_num_se = util_logbase2_ceil(sscreen->b.info.max_se);
+
+	/* Get the chip-specific subtable. */
+	const struct si_bin_size_map *row =
+		&table[log_num_rb_per_se][log_num_se][0];
+
+	while (row->start != UINT_MAX) {
+		if (sum >= row[0].start && sum < row[1].start)
+			break;
+		row++;
+	}
+
+	return (struct uvec2){row->bin_size_x, row->bin_size_y};
+}
+
+/**
+ * Compute the bin size preferred by the enabled color buffers.
+ *
+ * The bytes per element of all enabled color buffers are summed up, scaled
+ * by a function of the sample count, and looked up in a chip-specific table.
+ *
+ * \param sctx                    context providing the framebuffer state
+ * \param cb_target_enabled_4bit  mask with 4 bits per color buffer; a buffer
+ *                                counts as enabled if any of its bits is set
+ * \return the bin size; (0, 0) if the table asks for binning to be disabled
+ */
+static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
+					  unsigned cb_target_enabled_4bit)
+{
+	unsigned nr_samples = sctx->framebuffer.nr_samples;
+	unsigned sum = 0;
+
+	/* Compute the sum of all Bpp. */
+	for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+		/* The mask constant must be unsigned: for i == 7 the shift
+		 * reaches bit 31, which is undefined for a signed int.
+		 */
+		if (!(cb_target_enabled_4bit & (0xfu << (i * 4))))
+			continue;
+
+		struct r600_texture *rtex =
+			(struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
+		sum += rtex->surface.bpe;
+	}
+
+	/* Multiply the sum by some function of the number of samples. */
+	if (nr_samples >= 2) {
+		if (sctx->ps_iter_samples >= 2)
+			sum *= nr_samples;
+		else
+			sum *= 2;
+	}
+
+	static const si_bin_size_subtable table[] = {
+		{
+			/* One RB / SE */
+			{
+				/* One shader engine */
+				{        0,  128,  128 },
+				{        1,   64,  128 },
+				{        2,   32,  128 },
+				{        3,   16,  128 },
+				{       17,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  128,  128 },
+				{        2,   64,  128 },
+				{        3,   32,  128 },
+				{        5,   16,  128 },
+				{       17,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  128,  128 },
+				{        3,   64,  128 },
+				{        5,   16,  128 },
+				{       17,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+		{
+			/* Two RB / SE */
+			{
+				/* One shader engine */
+				{        0,  128,  128 },
+				{        2,   64,  128 },
+				{        3,   32,  128 },
+				{        5,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  128,  128 },
+				{        3,   64,  128 },
+				{        5,   32,  128 },
+				{        9,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  256,  256 },
+				{        2,  128,  256 },
+				{        3,  128,  128 },
+				{        5,   64,  128 },
+				{        9,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+		{
+			/* Four RB / SE */
+			{
+				/* One shader engine */
+				{        0,  128,  256 },
+				{        2,  128,  128 },
+				{        3,   64,  128 },
+				{        5,   32,  128 },
+				{        9,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  256,  256 },
+				{        2,  128,  256 },
+				{        3,  128,  128 },
+				{        5,   64,  128 },
+				{        9,   32,  128 },
+				{       17,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  256,  512 },
+				{        2,  256,  256 },
+				{        3,  128,  256 },
+				{        5,  128,  128 },
+				{        9,   64,  128 },
+				{       17,   16,  128 },
+				{       33,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+	};
+
+	return si_find_bin_size(sctx->screen, table, sum);
+}
+
+/**
+ * Compute the bin size preferred by the depth/stencil buffer.
+ *
+ * Without a bound depth/stencil buffer, or with both depth and stencil
+ * disabled in the queued DSA state, the maximum size of 512x512 is returned.
+ * Otherwise a cost of 4 * (5 if depth is enabled + 1 if the surface has
+ * stencil and stencil is enabled) * sample count is looked up in a
+ * chip-specific table.
+ *
+ * \param sctx  context providing the framebuffer and the queued DSA state
+ * \return the bin size; (0, 0) if the table asks for binning to be disabled
+ */
+static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
+{
+	struct si_state_dsa *dsa = sctx->queued.named.dsa;
+
+	if (!sctx->framebuffer.state.zsbuf ||
+	    !(dsa->depth_enabled || dsa->stencil_enabled)) {
+		/* Return the max size. */
+		return (struct uvec2){512, 512};
+	}
+
+	struct r600_texture *rtex =
+		(struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+	unsigned coeff = 0;
+
+	if (dsa->depth_enabled)
+		coeff += 5;
+	if (rtex->surface.has_stencil && dsa->stencil_enabled)
+		coeff += 1;
+
+	unsigned sum = 4 * coeff * sctx->framebuffer.nr_samples;
+
+	static const si_bin_size_subtable table[] = {
+		{
+			/* One RB / SE */
+			{
+				/* One shader engine */
+				{        0,  128,  256 },
+				{        2,  128,  128 },
+				{        4,   64,  128 },
+				{        7,   32,  128 },
+				{       13,   16,  128 },
+				{       49,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  256,  256 },
+				{        2,  128,  256 },
+				{        4,  128,  128 },
+				{        7,   64,  128 },
+				{       13,   32,  128 },
+				{       25,   16,  128 },
+				{       49,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  256,  512 },
+				{        2,  256,  256 },
+				{        4,  128,  256 },
+				{        7,  128,  128 },
+				{       13,   64,  128 },
+				{       25,   16,  128 },
+				{       49,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+		{
+			/* Two RB / SE */
+			{
+				/* One shader engine */
+				{        0,  256,  256 },
+				{        2,  128,  256 },
+				{        4,  128,  128 },
+				{        7,   64,  128 },
+				{       13,   32,  128 },
+				{       25,   16,  128 },
+				{       97,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  256,  512 },
+				{        2,  256,  256 },
+				{        4,  128,  256 },
+				{        7,  128,  128 },
+				{       13,   64,  128 },
+				{       25,   32,  128 },
+				{       49,   16,  128 },
+				{       97,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  512,  512 },
+				{        2,  256,  512 },
+				{        4,  256,  256 },
+				{        7,  128,  256 },
+				{       13,  128,  128 },
+				{       25,   64,  128 },
+				{       49,   16,  128 },
+				{       97,    0,    0 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+		{
+			/* Four RB / SE */
+			{
+				/* One shader engine */
+				{        0,  256,  512 },
+				{        2,  256,  256 },
+				{        4,  128,  256 },
+				{        7,  128,  128 },
+				{       13,   64,  128 },
+				{       25,   32,  128 },
+				{       49,   16,  128 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Two shader engines */
+				{        0,  512,  512 },
+				{        2,  256,  512 },
+				{        4,  256,  256 },
+				{        7,  128,  256 },
+				{       13,  128,  128 },
+				{       25,   64,  128 },
+				{       49,   32,  128 },
+				{       97,   16,  128 },
+				{ UINT_MAX,    0,    0 },
+			},
+			{
+				/* Four shader engines */
+				{        0,  512,  512 },
+				{        4,  256,  512 },
+				{        7,  256,  256 },
+				{       13,  128,  256 },
+				{       25,  128,  128 },
+				{       49,   64,  128 },
+				{       97,   16,  128 },
+				{ UINT_MAX,    0,    0 },
+			},
+		},
+	};
+
+	return si_find_bin_size(sctx->screen, table, sum);
+}
+
+/**
+ * Emit the register state that turns primitive binning off: the binner is
+ * put into the "disable binning, use legacy SC" mode with start-of-prim
+ * disabled, and the DFSM punchout mode is forced off.
+ *
+ * \param sctx  context whose gfx command stream receives the registers
+ */
+static void si_emit_dpbb_disable(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	unsigned binner_cntl_0 =
+		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+		S_028C44_DISABLE_START_OF_PRIM(1);
+	unsigned dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, binner_cntl_0);
+	radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, dfsm_control);
+}
+
+/**
+ * Emit the primitive binning (DPBB) and DFSM register state.
+ *
+ * Binning is disabled when it is not allowed on the screen, when no blend
+ * or DSA state is queued, when the pixel shader can kill pixels while the
+ * DB can write, or when the computed bin size has a zero dimension.
+ * Otherwise the smaller (by area) of the color and depth bin sizes is
+ * programmed together with the tunable batch parameters.
+ *
+ * \param sctx   context; must be GFX9 or later (asserted)
+ * \param state  atom being emitted (not used by this function)
+ */
+void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
+{
+	struct si_screen *sscreen = sctx->screen;
+	struct si_state_blend *blend = sctx->queued.named.blend;
+	struct si_state_dsa *dsa = sctx->queued.named.dsa;
+	unsigned db_shader_control = sctx->ps_db_shader_control;
+
+	assert(sctx->b.chip_class >= GFX9);
+
+	if (!sscreen->dpbb_allowed || !blend || !dsa) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	bool ps_can_kill = G_02880C_KILL_ENABLE(db_shader_control) ||
+			   G_02880C_MASK_EXPORT_ENABLE(db_shader_control) ||
+			   G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) ||
+			   blend->alpha_to_coverage;
+
+	/* This is ported from Vulkan, but it doesn't make much sense to me.
+	 * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this.
+	 */
+	bool ps_can_reject_z_trivially =
+		!G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
+		G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control);
+
+	/* Disable binning if PS can kill trivially with DB writes.
+	 * Ported from Vulkan. (heuristic?)
+	 */
+	if (ps_can_kill &&
+	    ps_can_reject_z_trivially &&
+	    sctx->framebuffer.state.zsbuf &&
+	    dsa->db_can_write) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	/* Compute the bin size. */
+	/* TODO: We could also look at enabled pixel shader outputs. */
+	unsigned cb_target_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit &
+					  blend->cb_target_enabled_4bit;
+	struct uvec2 color_bin_size =
+		si_get_color_bin_size(sctx, cb_target_enabled_4bit);
+	struct uvec2 depth_bin_size = si_get_depth_bin_size(sctx);
+
+	unsigned color_area = color_bin_size.x * color_bin_size.y;
+	unsigned depth_area = depth_bin_size.x * depth_bin_size.y;
+
+	/* Use whichever of the two sizes covers the smaller area. */
+	struct uvec2 bin_size = color_area < depth_area ? color_bin_size
+							: depth_bin_size;
+
+	/* A zero dimension is the lookup tables' way of disabling binning. */
+	if (!bin_size.x || !bin_size.y) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	/* Enable DFSM if it's preferred. */
+	unsigned punchout_mode = V_028060_FORCE_OFF;
+	bool disable_start_of_prim = true;
+
+	if (sscreen->dfsm_allowed &&
+	    cb_target_enabled_4bit &&
+	    !G_02880C_KILL_ENABLE(db_shader_control) &&
+	    /* These two also imply that DFSM is disabled when PS writes to memory. */
+	    !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) &&
+	    !G_02880C_EXEC_ON_NOOP(db_shader_control) &&
+	    G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) {
+		punchout_mode = V_028060_AUTO;
+		disable_start_of_prim = (cb_target_enabled_4bit &
+					 blend->blend_enable_4bit) != 0;
+	}
+
+	/* Tunable parameters. Also test with DFSM enabled/disabled.
+	 *
+	 * They start out with the values tuned for Raven so that they are
+	 * never read uninitialized when assertions are compiled out and the
+	 * chip family is not listed in the switch below.
+	 */
+	unsigned context_states_per_bin = 5; /* allowed range: [0, 5] */
+	unsigned persistent_states_per_bin = 31; /* allowed range: [0, 31] */
+	unsigned fpovs_per_batch = 63; /* allowed range: [0, 255], 0 = unlimited */
+
+	switch (sctx->b.family) {
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		/* Tuned for Raven. Vega might need different values. */
+		break;
+	default:
+		/* Unknown family: flag it in debug builds, otherwise keep
+		 * the values set above.
+		 */
+		assert(0);
+		break;
+	}
+
+	/* Emit registers.
+	 * A size of 16 is flagged in BIN_SIZE_X/Y; sizes of 32 and above are
+	 * encoded in the *_EXTEND fields as log2(size) - 5.
+	 */
+	struct uvec2 bin_size_extend = {0, 0};
+	if (bin_size.x >= 32)
+		bin_size_extend.x = util_logbase2(bin_size.x) - 5;
+	if (bin_size.y >= 32)
+		bin_size_extend.y = util_logbase2(bin_size.y) - 5;
+
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
+		S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+		S_028C44_BIN_SIZE_X(bin_size.x == 16) |
+		S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
+		S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
+		S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
+		S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
+		S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
+		S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+		S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+		S_028C44_OPTIMAL_BIN_SELECTION(1));
+	radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
+			       S_028060_PUNCHOUT_MODE(punchout_mode));
+}
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c
new file mode 100644
index 000000000..133f1e4aa
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+/* For MSAA sample positions.
+ *
+ * Packs four (x, y) sample positions into one 32-bit value. Each coordinate
+ * is truncated to its low 4 bits; sample 0 occupies the lowest byte (x in
+ * the low nibble, y in the high nibble) and sample 3 the highest byte.
+ */
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+	((((unsigned)(s0x) & 0xf) <<  0) | (((unsigned)(s0y) & 0xf) <<  4) | \
+	 (((unsigned)(s1x) & 0xf) <<  8) | (((unsigned)(s1y) & 0xf) << 12) | \
+	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
+	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+
+/* 2xMSAA
+ * There are two locations (4, 4), (-4, -4).
+ *
+ * Each entry is one value built by FILL_SREG, i.e. four packed (x, y)
+ * positions. si_emit_sample_locations writes entries 0..3 to the X0Y0, X1Y0,
+ * X0Y1 and X1Y1 sample location registers; si_get_sample_position only
+ * decodes entry 0. */
+static const uint32_t sample_locs_2x[4] = {
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+};
+/* 4xMSAA
+ * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6).
+ *
+ * Same layout as the 2x table: one entry per pixel register, all equal. */
+static const uint32_t sample_locs_4x[4] = {
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+};
+
+/* Cayman 8xMSAA
+ *
+ * Entries 0..3 hold samples 0-3 and entries 4..7 hold samples 4-7; within
+ * each group of four there is one entry per pixel register. */
+static const uint32_t sample_locs_8x[] = {
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+};
+/* Cayman 16xMSAA
+ *
+ * Four groups of four entries: group g (entries 4*g .. 4*g+3) holds samples
+ * 4*g .. 4*g+3, with one entry per pixel register inside each group. */
+static const uint32_t sample_locs_16x[] = {
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+};
+
+/* Decode one packed (x, y) sample position.
+ *
+ * "packed" is a value built by FILL_SREG and "offset" the bit position of
+ * the wanted x nibble (y follows 4 bits later). Each nibble is read as a
+ * signed 4-bit number through a bitfield and mapped to (value + 8) / 16.
+ */
+static void si_decode_sample_loc(uint32_t packed, int offset, float *out_value)
+{
+	struct {
+		int idx:4;
+	} val;
+
+	val.idx = (packed >> offset) & 0xf;
+	out_value[0] = (float)(val.idx + 8) / 16.0f;
+	val.idx = (packed >> (offset + 4)) & 0xf;
+	out_value[1] = (float)(val.idx + 8) / 16.0f;
+}
+
+/**
+ * Return the position of one sample within a pixel.
+ *
+ * \param ctx           pipe context (not used)
+ * \param sample_count  number of samples per pixel; 2, 4, 8 and 16 use the
+ *                      sample location tables, anything else yields the
+ *                      pixel center (0.5, 0.5)
+ * \param sample_index  index of the sample
+ * \param out_value     receives x in [0] and y in [1]
+ */
+static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+				   unsigned sample_index, float *out_value)
+{
+	int offset, index;
+
+	switch (sample_count) {
+	case 2:
+		/* All samples live in the first table entry, 8 bits apart. */
+		offset = 4 * (sample_index * 2);
+		si_decode_sample_loc(sample_locs_2x[0], offset, out_value);
+		break;
+	case 4:
+		offset = 4 * (sample_index * 2);
+		si_decode_sample_loc(sample_locs_4x[0], offset, out_value);
+		break;
+	case 8:
+		/* Four samples per entry; the next four start 4 entries later. */
+		offset = 4 * (sample_index % 4 * 2);
+		index = (sample_index / 4) * 4;
+		si_decode_sample_loc(sample_locs_8x[index], offset, out_value);
+		break;
+	case 16:
+		offset = 4 * (sample_index % 4 * 2);
+		index = (sample_index / 4) * 4;
+		si_decode_sample_loc(sample_locs_16x[index], offset, out_value);
+		break;
+	case 1:
+	default:
+		out_value[0] = 0.5;
+		out_value[1] = 0.5;
+		break;
+	}
+}
+
+/* Write one value to each of the four per-pixel sample location registers
+ * (X0Y0, X1Y0, X0Y1, X1Y1), in that order.
+ */
+static void si_emit_sample_locs_per_pixel(struct radeon_winsys_cs *cs,
+					  const uint32_t locs[4])
+{
+	radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, locs[0]);
+	radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, locs[1]);
+	radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, locs[2]);
+	radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, locs[3]);
+}
+
+/**
+ * Emit the sample location registers for the given sample count.
+ *
+ * \param cs          command stream to emit into
+ * \param nr_samples  2, 4, 8 or 16 select the matching table; any other
+ *                    value programs all-zero locations
+ */
+void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples)
+{
+	static const uint32_t sample_locs_none[4] = {0, 0, 0, 0};
+
+	switch (nr_samples) {
+	case 2:
+		si_emit_sample_locs_per_pixel(cs, sample_locs_2x);
+		break;
+	case 4:
+		si_emit_sample_locs_per_pixel(cs, sample_locs_4x);
+		break;
+	case 8:
+		/* Per pixel: two dwords of locations followed by two zero
+		 * dwords; the zeros after the last pixel are not emitted,
+		 * which gives 14 dwords in total.
+		 */
+		radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+		for (unsigned pixel = 0; pixel < 4; pixel++) {
+			radeon_emit(cs, sample_locs_8x[pixel]);
+			radeon_emit(cs, sample_locs_8x[pixel + 4]);
+			if (pixel < 3) {
+				radeon_emit(cs, 0);
+				radeon_emit(cs, 0);
+			}
+		}
+		break;
+	case 16:
+		/* Per pixel: four dwords, one from each group of four samples. */
+		radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+		for (unsigned pixel = 0; pixel < 4; pixel++) {
+			for (unsigned group = 0; group < 4; group++)
+				radeon_emit(cs, sample_locs_16x[group * 4 + pixel]);
+		}
+		break;
+	case 1:
+	default:
+		si_emit_sample_locs_per_pixel(cs, sample_locs_none);
+		break;
+	}
+}
+
+/**
+ * Install the get_sample_position callback on the context and fill the
+ * context's per-sample-count position arrays (1x, 2x, 4x, 8x, 16x) by
+ * calling that callback for every sample.
+ *
+ * \param sctx  context to initialize
+ */
+void si_init_msaa_functions(struct si_context *sctx)
+{
+	struct pipe_context *pipe = &sctx->b.b;
+
+	pipe->get_sample_position = si_get_sample_position;
+
+	si_get_sample_position(pipe, 1, 0, sctx->sample_locations_1x[0]);
+
+	for (unsigned s = 0; s < 2; s++)
+		si_get_sample_position(pipe, 2, s, sctx->sample_locations_2x[s]);
+
+	for (unsigned s = 0; s < 4; s++)
+		si_get_sample_position(pipe, 4, s, sctx->sample_locations_4x[s]);
+
+	for (unsigned s = 0; s < 8; s++)
+		si_get_sample_position(pipe, 8, s, sctx->sample_locations_8x[s]);
+
+	for (unsigned s = 0; s < 16; s++)
+		si_get_sample_position(pipe, 16, s, sctx->sample_locations_16x[s]);
+}
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c
new file mode 100644
index 000000000..9971bc815
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák <maraeo@gmail.com>
+ *
+ */
+
+#include "si_pipe.h"
+#include "si_state.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+#include "util/u_memory.h"
+
+static void si_set_streamout_enable(struct si_context *sctx, bool enable);
+
+/* Same as pipe_so_target_reference(), except that dst has the driver's target type. */
+static inline void si_so_target_reference(struct si_streamout_target **dst, struct pipe_stream_output_target *src)
+{
+	pipe_so_target_reference((struct pipe_stream_output_target **)dst, src);
+}
+
+/* Creates a streamout target for the range [buffer_offset, buffer_offset +
+ * buffer_size) of "buffer". Returns NULL if either allocation fails. */
+static struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx, struct pipe_resource *buffer,
+		    unsigned buffer_offset, unsigned buffer_size)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct r600_resource *res = (struct r600_resource *)buffer;
+	struct si_streamout_target *target = CALLOC_STRUCT(si_streamout_target);
+
+	if (target == NULL)
+		return NULL;
+
+	/* A slot from the zeroed-memory suballocator backs buf_filled_size. */
+	u_suballocator_alloc(sctx->b.allocator_zeroed_memory, 4, 4,
+			     &target->buf_filled_size_offset,
+			     (struct pipe_resource **)&target->buf_filled_size);
+	if (target->buf_filled_size == NULL) {
+		FREE(target);
+		return NULL;
+	}
+
+	target->b.reference.count = 1;
+	target->b.context = ctx;
+	target->b.buffer_offset = buffer_offset;
+	target->b.buffer_size = buffer_size;
+	pipe_resource_reference(&target->b.buffer, buffer);
+
+	/* Extend the resource's valid_buffer_range by the target's range. */
+	util_range_add(&res->valid_buffer_range, buffer_offset,
+		       buffer_offset + buffer_size);
+	return &target->b;
+}
+
+/* Drops the target's references to its buffer and to buf_filled_size, then frees it. */
+static void si_so_target_destroy(struct pipe_context *ctx, struct pipe_stream_output_target *target)
+{
+	struct si_streamout_target *so = (struct si_streamout_target *)target;
+	pipe_resource_reference(&so->b.buffer, NULL);
+	r600_resource_reference(&so->buf_filled_size, NULL);
+	FREE(so);
+}
+
+/* Marks begin_atom dirty and enables streamout, unless enabled_mask is empty. */
+void si_streamout_buffers_dirty(struct si_context *sctx)
+{
+	if (sctx->streamout.enabled_mask != 0) {
+		si_mark_atom_dirty(sctx, &sctx->streamout.begin_atom);
+		si_set_streamout_enable(sctx, true);
+	}
+}
+
+static void si_set_streamout_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{ /* Binds num_targets streamout targets, ending streamout on the old ones first. */
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_buffer_resources *buffers = &sctx->rw_buffers;
+ struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ unsigned old_num_targets = sctx->streamout.num_targets; /* used below to clear slots that become unused */
+ unsigned i, bufidx;
+
+ /* We are going to unbind the buffers. Mark which caches need to be flushed. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
+ /* Since streamout uses vector writes which go through TC L2
+ * and most other clients can use TC L2 as well, we don't need
+ * to flush it.
+ *
+ * The only cases which require flushing it are VGT DMA index
+ * fetching (on <= CIK) and indirect draw data, which are rare
+ * cases. Thus, flag the TC L2 dirtiness in the resource and
+ * handle it at draw call time.
+ */
+ for (i = 0; i < sctx->streamout.num_targets; i++)
+ if (sctx->streamout.targets[i])
+ r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+ /* Invalidate the scalar cache in case a streamout buffer is
+ * going to be used as a constant buffer.
+ *
+ * Invalidate TC L1, because streamout bypasses it (done by
+ * setting GLC=1 in the store instruction), but it can contain
+ * outdated data of streamout buffers.
+ *
+ * VS_PARTIAL_FLUSH is required if the buffers are going to be
+ * used as an input immediately.
+ */
+ sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_VS_PARTIAL_FLUSH;
+ }
+
+ /* All readers of the streamout targets need to be finished before we can
+ * start writing to the targets.
+ */
+ if (num_targets)
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ /* Streamout buffers must be bound in 2 places:
+ * 1) in VGT by setting the VGT_STRMOUT registers
+ * 2) as shader resources
+ */
+
+ /* Stop streamout. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
+ si_emit_streamout_end(sctx);
+
+ /* Set the new targets. */
+ unsigned enabled_mask = 0, append_bitmask = 0;
+ for (i = 0; i < num_targets; i++) {
+ si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
+ if (!targets[i])
+ continue;
+
+ r600_context_add_resource_size(ctx, targets[i]->buffer);
+ enabled_mask |= 1 << i;
+
+ if (offsets[i] == ((unsigned)-1)) /* offset -1: flag the target in append_bitmask */
+ append_bitmask |= 1 << i;
+ }
+
+ for (; i < sctx->streamout.num_targets; i++) /* release old targets past the new count */
+ si_so_target_reference(&sctx->streamout.targets[i], NULL);
+
+ sctx->streamout.enabled_mask = enabled_mask;
+ sctx->streamout.num_targets = num_targets;
+ sctx->streamout.append_bitmask = append_bitmask;
+
+ /* Update dirty state bits. */
+ if (num_targets) {
+ si_streamout_buffers_dirty(sctx);
+ } else {
+ si_set_atom_dirty(sctx, &sctx->streamout.begin_atom, false);
+ si_set_streamout_enable(sctx, false);
+ }
+
+ /* Set the shader resources. */
+ for (i = 0; i < num_targets; i++) {
+ bufidx = SI_VS_STREAMOUT_BUF0 + i;
+
+ if (targets[i]) {
+ struct pipe_resource *buffer = targets[i]->buffer;
+ uint64_t va = r600_resource(buffer)->gpu_address;
+
+ /* Set the descriptor.
+ *
+ * On VI, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t *desc = descs->list + bufidx*4;
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ desc[2] = 0xffffffff;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ /* Set the resource. */
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ buffer);
+ radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource*)buffer,
+ buffers->shader_usage,
+ RADEON_PRIO_SHADER_RW_BUFFER,
+ true);
+ r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
+
+ buffers->enabled_mask |= 1u << bufidx;
+ } else {
+ /* Clear the descriptor and unset the resource. */
+ memset(descs->list + bufidx*4, 0,
+ sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ NULL);
+ buffers->enabled_mask &= ~(1u << bufidx);
+ }
+ }
+ for (; i < old_num_targets; i++) { /* slots bound before this call but not rebound */
+ bufidx = SI_VS_STREAMOUT_BUF0 + i;
+ /* Clear the descriptor and unset the resource. */
+ memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx], NULL);
+ buffers->enabled_mask &= ~(1u << bufidx);
+ }
+
+ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
+}
+
+/* Emits SO_VGTSTREAMOUT_FLUSH and waits for OFFSET_UPDATE_DONE in CP_STRMOUT_CNTL. */
+static void si_flush_vgt_streamout(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	unsigned cntl_reg;
+
+	/* Zero the register first; where it lives depends on the ASIC. */
+	if (sctx->b.chip_class < CIK) {
+		cntl_reg = R_0084FC_CP_STRMOUT_CNTL;
+		radeon_set_config_reg(cs, cntl_reg, 0);
+	} else {
+		cntl_reg = R_0300FC_CP_STRMOUT_CNTL;
+		radeon_set_uconfig_reg(cs, cntl_reg, 0);
+	}
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL);	/* wait until register == reference value */
+	radeon_emit(cs, cntl_reg >> 2);		/* register */
+	radeon_emit(cs, 0);
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1));	/* reference value */
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1));	/* mask */
+	radeon_emit(cs, 4);			/* poll interval */
+}
+
+static void si_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
+{ /* Emits buffer size, stride and start offset for every bound streamout target. */
+ struct si_context *sctx = (struct si_context*)rctx;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ struct si_streamout_target **t = sctx->streamout.targets;
+ uint16_t *stride_in_dw = sctx->streamout.stride_in_dw;
+ unsigned i;
+
+ si_flush_vgt_streamout(sctx);
+
+ for (i = 0; i < sctx->streamout.num_targets; i++) {
+ if (!t[i])
+ continue;
+
+ t[i]->stride_in_dw = stride_in_dw[i]; /* keep a copy of the stride on the target */
+
+ /* SI binds streamout buffers as shader resources.
+ * VGT only counts primitives and tells the shader
+ * through SGPRs what to do. */
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+ radeon_emit(cs, (t[i]->b.buffer_offset +
+ t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+
+ if (sctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) { /* append needs a size stored by si_emit_streamout_end */
+ uint64_t va = t[i]->buf_filled_size->gpu_address +
+ t[i]->buf_filled_size_offset;
+
+ /* Append. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va); /* src address lo */
+ radeon_emit(cs, va >> 32); /* src address hi */
+
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ t[i]->buf_filled_size,
+ RADEON_USAGE_READ,
+ RADEON_PRIO_SO_FILLED_SIZE);
+ } else {
+ /* Start from the beginning. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
+
+ sctx->streamout.begin_emitted = true;
+}
+
+void si_emit_streamout_end(struct si_context *sctx)
+{ /* Stores the filled size of every bound target and zeroes its VGT buffer size. */
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ struct si_streamout_target **t = sctx->streamout.targets;
+ unsigned i;
+ uint64_t va;
+
+ si_flush_vgt_streamout(sctx);
+
+ for (i = 0; i < sctx->streamout.num_targets; i++) {
+ if (!t[i])
+ continue;
+
+ va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ t[i]->buf_filled_size,
+ RADEON_USAGE_WRITE,
+ RADEON_PRIO_SO_FILLED_SIZE);
+
+ /* Zero the buffer size. The counters (primitives generated,
+ * primitives emitted) may be enabled even if there is no
+ * buffer bound. This ensures that the primitives-emitted query
+ * won't increment. */
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+
+ t[i]->buf_filled_size_valid = true; /* si_emit_streamout_begin checks this before appending */
+ }
+
+ sctx->streamout.begin_emitted = false;
+ sctx->b.flags |= R600_CONTEXT_STREAMOUT_FLUSH;
+}
+
+/* STREAMOUT CONFIG DERIVED STATE
+ *
+ * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
+ * The buffer mask is an independent state, so no writes occur if there
+ * are no buffers bound.
+ */
+
+/* Emits VGT_STRMOUT_CONFIG and the register after it (the buffer enable mask). */
+static void si_emit_streamout_enable(struct r600_common_context *rctx,
+				     struct r600_atom *atom)
+{
+	struct si_context *sctx = (struct si_context *)rctx;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	bool en = si_get_strmout_en(sctx);
+	unsigned buffer_en = sctx->streamout.hw_enabled_mask &
+			     sctx->streamout.enabled_stream_buffers_mask;
+
+	radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	radeon_emit(cs, S_028B94_STREAMOUT_0_EN(en) | S_028B94_RAST_STREAM(0) |
+			S_028B94_STREAMOUT_1_EN(en) | S_028B94_STREAMOUT_2_EN(en) |
+			S_028B94_STREAMOUT_3_EN(en));
+	radeon_emit(cs, buffer_en);
+}
+
+/* Records the enable flag, rebuilds hw_enabled_mask from enabled_mask and marks
+ * enable_atom dirty if the effective enable or the mask changed. */
+static void si_set_streamout_enable(struct si_context *sctx, bool enable)
+{
+	const bool was_enabled = si_get_strmout_en(sctx);
+	const unsigned prev_mask = sctx->streamout.hw_enabled_mask;
+	const unsigned bufs = sctx->streamout.enabled_mask;
+
+	sctx->streamout.streamout_enabled = enable;
+	/* Replicate enabled_mask at bit offsets 0, 4, 8 and 12. */
+	sctx->streamout.hw_enabled_mask = bufs | (bufs << 4) | (bufs << 8) | (bufs << 12);
+	if (was_enabled == si_get_strmout_en(sctx) &&
+	    prev_mask == sctx->streamout.hw_enabled_mask)
+		return;
+	si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+}
+
+/* Adds diff to the count of active PRIMITIVES_GENERATED queries and marks
+ * enable_atom dirty when that flips the streamout enable state. */
+void si_update_prims_generated_query_state(struct si_context *sctx, unsigned type, int diff)
+{
+	bool was_enabled;
+
+	if (type != PIPE_QUERY_PRIMITIVES_GENERATED)
+		return;
+
+	was_enabled = si_get_strmout_en(sctx);
+	sctx->streamout.num_prims_gen_queries += diff;
+	assert(sctx->streamout.num_prims_gen_queries >= 0);
+	sctx->streamout.prims_gen_query_enabled = sctx->streamout.num_prims_gen_queries != 0;
+	if (was_enabled != si_get_strmout_en(sctx))
+		si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+}
+
+void si_init_streamout_functions(struct si_context *sctx)
+{
+	sctx->streamout.begin_atom.emit = si_emit_streamout_begin;	/* atom emit hooks */
+	sctx->streamout.enable_atom.emit = si_emit_streamout_enable;
+	sctx->b.b.create_stream_output_target = si_create_so_target;	/* context entry points */
+	sctx->b.b.stream_output_target_destroy = si_so_target_destroy;
+	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+}
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c b/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c
new file mode 100644
index 000000000..f41655847
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+#include "util/u_viewport.h"
+#include "tgsi/tgsi_scan.h"
+
+#define SI_MAX_SCISSOR 16384
+
+/* Stores the scissors for slots [start_slot, start_slot + num_scissors) and, if
+ * the queued rasterizer has scissor_enable set, marks those slots dirty. */
+static void si_set_scissor_states(struct pipe_context *pctx,
+				  unsigned start_slot, unsigned num_scissors,
+				  const struct pipe_scissor_state *state)
+{
+	struct si_context *ctx = (struct si_context *)pctx;
+	unsigned slot;
+
+	for (slot = 0; slot < num_scissors; slot++)
+		ctx->scissors.states[start_slot + slot] = state[slot];
+
+	if (ctx->queued.named.rasterizer &&
+	    ctx->queued.named.rasterizer->scissor_enable) {
+		ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+		si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+	}
+}
+
+/* Since the guard band disables clipping, we have to clip per-pixel
+ * using a scissor.
+ *
+ * Computes the window-space bounding box of a viewport (inverted viewports
+ * included) and stores it in "scissor" as integer coordinates.
+ */
+static void si_get_scissor_from_viewport(struct si_context *ctx,
+					 const struct pipe_viewport_state *vp,
+					 struct si_signed_scissor *scissor)
+{
+	float x0, y0, x1, y1;
+	float left, top, right, bottom;
+
+	/* Map the clip-space corners (-1, -1) and (1, 1) to window space
+	 * with the viewport's scale and translate. */
+	x0 = -vp->scale[0] + vp->translate[0];
+	y0 = -vp->scale[1] + vp->translate[1];
+	x1 = vp->scale[0] + vp->translate[0];
+	y1 = vp->scale[1] + vp->translate[1];
+
+	/* An inverted viewport yields x0 > x1 or y0 > y1; pick the smaller
+	 * value of each pair as the minimum bound. */
+	left = x0 > x1 ? x1 : x0;
+	right = x0 > x1 ? x0 : x1;
+	top = y0 > y1 ? y1 : y0;
+	bottom = y0 > y1 ? y0 : y1;
+
+	/* Convert to integer; the max bounds are rounded up first. */
+	scissor->minx = left;
+	scissor->miny = top;
+	scissor->maxx = ceilf(right);
+	scissor->maxy = ceilf(bottom);
+}
+
+/* Clamps every bound of "scissor" to [0, SI_MAX_SCISSOR] and stores it in "out". */
+static void si_clamp_scissor(struct si_context *ctx, struct pipe_scissor_state *out,
+			     struct si_signed_scissor *scissor)
+{
+	out->minx = CLAMP(scissor->minx, 0, SI_MAX_SCISSOR);
+	out->maxx = CLAMP(scissor->maxx, 0, SI_MAX_SCISSOR);
+	out->miny = CLAMP(scissor->miny, 0, SI_MAX_SCISSOR);
+	out->maxy = CLAMP(scissor->maxy, 0, SI_MAX_SCISSOR);
+}
+
+/* Shrinks "out" in place to its intersection with "clip". */
+static void si_clip_scissor(struct pipe_scissor_state *out, struct pipe_scissor_state *clip)
+{
+	out->minx = MAX2(out->minx, clip->minx);
+	out->maxx = MIN2(out->maxx, clip->maxx);
+	out->miny = MAX2(out->miny, clip->miny);
+	out->maxy = MIN2(out->maxy, clip->maxy);
+}
+
+/* Grows "out" in place to the bounding box of "out" and "in". */
+static void si_scissor_make_union(struct si_signed_scissor *out, struct si_signed_scissor *in)
+{
+	out->minx = MIN2(out->minx, in->minx);
+	out->maxx = MAX2(out->maxx, in->maxx);
+	out->miny = MIN2(out->miny, in->miny);
+	out->maxy = MAX2(out->maxy, in->maxy);
+}
+
+/* Emits one TL/BR scissor pair: the clamped viewport scissor (or the maximum
+ * rectangle when vs_disables_clipping_viewport is set), clipped by "scissor"
+ * if that is non-NULL. */
+static void si_emit_one_scissor(struct si_context *ctx, struct radeon_winsys_cs *cs,
+				struct si_signed_scissor *vp_scissor,
+				struct pipe_scissor_state *scissor)
+{
+	struct pipe_scissor_state rect;
+
+	if (!ctx->vs_disables_clipping_viewport) {
+		si_clamp_scissor(ctx, &rect, vp_scissor);
+	} else {
+		rect.minx = rect.miny = 0;
+		rect.maxx = rect.maxy = SI_MAX_SCISSOR;
+	}
+
+	if (scissor != NULL)
+		si_clip_scissor(&rect, scissor);
+
+	radeon_emit(cs, S_028250_TL_X(rect.minx) | S_028250_TL_Y(rect.miny) |
+			S_028250_WINDOW_OFFSET_DISABLE(1));
+	radeon_emit(cs, S_028254_BR_X(rect.maxx) | S_028254_BR_Y(rect.maxy));
+}
+
+/* the range is [-MAX, MAX] */
+#define GET_MAX_VIEWPORT_RANGE(rctx) (32768)
+
+static void si_emit_guardband(struct si_context *ctx,
+ struct si_signed_scissor *vp_as_scissor)
+{ /* Computes the guard band and discard adjusts from a viewport scissor and emits them. */
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+ float discard_x, discard_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor->minx == vp_as_scissor->maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor->miny == vp_as_scissor->maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * Use a limit one pixel smaller to allow for some precision error.
+ */
+ max_range = GET_MAX_VIEWPORT_RANGE(ctx) - 1;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = ( max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = ( max_range - vp.translate[1]) / vp.scale[1];
+
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); /* the limits must enclose the [-1, 1] clip-space range */
+
+ guardband_x = MIN2(-left, right);
+ guardband_y = MIN2(-top, bottom);
+
+ discard_x = 1.0;
+ discard_y = 1.0;
+
+ if (unlikely(ctx->current_rast_prim < PIPE_PRIM_TRIANGLES) &&
+ ctx->queued.named.rasterizer) {
+ /* When rendering wide points or lines, we need to be more
+ * conservative about when to discard them entirely. */
+ const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
+ float pixels;
+
+ if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
+ pixels = rs->max_point_size;
+ else
+ pixels = rs->line_width;
+
+ /* Add half the point size / line width */
+ discard_x += pixels / (2.0 * vp.scale[0]);
+ discard_y += pixels / (2.0 * vp.scale[1]);
+
+ /* Discard primitives that would lie entirely outside the clip
+ * region. */
+ discard_x = MIN2(discard_x, guardband_x);
+ discard_y = MIN2(discard_y, guardband_y);
+ }
+
+ /* If any of the GB registers is updated, all of them must be updated. */
+ radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+
+ radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+}
+
+/* Atom emit function for scissors: emits the dirty viewport scissors followed by
+ * the guard band. If the VS does not write the viewport index, only slot 0 is
+ * considered. */
+static void si_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+	struct si_context *ctx = (struct si_context *)rctx;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_scissor_state *states = ctx->scissors.states;
+	struct si_signed_scissor *vp_scissors = ctx->viewports.as_scissor;
+	struct si_signed_scissor bounds;
+	unsigned dirty = ctx->scissors.dirty_mask;
+	bool use_scissor = ctx->queued.named.rasterizer &&
+			   ctx->queued.named.rasterizer->scissor_enable;
+	int slot;
+
+	if (!ctx->vs_writes_viewport_index) {
+		/* The simple case: Only 1 viewport is active. */
+		if (dirty & 1) {
+			radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+			si_emit_one_scissor(ctx, cs, &vp_scissors[0],
+					    use_scissor ? &states[0] : NULL);
+			si_emit_guardband(ctx, &vp_scissors[0]);
+			ctx->scissors.dirty_mask &= ~1; /* clear one bit */
+		}
+		return;
+	}
+
+	/* Shaders can draw to any viewport. Make a union of all viewports. */
+	bounds = vp_scissors[0];
+	for (slot = 1; slot < SI_MAX_VIEWPORTS; slot++)
+		si_scissor_make_union(&bounds, &vp_scissors[slot]);
+
+	/* Emit each run of consecutive dirty slots as one register sequence. */
+	while (dirty) {
+		int first, num;
+
+		u_bit_scan_consecutive_range(&dirty, &first, &num);
+
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+					       first * 4 * 2, num * 2);
+		for (slot = first; slot < first + num; slot++)
+			si_emit_one_scissor(ctx, cs, &vp_scissors[slot],
+					    use_scissor ? &states[slot] : NULL);
+	}
+
+	si_emit_guardband(ctx, &bounds);
+	ctx->scissors.dirty_mask = 0;
+}
+
+/* Stores viewports [start_slot, start_slot + num_viewports), derives their
+ * scissors and marks viewport, depth-range and scissor state dirty. */
+static void si_set_viewport_states(struct pipe_context *pctx,
+				   unsigned start_slot, unsigned num_viewports,
+				   const struct pipe_viewport_state *state)
+{
+	struct si_context *ctx = (struct si_context *)pctx;
+	const unsigned dirty = ((1 << num_viewports) - 1) << start_slot;
+	unsigned n;
+
+	for (n = 0; n < num_viewports; n++) {
+		const struct pipe_viewport_state *vp = &state[n];
+
+		ctx->viewports.states[start_slot + n] = *vp;
+		si_get_scissor_from_viewport(ctx, vp,
+					     &ctx->viewports.as_scissor[start_slot + n]);
+	}
+
+	ctx->viewports.dirty_mask |= dirty;
+	ctx->viewports.depth_range_dirty_mask |= dirty;
+	ctx->scissors.dirty_mask |= dirty;
+	si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+}
+
+/* Emits scale then translate for each of the three axes, in index order. */
+static void si_emit_one_viewport(struct si_context *ctx,
+				 struct pipe_viewport_state *state)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	unsigned axis;
+
+	for (axis = 0; axis < 3; axis++) {
+		radeon_emit(cs, fui(state->scale[axis]));
+		radeon_emit(cs, fui(state->translate[axis]));
+	}
+}
+
+/* Emits the dirty viewport transforms: only slot 0 unless the VS writes the
+ * viewport index, otherwise every dirty slot. */
+static void si_emit_viewports(struct si_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_viewport_state *states = ctx->viewports.states;
+	unsigned dirty = ctx->viewports.dirty_mask;
+
+	if (!ctx->vs_writes_viewport_index) {
+		/* The simple case: Only 1 viewport is active. */
+		if (dirty & 1) {
+			radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+			si_emit_one_viewport(ctx, &states[0]);
+			ctx->viewports.dirty_mask &= ~1; /* clear one bit */
+		}
+		return;
+	}
+
+	while (dirty) {
+		int first, num, slot;
+
+		u_bit_scan_consecutive_range(&dirty, &first, &num);
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+					       first * 4 * 6, num * 6);
+		for (slot = first; slot < first + num; slot++)
+			si_emit_one_viewport(ctx, &states[slot]);
+	}
+	ctx->viewports.dirty_mask = 0;
+}
+
+/* Yields [0, 1] for window-space positions, else defers to util_viewport_zmin_zmax(). */
+static inline void si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+					 bool window_space_position, float *zmin, float *zmax)
+{
+	if (!window_space_position) {
+		util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
+	} else {
+		*zmin = 0;
+		*zmax = 1;
+	}
+}
+
+/* Emits the dirty PA_SC_VPORT_ZMIN/ZMAX pairs: only slot 0 unless the VS writes
+ * the viewport index, otherwise every dirty slot. */
+static void si_emit_depth_ranges(struct si_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_viewport_state *states = ctx->viewports.states;
+	unsigned dirty = ctx->viewports.depth_range_dirty_mask;
+	bool window_space = ctx->vs_disables_clipping_viewport;
+	bool halfz = ctx->queued.named.rasterizer &&
+		     ctx->queued.named.rasterizer->clip_halfz;
+	float z_lo, z_hi;
+
+	if (!ctx->vs_writes_viewport_index) {
+		/* The simple case: Only 1 viewport is active. */
+		if (!(dirty & 1))
+			return;
+
+		si_viewport_zmin_zmax(&states[0], halfz, window_space, &z_lo, &z_hi);
+
+		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+		radeon_emit(cs, fui(z_lo));
+		radeon_emit(cs, fui(z_hi));
+		ctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
+		return;
+	}
+
+	while (dirty) {
+		int first, num, slot;
+
+		u_bit_scan_consecutive_range(&dirty, &first, &num);
+
+		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+					       first * 4 * 2, num * 2);
+		for (slot = first; slot < first + num; slot++) {
+			si_viewport_zmin_zmax(&states[slot], halfz, window_space,
+					      &z_lo, &z_hi);
+			radeon_emit(cs, fui(z_lo));
+			radeon_emit(cs, fui(z_hi));
+		}
+	}
+
+	ctx->viewports.depth_range_dirty_mask = 0;
+}
+
+/* Atom emit function for viewports: transforms first, then depth ranges. */
+static void si_emit_viewport_states(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+	struct si_context *sctx = (struct si_context *)rctx;
+	si_emit_viewports(sctx);
+	si_emit_depth_ranges(sctx);
+}
+
+/**
+ * This reacts to 2 state changes:
+ * - VS.writes_viewport_index
+ * - VS output position in window space (enable/disable)
+ *
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ */
+void si_update_vs_viewport_state(struct si_context *ctx)
+{
+	const unsigned all_slots = (1 << SI_MAX_VIEWPORTS) - 1;
+	struct tgsi_shader_info *info = si_get_vs_info(ctx);
+	bool window_space;
+
+	if (info == NULL)
+		return;
+
+	/* When the VS disables clipping and viewport transformation. */
+	window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+
+	if (window_space != ctx->vs_disables_clipping_viewport) {
+		ctx->vs_disables_clipping_viewport = window_space;
+		ctx->scissors.dirty_mask = all_slots;
+		ctx->viewports.depth_range_dirty_mask = all_slots;
+		si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+		si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	}
+
+	/* Viewport index handling: once the VS writes the viewport index, any
+	 * state that is still flagged dirty has to be emitted as well. */
+	ctx->vs_writes_viewport_index = info->writes_viewport_index;
+	if (ctx->vs_writes_viewport_index) {
+		if (ctx->scissors.dirty_mask)
+			si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+
+		if (ctx->viewports.dirty_mask ||
+		    ctx->viewports.depth_range_dirty_mask)
+			si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	}
+}
+
+/* Installs the scissor/viewport state setters and the matching atom emit hooks. */
+void si_init_viewport_functions(struct si_context *ctx)
+{
+	ctx->b.b.set_scissor_states = si_set_scissor_states;
+	ctx->scissors.atom.emit = si_emit_scissors;
+	ctx->b.b.set_viewport_states = si_set_viewport_states;
+	ctx->viewports.atom.emit = si_emit_viewport_states;
+}
diff --git a/lib/mesa/src/gallium/drivers/softpipe/sp_query.c b/lib/mesa/src/gallium/drivers/softpipe/sp_query.c
index bec0116a5..267c99977 100644
--- a/lib/mesa/src/gallium/drivers/softpipe/sp_query.c
+++ b/lib/mesa/src/gallium/drivers/softpipe/sp_query.c
@@ -60,9 +60,11 @@ softpipe_create_query(struct pipe_context *pipe,
assert(type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
type == PIPE_QUERY_TIME_ELAPSED ||
type == PIPE_QUERY_SO_STATISTICS ||
type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
type == PIPE_QUERY_PRIMITIVES_GENERATED ||
type == PIPE_QUERY_PIPELINE_STATISTICS ||
@@ -92,6 +94,7 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
sq->start = softpipe->occlusion_count;
break;
case PIPE_QUERY_TIME_ELAPSED:
@@ -102,7 +105,9 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- sq->end = FALSE;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written;
+ sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written;
@@ -144,6 +149,7 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
sq->end = softpipe->occlusion_count;
break;
case PIPE_QUERY_TIMESTAMP:
@@ -153,6 +159,7 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
sq->end = os_time_get_nano();
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
sq->so.num_primitives_written =
softpipe->so_stats.num_primitives_written - sq->so.num_primitives_written;
sq->so.primitives_storage_needed =
@@ -230,6 +237,7 @@ softpipe_get_query_result(struct pipe_context *pipe,
vresult->b = TRUE;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
vresult->b = sq->end != 0;
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
@@ -247,6 +255,7 @@ softpipe_get_query_result(struct pipe_context *pipe,
*result = sq->so.primitives_storage_needed;
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
vresult->b = sq->end - sq->start != 0;
break;
default:
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c
index b968fb030..19d5e5031 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -286,25 +286,25 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
gen_type,
gen_nr,
gen_size, gen_func, &gen_buf);
- if (ret != PIPE_OK)
- goto done;
-
- pipe_debug_message(&svga->debug.callback, PERF_INFO,
- "generating temporary index buffer for drawing %s",
- u_prim_name(prim));
-
- ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
- gen_buf,
- gen_size,
- start,
- 0,
- count - 1,
- gen_prim, 0, gen_nr,
- start_instance,
- instance_count);
-done:
- if (gen_buf)
+ if (ret == PIPE_OK) {
+ pipe_debug_message(&svga->debug.callback, PERF_INFO,
+ "generating temporary index buffer for drawing %s",
+ u_prim_name(prim));
+
+ ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
+ gen_buf,
+ gen_size,
+ start,
+ 0,
+ count - 1,
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
+ }
+
+ if (gen_buf) {
pipe_resource_reference(&gen_buf, NULL);
+ }
}
SVGA_STATS_TIME_POP(svga_sws(svga));
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c
index f9bb13664..b1db87107 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_elements.c
@@ -242,21 +242,21 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
prim, gen_prim,
count, gen_nr, gen_size,
gen_func, &gen_buf);
- if (ret != PIPE_OK)
- goto done;
-
- ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
- gen_buf,
- gen_size,
- index_bias,
- min_index,
- max_index,
- gen_prim, 0, gen_nr,
- start_instance,
- instance_count);
-done:
- if (gen_buf)
+ if (ret == PIPE_OK) {
+ ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
+ gen_buf,
+ gen_size,
+ index_bias,
+ min_index,
+ max_index,
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
+ }
+
+ if (gen_buf) {
pipe_resource_reference(&gen_buf, NULL);
+ }
}
SVGA_STATS_TIME_POP(svga_sws(hwtnl->svga));
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h
index 38e5e66fd..2a60038e9 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h
+++ b/lib/mesa/src/gallium/drivers/svga/svga_draw_private.h
@@ -42,7 +42,7 @@ struct u_upload_mgr;
* handled by the svga device. Other types will be converted to
* these types by the index/translation code.
*/
-static const unsigned svga_hw_prims =
+static const unsigned svga_hw_prims =
((1 << PIPE_PRIM_POINTS) |
(1 << PIPE_PRIM_LINES) |
(1 << PIPE_PRIM_LINE_STRIP) |
@@ -73,23 +73,23 @@ svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
case PIPE_PRIM_LINES:
*prim_count = vcount / 2;
- return SVGA3D_PRIMITIVE_LINELIST;
+ return SVGA3D_PRIMITIVE_LINELIST;
case PIPE_PRIM_LINE_STRIP:
*prim_count = vcount - 1;
- return SVGA3D_PRIMITIVE_LINESTRIP;
+ return SVGA3D_PRIMITIVE_LINESTRIP;
case PIPE_PRIM_TRIANGLES:
*prim_count = vcount / 3;
- return SVGA3D_PRIMITIVE_TRIANGLELIST;
+ return SVGA3D_PRIMITIVE_TRIANGLELIST;
case PIPE_PRIM_TRIANGLE_STRIP:
*prim_count = vcount - 2;
- return SVGA3D_PRIMITIVE_TRIANGLESTRIP;
+ return SVGA3D_PRIMITIVE_TRIANGLESTRIP;
case PIPE_PRIM_TRIANGLE_FAN:
*prim_count = vcount - 2;
- return SVGA3D_PRIMITIVE_TRIANGLEFAN;
+ return SVGA3D_PRIMITIVE_TRIANGLEFAN;
case PIPE_PRIM_LINES_ADJACENCY:
*prim_count = vcount / 4;
@@ -119,8 +119,7 @@ struct index_cache {
u_generate_func generate;
unsigned gen_nr;
- /* If non-null, this buffer is filled by calling
- * generate(nr, map(buffer))
+ /* If non-null, this buffer is filled by calling generate(nr, map(buffer))
*/
struct pipe_resource *buffer;
};
@@ -160,7 +159,7 @@ struct svga_hwtnl {
* vertex buffers.
*/
int index_bias;
-
+
/* Provoking vertex information (for flat shading). */
unsigned api_pv; /**< app-requested PV mode (PV_FIRST or PV_LAST) */
unsigned hw_pv; /**< device-supported PV mode (PV_FIRST or PV_LAST) */
@@ -220,27 +219,26 @@ svga_need_unfilled_fallback(const struct svga_hwtnl *hwtnl,
}
-enum pipe_error
-svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
- const SVGA3dPrimitiveRange *range,
- unsigned vcount,
- unsigned min_index,
- unsigned max_index,
- struct pipe_resource *ib,
- unsigned start_instance, unsigned instance_count);
-
enum pipe_error
-svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
- struct pipe_resource *indexBuffer,
- unsigned index_size,
- int index_bias,
- unsigned min_index,
- unsigned max_index,
- enum pipe_prim_type prim,
- unsigned start,
- unsigned count,
- unsigned start_instance,
- unsigned instance_count);
+svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned vcount,
+ unsigned min_index,
+ unsigned max_index,
+ struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count);
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
+ struct pipe_resource *indexBuffer,
+ unsigned index_size,
+ int index_bias,
+ unsigned min_index,
+ unsigned max_index,
+ enum pipe_prim_type prim,
+ unsigned start,
+ unsigned count,
+ unsigned start_instance,
+ unsigned instance_count);
#endif
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c
index 408e175fe..a29fbd3ac 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -331,6 +331,7 @@ svga_create_blend_state(struct pipe_context *pipe,
blend->independent_blend_enable = templ->independent_blend_enable;
blend->alpha_to_coverage = templ->alpha_to_coverage;
+ blend->alpha_to_one = templ->alpha_to_one;
if (svga_have_vgpu10(svga)) {
define_blend_state_object(svga, blend);
diff --git a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c
index 7cbd51669..0b2d8af64 100644
--- a/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c
+++ b/lib/mesa/src/gallium/drivers/svga/svga_tgsi.c
@@ -209,6 +209,12 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga,
goto fail;
}
+ if (emit.info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ debug_printf(
+ "svga: indirect indexing of temporary registers is not supported.\n");
+ goto fail;
+ }
+
emit.in_main_func = TRUE;
if (!svga_shader_emit_header(&emit)) {
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
new file mode 100644
index 000000000..d81f7d019
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file ${filename}.h
+*
+* @brief Dynamic Knobs for Core.
+*
+* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
+*
+* Generation Command Line:
+* ${'\n* '.join(cmdline)}
+*
+******************************************************************************/
+<% calc_max_knob_len(knobs) %>
+#pragma once
+#include <string>
+
+struct KnobBase
+{
+private:
+ // Update the input string.
+ static void autoExpandEnvironmentVariables(std::string &text);
+
+protected:
+ // Leave input alone and return new string.
+ static std::string expandEnvironmentVariables(std::string const &input)
+ {
+ std::string text = input;
+ autoExpandEnvironmentVariables(text);
+ return text;
+ }
+
+ template <typename T>
+ static T expandEnvironmentVariables(T const &input)
+ {
+ return input;
+ }
+};
+
+template <typename T>
+struct Knob : KnobBase
+{
+public:
+ const T& Value() const { return m_Value; }
+ const T& Value(T const &newValue)
+ {
+ m_Value = expandEnvironmentVariables(newValue);
+ return Value();
+ }
+
+private:
+ T m_Value;
+};
+
+#define DEFINE_KNOB(_name, _type, _default) \\
+
+ struct Knob_##_name : Knob<_type> \\
+
+ { \\
+
+ static const char* Name() { return "KNOB_" #_name; } \\
+
+ static _type DefaultValue() { return (_default); } \\
+
+ } _name;
+
+#define GET_KNOB(_name) g_GlobalKnobs._name.Value()
+#define SET_KNOB(_name, _newValue) g_GlobalKnobs._name.Value(_newValue)
+
+struct GlobalKnobs
+{
+ % for knob in knobs:
+ //-----------------------------------------------------------
+ // KNOB_${knob[0]}
+ //
+ % for line in knob[1]['desc']:
+ // ${line}
+ % endfor
+ % if knob[1].get('choices'):
+ <%
+ choices = knob[1].get('choices')
+ _max_len = calc_max_name_len(choices) %>//
+ % for i in range(len(choices)):
+ // ${choices[i]['name']}${space_name(choices[i]['name'], _max_len)} = ${format(choices[i]['value'], '#010x')}
+ % endfor
+ % endif
+ //
+ % if knob[1]['type'] == 'std::string':
+ DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, "${repr(knob[1]['default'])[1:-1]}");
+ % else:
+ DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, ${knob[1]['default']});
+ % endif
+
+ % endfor
+
+ std::string ToString(const char* optPerLinePrefix="");
+ GlobalKnobs();
+};
+extern GlobalKnobs g_GlobalKnobs;
+
+#undef DEFINE_KNOB
+
+% for knob in knobs:
+#define KNOB_${knob[0]}${space_knob(knob[0])} GET_KNOB(${knob[0]})
+% endfor
+
+<%!
+ # Globally available python
+ max_len = 0
+ def calc_max_knob_len(knobs):
+ global max_len
+ max_len = 0
+ for knob in knobs:
+ if len(knob[0]) > max_len: max_len = len(knob[0])
+ max_len += len('KNOB_ ')
+ if max_len % 4: max_len += 4 - (max_len % 4)
+
+ def space_knob(knob):
+ knob_len = len('KNOB_' + knob)
+ return ' '*(max_len - knob_len)
+
+ def calc_max_name_len(choices_array):
+ _max_len = 0
+ for choice in choices_array:
+ if len(choice['name']) > _max_len: _max_len = len(choice['name'])
+
+ if _max_len % 4: _max_len += 4 - (_max_len % 4)
+ return _max_len
+
+ def space_name(name, max_len):
+ name_len = len(name)
+ return ' '*(max_len - name_len)
+%>
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
new file mode 100644
index 000000000..a4ecd09f1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
@@ -0,0 +1,193 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD128 AVX (512) implementation
+//
+// Since this implementation inherits from the AVX (2) implementation,
+// the only operations below are ones that replace AVX (2) operations.
+// These use native AVX512 instructions with masking to enable a larger
+// register set.
+//============================================================================
+
+#define SIMD_WRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+ }
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
+
+#define SIMD_WRAPPER_1I_(op, intrin, mask) \
+ template<int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+ }
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
+
+#define SIMD_WRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+ }
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
+
+#define SIMD_WRAPPER_2I(op) \
+ template<int ImmT>\
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+ }
+
+#define SIMD_WRAPPER_3_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+ }
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
+
+#define SIMD_DWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+ }
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
+
+#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
+ template<int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+ }
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
+
+#define SIMD_DWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+ }
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
+
+#define SIMD_DWRAPPER_2I(op) \
+ template<int ImmT>\
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\
+ }
+
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+ }
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template<int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+ }
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+ }
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2I(op) \
+ template<int ImmT>\
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+ }
+
+SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_16(unpackhi_epi16);
+SIMD_IWRAPPER_2_64(unpackhi_epi64);
+SIMD_IWRAPPER_2_8(unpackhi_epi8);
+SIMD_IWRAPPER_2_16(unpacklo_epi16);
+SIMD_IWRAPPER_2_64(unpacklo_epi64);
+SIMD_IWRAPPER_2_8(unpacklo_epi8);
+
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
+{
+ __mmask64 m = 0xffffull;
+ return static_cast<uint32_t>(
+ _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+}
+
+#undef SIMD_WRAPPER_1_
+#undef SIMD_WRAPPER_1
+#undef SIMD_WRAPPER_1I_
+#undef SIMD_WRAPPER_1I
+#undef SIMD_WRAPPER_2_
+#undef SIMD_WRAPPER_2
+#undef SIMD_WRAPPER_2I
+#undef SIMD_WRAPPER_3_
+#undef SIMD_WRAPPER_3
+#undef SIMD_DWRAPPER_1_
+#undef SIMD_DWRAPPER_1
+#undef SIMD_DWRAPPER_1I_
+#undef SIMD_DWRAPPER_1I
+#undef SIMD_DWRAPPER_2_
+#undef SIMD_DWRAPPER_2
+#undef SIMD_DWRAPPER_2I
+#undef SIMD_IWRAPPER_1_
+#undef SIMD_IWRAPPER_1_8
+#undef SIMD_IWRAPPER_1_16
+#undef SIMD_IWRAPPER_1_32
+#undef SIMD_IWRAPPER_1_64
+#undef SIMD_IWRAPPER_1I_
+#undef SIMD_IWRAPPER_1I_8
+#undef SIMD_IWRAPPER_1I_16
+#undef SIMD_IWRAPPER_1I_32
+#undef SIMD_IWRAPPER_1I_64
+#undef SIMD_IWRAPPER_2_
+#undef SIMD_IWRAPPER_2_8
+#undef SIMD_IWRAPPER_2_16
+#undef SIMD_IWRAPPER_2_32
+#undef SIMD_IWRAPPER_2_64
+#undef SIMD_IWRAPPER_2I
+//#undef SIMD_IWRAPPER_2I_8
+//#undef SIMD_IWRAPPER_2I_16
+//#undef SIMD_IWRAPPER_2I_32
+//#undef SIMD_IWRAPPER_2I_64
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
new file mode 100644
index 000000000..b0cae5034
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
@@ -0,0 +1,35 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD128 AVX (512) implementation for Knights Family
+//
+// Since this implementation inherits from the AVX512Base implementation,
+// the only operations below are ones that replace AVX512F / AVX512CD operations.
+// These use native AVX512 instructions with masking to enable a larger
+// register set.
+//============================================================================
+
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
new file mode 100644
index 000000000..6ffe7c2a0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
@@ -0,0 +1,127 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD256 AVX (512) implementation for Core processors
+//
+// Since this implementation inherits from the AVX (2) implementation,
+// the only operations below are ones that replace AVX (2) operations.
+// These use native AVX512 instructions with masking to enable a larger
+// register set.
+//============================================================================
+
+#define SIMD_DWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+ }
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
+
+#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
+ template<int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+ }
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
+
+#define SIMD_DWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+ }
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
+
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+ }
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
+
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template<int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+ }
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
+
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+ }
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
+
+
+SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_16(unpackhi_epi16);
+SIMD_IWRAPPER_2_64(unpackhi_epi64);
+SIMD_IWRAPPER_2_8(unpackhi_epi8);
+SIMD_IWRAPPER_2_16(unpacklo_epi16);
+SIMD_IWRAPPER_2_64(unpacklo_epi64);
+SIMD_IWRAPPER_2_8(unpacklo_epi8);
+
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
+{
+ __mmask64 m = 0xffffffffull;
+ return static_cast<uint32_t>(
+ _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+}
+
+#undef SIMD_DWRAPPER_1_
+#undef SIMD_DWRAPPER_1
+#undef SIMD_DWRAPPER_1I_
+#undef SIMD_DWRAPPER_1I
+#undef SIMD_DWRAPPER_2_
+#undef SIMD_DWRAPPER_2
+#undef SIMD_DWRAPPER_2I
+#undef SIMD_IWRAPPER_1_
+#undef SIMD_IWRAPPER_1_8
+#undef SIMD_IWRAPPER_1_16
+#undef SIMD_IWRAPPER_1_64
+#undef SIMD_IWRAPPER_1I_
+#undef SIMD_IWRAPPER_1I_8
+#undef SIMD_IWRAPPER_1I_16
+#undef SIMD_IWRAPPER_1I_64
+#undef SIMD_IWRAPPER_2_
+#undef SIMD_IWRAPPER_2_8
+#undef SIMD_IWRAPPER_2_16
+#undef SIMD_IWRAPPER_2_64
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
new file mode 100644
index 000000000..acd8ffd96
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
@@ -0,0 +1,35 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD256 AVX (512) implementation for Knights Family
+//
+// Since this implementation inherits from the AVX (2) implementation,
+// the only operations below are ones that replace AVX (2) operations.
+// These use native AVX512 instructions with masking to enable a larger
+// register set.
+//============================================================================
+
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
new file mode 100644
index 000000000..fed6307f4
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
@@ -0,0 +1,217 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD16 AVX512 (F) implementation for Core processors
+//
+//============================================================================
+
+#define SIMD_WRAPPER_1_(op, intrin) /* unary Float op(a): forwards to intrin exactly as given (no _mm512_ prefix is added) */ \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ {\
+ return intrin(a);\
+ }
+
+#define SIMD_WRAPPER_1(op) /* unary Float op(a) -> _mm512_<op>(a) */ \
+ SIMD_WRAPPER_1_(op, _mm512_##op)
+
+#define SIMD_WRAPPER_2_(op, intrin) /* binary Float op(a, b) -> _mm512_<intrin>(a, b); intrin is given WITHOUT the _mm512_ prefix */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_##intrin(a, b);\
+ }
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) /* binary Float op whose name equals the intrinsic suffix */
+
+#define SIMD_WRAPPERI_2_(op, intrin) /* binary Float op implemented with an integer intrinsic: cast ps->si512, apply _mm512_<intrin>, cast back */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_castsi512_ps(_mm512_##intrin(\
+ _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+ }
+
+#define SIMD_DWRAPPER_2(op) /* binary Double op(a, b) -> _mm512_<op>(a, b) */ \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return _mm512_##op(a, b);\
+ }
+
+#define SIMD_WRAPPER_2I_(op, intrin) /* binary Float op<ImmT>(a, b): ImmT is passed as the intrinsic's third (immediate) argument */ \
+ template<int ImmT>\
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+#define SIMD_DWRAPPER_2I_(op, intrin) /* Double counterpart of SIMD_WRAPPER_2I_ */ \
+ template<int ImmT>\
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+#define SIMD_WRAPPER_3(op) /* ternary Float op(a, b, c) -> _mm512_<op>(a, b, c) */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ {\
+ return _mm512_##op(a, b, c);\
+ }
+
+#define SIMD_IWRAPPER_1(op) /* unary Integer op(a) -> _mm512_<op>(a) */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+#define SIMD_IWRAPPER_1_8(op) /* unary op taking a SIMD256Impl::Integer and returning this implementation's Integer */ \
+ static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+
+#define SIMD_IWRAPPER_1_4(op) /* unary op taking a SIMD128Impl::Integer and returning this implementation's Integer */ \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+
+#define SIMD_IWRAPPER_1I_(op, intrin) /* unary Integer op<ImmT>(a): intrin is used exactly as given (no _mm512_ prefix is added) */ \
+ template<int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return intrin(a, ImmT);\
+ }
+#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) /* unary Integer op<ImmT>(a) -> _mm512_<op>(a, ImmT) */
+
+#define SIMD_IWRAPPER_2_(op, intrin) /* binary Integer op(a, b) -> _mm512_<intrin>(a, b); intrin is given WITHOUT the _mm512_ prefix */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return _mm512_##intrin(a, b);\
+ }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) /* binary Integer op whose name equals the intrinsic suffix */
+
+#define SIMD_IWRAPPER_2_CMP(op, cmp) /* binary Integer op(a, b) that forwards to an arbitrary callable expression cmp, e.g. a template instantiation */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return cmp(a, b);\
+ }
+
+#define SIMD_IFWRAPPER_2(op, intrin) /* binary Integer op implemented with a float intrinsic: castsi_ps the inputs, apply _mm512_<intrin>, castps_si the result */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+ }
+
+#define SIMD_IWRAPPER_2I_(op, intrin) /* binary Integer op<ImmT>(a, b): ImmT is passed as the intrinsic's third (immediate) argument */ \
+ template<int ImmT>\
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+private: // helpers that turn an AVX-512 k-mask back into a legacy per-lane vector mask
+ static SIMDINLINE Integer vmask(__mmask32 m) // 32-bit mask -> 32 x int16 lanes
+ {
+ return _mm512_maskz_set1_epi16(m, -1); // zero-masked broadcast of -1: lane is all-ones where its mask bit is set, 0 otherwise
+ }
+ static SIMDINLINE Integer vmask(__mmask64 m) // 64-bit mask -> 64 x int8 lanes
+ {
+ return _mm512_maskz_set1_epi8(m, -1); // zero-masked broadcast of -1: lane is all-ones where its mask bit is set, 0 otherwise
+ }
+public:
+
+SIMD_IWRAPPER_2(add_epi8); // return a + b (int8), via _mm512_add_epi8
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+
+SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int), via _mm512_and_ps
+SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int), via _mm512_andnot_ps
+SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int), via _mm512_or_ps
+SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int), via _mm512_xor_ps
+
+SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16); the input is a SIMD256Impl::Integer
+
+template<CompareTypeInt CmpTypeT> // CmpTypeT: comparison predicate, passed to the intrinsic as its immediate operand
+static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
+{
+ // Legacy vector mask generator: compare the int8 lanes into a 64-bit k-mask, then expand it to a per-byte vector mask with vmask()
+ __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
+ return vmask(result);
+}
+template<CompareTypeInt CmpTypeT> // CmpTypeT: comparison predicate, passed to the intrinsic as its immediate operand
+static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
+{
+ // Legacy vector mask generator: compare the int16 lanes into a 32-bit k-mask, then expand it to a per-lane vector mask with vmask()
+ __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT));
+ return vmask(result);
+}
+
+SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8), as a per-lane vector mask
+SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16), as a per-lane vector mask
+SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8), as a per-lane vector mask
+SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16), as a per-lane vector mask
+
+SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
+
+SIMD_IWRAPPER_2(unpackhi_epi8); // See documentation for _mm512_unpackhi_epi8
+SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16
+SIMD_IWRAPPER_2(unpacklo_epi8); // See documentation for _mm512_unpacklo_epi8
+
+SIMD_IWRAPPER_2(shuffle_epi8); // See documentation for _mm512_shuffle_epi8
+
+static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a) // returns one result bit per int8 lane of a
+{
+ __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si()); // signed a < setzero_si() per byte; presumably setzero_si() is the all-zero vector, making this a sign-bit test - confirm
+ return static_cast<uint64_t>(m); // hand the k-mask back as a plain 64-bit integer
+}
+
+
+
+#undef SIMD_WRAPPER_1_ // Undefine every wrapper-generator macro defined above, once each, so none leaks out of this .inl
+#undef SIMD_WRAPPER_1
+#undef SIMD_WRAPPER_2_
+#undef SIMD_WRAPPER_2
+#undef SIMD_WRAPPERI_2_
+#undef SIMD_DWRAPPER_2
+#undef SIMD_WRAPPER_2I_
+#undef SIMD_WRAPPER_2I
+#undef SIMD_DWRAPPER_2I_
+#undef SIMD_DWRAPPER_2I
+#undef SIMD_WRAPPER_3
+#undef SIMD_IWRAPPER_1
+#undef SIMD_IWRAPPER_1_8
+#undef SIMD_IWRAPPER_1_4
+#undef SIMD_IWRAPPER_1I_
+#undef SIMD_IWRAPPER_1I
+#undef SIMD_IWRAPPER_2_
+#undef SIMD_IWRAPPER_2
+#undef SIMD_IWRAPPER_2_CMP
+#undef SIMD_IFWRAPPER_2
+#undef SIMD_IWRAPPER_2I_
+#undef SIMD_IWRAPPER_2I
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
new file mode 100644
index 000000000..690ab386b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
@@ -0,0 +1,161 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+//============================================================================
+// SIMD16 AVX512 (F) implementation for Knights Family Processors
+//
+//============================================================================
+
+#define SIMD_WRAPPER_1_(op, intrin) /* unary Float op(a): forwards to intrin exactly as given (no _mm512_ prefix is added) */ \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ {\
+ return intrin(a);\
+ }
+
+#define SIMD_WRAPPER_1(op) /* unary Float op(a) -> _mm512_<op>(a) */ \
+ SIMD_WRAPPER_1_(op, _mm512_##op)
+
+#define SIMD_WRAPPER_2_(op, intrin) /* binary Float op(a, b) -> _mm512_<intrin>(a, b); intrin is given WITHOUT the _mm512_ prefix */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_##intrin(a, b);\
+ }
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) /* binary Float op whose name equals the intrinsic suffix */
+
+#define SIMD_WRAPPERI_2_(op, intrin) /* binary Float op implemented with an integer intrinsic: cast ps->si512, apply _mm512_<intrin>, cast back; the only generator instantiated in this file */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_castsi512_ps(_mm512_##intrin(\
+ _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+ }
+
+#define SIMD_DWRAPPER_2(op) /* binary Double op(a, b) -> _mm512_<op>(a, b) */ \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return _mm512_##op(a, b);\
+ }
+
+#define SIMD_WRAPPER_2I_(op, intrin) /* binary Float op<ImmT>(a, b): ImmT is passed as the intrinsic's third (immediate) argument */ \
+ template<int ImmT>\
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+#define SIMD_DWRAPPER_2I_(op, intrin) /* Double counterpart of SIMD_WRAPPER_2I_ */ \
+ template<int ImmT>\
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+#define SIMD_WRAPPER_3(op) /* ternary Float op(a, b, c) -> _mm512_<op>(a, b, c) */ \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ {\
+ return _mm512_##op(a, b, c);\
+ }
+
+#define SIMD_IWRAPPER_1(op) /* unary Integer op(a) -> _mm512_<op>(a) */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+#define SIMD_IWRAPPER_1_8(op) /* unary op taking a SIMD256Impl::Integer and returning this implementation's Integer */ \
+ static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+
+#define SIMD_IWRAPPER_1_4(op) /* unary op taking a SIMD128Impl::Integer and returning this implementation's Integer */ \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \
+ {\
+ return _mm512_##op(a);\
+ }
+
+#define SIMD_IWRAPPER_1I_(op, intrin) /* unary Integer op<ImmT>(a): intrin is used exactly as given (no _mm512_ prefix is added) */ \
+ template<int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ {\
+ return intrin(a, ImmT);\
+ }
+#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) /* unary Integer op<ImmT>(a) -> _mm512_<op>(a, ImmT) */
+
+#define SIMD_IWRAPPER_2_(op, intrin) /* binary Integer op(a, b) -> _mm512_<intrin>(a, b); intrin is given WITHOUT the _mm512_ prefix */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return _mm512_##intrin(a, b);\
+ }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) /* binary Integer op whose name equals the intrinsic suffix */
+
+#define SIMD_IWRAPPER_2_CMP(op, cmp) /* binary Integer op(a, b) that forwards to an arbitrary callable expression cmp */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return cmp(a, b);\
+ }
+
+#define SIMD_IFWRAPPER_2(op, intrin) /* binary Integer op implemented with a float intrinsic: castsi_ps the inputs, apply _mm512_<intrin>, castps_si the result */ \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+ }
+
+#define SIMD_IWRAPPER_2I_(op, intrin) /* binary Integer op<ImmT>(a, b): ImmT is passed as the intrinsic's third (immediate) argument */ \
+ template<int ImmT>\
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ {\
+ return _mm512_##intrin(a, b, ImmT);\
+ }
+#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) /* same, with op equal to the intrinsic suffix */
+
+SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int), computed by _mm512_and_epi32 on bit-cast operands. NOTE(review): presumably because the _ps logical intrinsics need AVX512DQ, which Knights parts lack - confirm
+SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int), computed by _mm512_andnot_epi32 on bit-cast operands
+SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int), computed by _mm512_or_epi32 on bit-cast operands
+SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int), computed by _mm512_xor_epi32 on bit-cast operands
+
+#undef SIMD_WRAPPER_1_ // Undefine every wrapper-generator macro defined above, once each, so none leaks out of this .inl
+#undef SIMD_WRAPPER_1
+#undef SIMD_WRAPPER_2_
+#undef SIMD_WRAPPER_2
+#undef SIMD_WRAPPERI_2_
+#undef SIMD_DWRAPPER_2
+#undef SIMD_WRAPPER_2I_
+#undef SIMD_WRAPPER_2I
+#undef SIMD_DWRAPPER_2I_
+#undef SIMD_DWRAPPER_2I
+#undef SIMD_WRAPPER_3
+#undef SIMD_IWRAPPER_1
+#undef SIMD_IWRAPPER_1_8
+#undef SIMD_IWRAPPER_1_4
+#undef SIMD_IWRAPPER_1I_
+#undef SIMD_IWRAPPER_1I
+#undef SIMD_IWRAPPER_2_
+#undef SIMD_IWRAPPER_2
+#undef SIMD_IWRAPPER_2_CMP
+#undef SIMD_IFWRAPPER_2
+#undef SIMD_IWRAPPER_2I_
+#undef SIMD_IWRAPPER_2I
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
new file mode 100644
index 000000000..3e36ce5bd
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
@@ -0,0 +1,27 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+// Implement mask-enabled SIMD functions
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
new file mode 100644
index 000000000..3e36ce5bd
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
@@ -0,0 +1,27 @@
+/****************************************************************************
+* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+****************************************************************************/
+#if !defined(__SIMD_LIB_AVX512_HPP__)
+#error Do not include this file directly, use "simdlib.hpp" instead.
+#endif
+
+// Implement mask-enabled SIMD functions
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
index 1d8546959..00c3a87c1 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
@@ -109,6 +109,7 @@ template <>
struct ConservativeRastFETraits<StandardRastT>
{
typedef std::false_type IsConservativeT;
+ typedef std::integral_constant<uint32_t, 0> BoundingBoxOffsetT;
};
//////////////////////////////////////////////////////////////////////////
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
index 590c56903..fafc36d1d 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
@@ -30,7 +30,7 @@
#include "format_conversion.h"
INLINE
-void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps)
+void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
{
simdscalari stencil = _simd_castps_si(stencilps);
@@ -81,7 +81,7 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds
template<SWR_FORMAT depthFormatT>
-simdscalar QuantizeDepth(simdscalar depth)
+simdscalar QuantizeDepth(simdscalar const &depth)
{
SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
@@ -117,7 +117,7 @@ simdscalar QuantizeDepth(simdscalar depth)
INLINE
simdscalar DepthStencilTest(const API_STATE* pState,
- bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask,
+ bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
uint8_t *pStencilBase, simdscalar* pStencilMask)
{
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
@@ -132,7 +132,7 @@ simdscalar DepthStencilTest(const API_STATE* pState,
// clamp Z to viewport [minZ..maxZ]
simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
- interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));
+ simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
if (pDSState->depthTestEnable)
{
@@ -215,7 +215,7 @@ simdscalar DepthStencilTest(const API_STATE* pState,
INLINE
void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
- bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
+ bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
uint8_t *pStencilBase, const simdscalar& stencilMask)
{
if (pDSState->depthWriteEnable)
@@ -223,7 +223,7 @@ void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_ST
// clamp Z to viewport [minZ..maxZ]
simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
- interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));
+ simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
_simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
diff --git a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
index ba2df2292..12c2a3031 100644
--- a/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
+++ b/lib/mesa/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
@@ -91,16 +91,18 @@ static inline void ConvertEnvToKnob(const char* pOverride, std::string& knobValu
template <typename T>
static inline void InitKnob(T& knob)
{
-
- // TODO, read registry first
-
- // Second, read environment variables
+ // Read environment variables
const char* pOverride = getenv(knob.Name());
if (pOverride)
{
- auto knobValue = knob.Value();
+ auto knobValue = knob.DefaultValue();
ConvertEnvToKnob(pOverride, knobValue);
knob.Value(knobValue);
}
+ else
+ {
+ // Set default value
+ knob.Value(knob.DefaultValue());
+ }
}
diff --git a/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h b/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h
index b38d63eac..7f969a303 100644
--- a/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h
+++ b/lib/mesa/src/gallium/drivers/trace/tr_dump_defines.h
@@ -50,7 +50,7 @@ trace_dump_query_type(unsigned value)
if (!trace_dumping_enabled_locked())
return;
- trace_dump_enum(util_dump_query_type(value, FALSE));
+ trace_dump_enum(util_str_query_type(value, FALSE));
}
diff --git a/lib/mesa/src/gallium/drivers/vc4/Automake.inc b/lib/mesa/src/gallium/drivers/vc4/Automake.inc
index 5664c2ab1..b1aa9726b 100644
--- a/lib/mesa/src/gallium/drivers/vc4/Automake.inc
+++ b/lib/mesa/src/gallium/drivers/vc4/Automake.inc
@@ -4,6 +4,7 @@ TARGET_DRIVERS += vc4
TARGET_CPPFLAGS += -DGALLIUM_VC4
TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
- $(top_builddir)/src/gallium/drivers/vc4/libvc4.la
+ $(top_builddir)/src/gallium/drivers/vc4/libvc4.la \
+ $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
endif
diff --git a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index b926d35a6..2da797899 100644
--- a/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/lib/mesa/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -255,8 +255,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
uint8_t max_y_tile = args->max_y_tile;
uint8_t xtiles = max_x_tile - min_x_tile + 1;
uint8_t ytiles = max_y_tile - min_y_tile + 1;
- uint8_t x, y;
+ uint8_t xi, yi;
uint32_t size, loop_body_size;
+ bool positive_x = true;
+ bool positive_y = true;
+
+ if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
+ if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X))
+ positive_x = false;
+ if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y))
+ positive_y = false;
+ }
size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -348,10 +357,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
rcl_u32(setup, 0); /* no address, since we're in None mode */
}
- for (y = min_y_tile; y <= max_y_tile; y++) {
- for (x = min_x_tile; x <= max_x_tile; x++) {
- bool first = (x == min_x_tile && y == min_y_tile);
- bool last = (x == max_x_tile && y == max_y_tile);
+ for (yi = 0; yi < ytiles; yi++) {
+ int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
+ for (xi = 0; xi < xtiles; xi++) {
+ int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
+ bool first = (xi == 0 && yi == 0);
+ bool last = (xi == xtiles - 1 && yi == ytiles - 1);
emit_tile(exec, setup, x, y, first, last);
}
diff --git a/lib/mesa/src/gallium/drivers/vc5/Automake.inc b/lib/mesa/src/gallium/drivers/vc5/Automake.inc
new file mode 100644
index 000000000..57c8a28ef
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/Automake.inc
@@ -0,0 +1,14 @@
+if HAVE_GALLIUM_VC5
+# Everything below applies only when the vc5 gallium driver is enabled.
+TARGET_DRIVERS += vc5
+TARGET_CPPFLAGS += -DGALLIUM_VC5
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/winsys/vc5/drm/libvc5drm.la \
+ $(top_builddir)/src/gallium/drivers/vc5/libvc5.la \
+ $(top_builddir)/src/broadcom/libbroadcom.la
+# libbroadcom_cle.la is added here only when vc4 is disabled; presumably the vc4 Automake.inc adds it otherwise, so it is not listed twice.
+if !HAVE_GALLIUM_VC4
+TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la
+endif
+
+endif
diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.am b/lib/mesa/src/gallium/drivers/vc5/Makefile.am
new file mode 100644
index 000000000..42d4be73d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.am
@@ -0,0 +1,40 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+# Compiler flags for libvc5; the trailing $(NULL) is an empty expansion that only terminates the continuation list.
+AM_CFLAGS = \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_builddir)/src/broadcom \
+ $(LIBDRM_CFLAGS) \
+ $(VC5_SIMULATOR_CFLAGS) \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $(NULL)
+
+noinst_LTLIBRARIES = libvc5.la
+
+libvc5_la_SOURCES = $(C_SOURCES)
+
+libvc5_la_LDFLAGS = \
+ $(VC5_SIMULATOR_LIBS) \
+ $(NULL)
diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.in b/lib/mesa/src/gallium/drivers/vc5/Makefile.in
new file mode 100644
index 000000000..a19c97f88
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.in
@@ -0,0 +1,939 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_LIBDRM_TRUE@am__append_1 = \
+@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
+
+@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
+
+subdir = src/gallium/drivers/vc5
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+ $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
+ $(top_srcdir)/m4/ax_gcc_builtin.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+ $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_flex.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libvc5_la_LIBADD =
+am__objects_1 = vc5_blit.lo vc5_bufmgr.lo vc5_cl.lo vc5_context.lo \
+ vc5_draw.lo vc5_emit.lo vc5_fence.lo vc5_formats.lo vc5_job.lo \
+ vc5_program.lo vc5_query.lo vc5_rcl.lo vc5_resource.lo \
+ vc5_screen.lo vc5_simulator.lo vc5_state.lo vc5_tiling.lo \
+ vc5_uniforms.lo
+am_libvc5_la_OBJECTS = $(am__objects_1)
+libvc5_la_OBJECTS = $(am_libvc5_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libvc5_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libvc5_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libvc5_la_SOURCES)
+DIST_SOURCES = $(libvc5_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
+ $(top_srcdir)/bin/depcomp \
+ $(top_srcdir)/src/gallium/Automake.inc
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
+AMDGPU_LIBS = @AMDGPU_LIBS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+ANDROID_CFLAGS = @ANDROID_CFLAGS@
+ANDROID_LIBS = @ANDROID_LIBS@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BSYMBOLIC = @BSYMBOLIC@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
+DEFINES = @DEFINES@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGREP = @EGREP@
+ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
+ETNAVIV_LIBS = @ETNAVIV_LIBS@
+EXEEXT = @EXEEXT@
+EXPAT_CFLAGS = @EXPAT_CFLAGS@
+EXPAT_LIBS = @EXPAT_LIBS@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GC_SECTIONS = @GC_SECTIONS@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLVND_CFLAGS = @GLVND_CFLAGS@
+GLVND_LIBS = @GLVND_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+I915_CFLAGS = @I915_CFLAGS@
+I915_LIBS = @I915_LIBS@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBATOMIC_LIBS = @LIBATOMIC_LIBS@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBELF_CFLAGS = @LIBELF_CFLAGS@
+LIBELF_LIBS = @LIBELF_LIBS@
+LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIB_DIR = @LIB_DIR@
+LIB_EXT = @LIB_EXT@
+LIPO = @LIPO@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBS = @LLVM_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
+MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
+NINE_MAJOR = @NINE_MAJOR@
+NINE_MINOR = @NINE_MINOR@
+NINE_TINY = @NINE_TINY@
+NINE_VERSION = @NINE_VERSION@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
+NVVIEUX_LIBS = @NVVIEUX_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
+OPENCL_LIBNAME = @OPENCL_LIBNAME@
+OPENCL_VERSION = @OPENCL_VERSION@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
+PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PWR8_CFLAGS = @PWR8_CFLAGS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+RM = @RM@
+SED = @SED@
+SELINUX_CFLAGS = @SELINUX_CFLAGS@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
+SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
+SSE41_CFLAGS = @SSE41_CFLAGS@
+STRIP = @STRIP@
+SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
+SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
+SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
+SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+VALGRIND_LIBS = @VALGRIND_LIBS@
+VA_CFLAGS = @VA_CFLAGS@
+VA_LIBS = @VA_LIBS@
+VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
+VA_MAJOR = @VA_MAJOR@
+VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+VL_CFLAGS = @VL_CFLAGS@
+VL_LIBS = @VL_LIBS@
+VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
+WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
+XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acv_mako_found = @acv_mako_found@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+ifGNUmake = @ifGNUmake@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+C_SOURCES := \
+ vc5_blit.c \
+ vc5_bufmgr.c \
+ vc5_bufmgr.h \
+ vc5_cl.c \
+ vc5_cl.h \
+ vc5_context.c \
+ vc5_context.h \
+ vc5_draw.c \
+ vc5_drm.h \
+ vc5_emit.c \
+ vc5_fence.c \
+ vc5_formats.c \
+ vc5_job.c \
+ vc5_program.c \
+ vc5_query.c \
+ vc5_rcl.c \
+ vc5_resource.c \
+ vc5_resource.h \
+ vc5_screen.c \
+ vc5_screen.h \
+ vc5_simulator.c \
+ vc5_state.c \
+ vc5_tiling.c \
+ vc5_tiling.h \
+ vc5_uniforms.c \
+ $()
+
+GALLIUM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES)
+
+
+# src/gallium/auxiliary must appear before src/gallium/drivers
+# because there are stupidly two rbug_context.h files in
+# different directories, and which one is included by the
+# preprocessor is determined by the ordering of the -I flags.
+GALLIUM_DRIVER_CFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_DRIVER_CXXFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CXXFLAGS)
+
+GALLIUM_TARGET_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
+ $(DEFINES) \
+ $(PTHREAD_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
+ $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_WINSYS_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_2) $(am__append_3)
+AM_CFLAGS = \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_builddir)/src/broadcom \
+ $(LIBDRM_CFLAGS) \
+ $(VC5_SIMULATOR_CFLAGS) \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(VALGRIND_CFLAGS) \
+ $()
+
+noinst_LTLIBRARIES = libvc5.la
+libvc5_la_SOURCES = $(C_SOURCES)
+libvc5_la_LDFLAGS = \
+ $(VC5_SIMULATOR_LIBS) \
+ $(NULL)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/vc5/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/gallium/drivers/vc5/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libvc5.la: $(libvc5_la_OBJECTS) $(libvc5_la_DEPENDENCIES) $(EXTRA_libvc5_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libvc5_la_LINK) $(libvc5_la_OBJECTS) $(libvc5_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_blit.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_bufmgr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_cl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_draw.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_emit.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_fence.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_formats.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_job.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_program.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_query.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_rcl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_resource.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_screen.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_simulator.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_state.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_tiling.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_uniforms.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/mesa/src/gallium/drivers/vc5/Makefile.sources b/lib/mesa/src/gallium/drivers/vc5/Makefile.sources
new file mode 100644
index 000000000..3fb6a0d09
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/Makefile.sources
@@ -0,0 +1,27 @@
+C_SOURCES := \
+ vc5_blit.c \
+ vc5_bufmgr.c \
+ vc5_bufmgr.h \
+ vc5_cl.c \
+ vc5_cl.h \
+ vc5_context.c \
+ vc5_context.h \
+ vc5_draw.c \
+ vc5_drm.h \
+ vc5_emit.c \
+ vc5_fence.c \
+ vc5_formats.c \
+ vc5_job.c \
+ vc5_program.c \
+ vc5_query.c \
+ vc5_rcl.c \
+ vc5_resource.c \
+ vc5_resource.h \
+ vc5_screen.c \
+ vc5_screen.h \
+ vc5_simulator.c \
+ vc5_state.c \
+ vc5_tiling.c \
+ vc5_tiling.h \
+ vc5_uniforms.c \
+ $()
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c b/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c
new file mode 100644
index 000000000..64811416e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_blit.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2015-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_blitter.h"
+#include "vc5_context.h"
+
+#if 0
+/**
+ * Creates a surface for miplevel \p level of \p prsc through the context's
+ * create_surface hook. The template uses the resource's own format and
+ * selects layer 0 only.
+ */
+static struct pipe_surface *
+vc5_get_blit_surface(struct pipe_context *pctx,
+ struct pipe_resource *prsc, unsigned level)
+{
+ struct pipe_surface tmpl;
+
+ memset(&tmpl, 0, sizeof(tmpl));
+ tmpl.format = prsc->format;
+ tmpl.u.tex.level = level;
+ tmpl.u.tex.first_layer = 0;
+ tmpl.u.tex.last_layer = 0;
+
+ return pctx->create_surface(pctx, prsc, &tmpl);
+}
+
+/**
+ * Returns true when \p size is not a multiple of \p tile_size.
+ *
+ * The mask test is only meaningful for power-of-two tile sizes; the caller
+ * in this file passes 32 or 64.
+ */
+static bool
+is_tile_unaligned(unsigned size, unsigned tile_size)
+{
+ return size & (tile_size - 1);
+}
+
+/**
+ * Attempts to perform the blit as a tile load/store job rather than a draw.
+ *
+ * Returns false, before any surface or job is created, when the blit is not
+ * eligible: depth/stencil destination, scissor enabled, no color channel in
+ * the mask, source and destination boxes differ, the destination box is not
+ * tile-aligned (a partial tile is accepted only when the box reaches the
+ * surface edge), the source slice stride does not match the stride derived
+ * from the destination width, or the formats differ. Returns true after the
+ * job has been submitted.
+ *
+ * NOTE(review): this function sits inside "#if 0" and is not compiled.
+ */
+static bool
+vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ bool msaa = (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1);
+ int tile_width = msaa ? 32 : 64;
+ int tile_height = msaa ? 32 : 64;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format))
+ return false;
+
+ if (info->scissor_enable)
+ return false;
+
+ if ((info->mask & PIPE_MASK_RGBA) == 0)
+ return false;
+
+ if (info->dst.box.x != info->src.box.x ||
+ info->dst.box.y != info->src.box.y ||
+ info->dst.box.width != info->src.box.width ||
+ info->dst.box.height != info->src.box.height) {
+ return false;
+ }
+
+ int dst_surface_width = u_minify(info->dst.resource->width0,
+ info->dst.level);
+ int dst_surface_height = u_minify(info->dst.resource->height0,
+ info->dst.level);
+ if (is_tile_unaligned(info->dst.box.x, tile_width) ||
+ is_tile_unaligned(info->dst.box.y, tile_height) ||
+ (is_tile_unaligned(info->dst.box.width, tile_width) &&
+ info->dst.box.x + info->dst.box.width != dst_surface_width) ||
+ (is_tile_unaligned(info->dst.box.height, tile_height) &&
+ info->dst.box.y + info->dst.box.height != dst_surface_height)) {
+ return false;
+ }
+
+ /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
+ * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
+ * destination surface) to determine the stride. This may be wrong
+ * when reading from texture miplevels > 0, which are stored in
+ * POT-sized areas. For MSAA, the tile addresses are computed
+ * explicitly by the RCL, but still use the destination width to
+ * determine the stride (which could be fixed by explicitly supplying
+ * it in the ABI).
+ */
+ struct vc5_resource *rsc = vc5_resource(info->src.resource);
+
+ uint32_t stride;
+
+ if (info->src.resource->nr_samples > 1)
+ stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
+ /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T)
+ stride = align(dst_surface_width * rsc->cpp, 128); */
+ else
+ stride = align(dst_surface_width * rsc->cpp, 16);
+
+ if (stride != rsc->slices[info->src.level].stride)
+ return false;
+
+ if (info->dst.resource->format != info->src.resource->format)
+ return false;
+
+ /* Debug trace, permanently disabled. */
+ if (false) {
+ fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
+ info->src.box.x,
+ info->src.box.y,
+ info->dst.box.x,
+ info->dst.box.y,
+ info->dst.box.width,
+ info->dst.box.height);
+ }
+
+ struct pipe_surface *dst_surf =
+ vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level);
+ struct pipe_surface *src_surf =
+ vc5_get_blit_surface(pctx, info->src.resource, info->src.level);
+
+ vc5_flush_jobs_reading_resource(vc5, info->src.resource);
+
+ struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL);
+ pipe_surface_reference(&job->color_read, src_surf);
+
+ /* If we're resolving from MSAA to single sample, we still need to run
+ * the engine in MSAA mode for the load.
+ *
+ * NOTE(review): msaa, tile_width and tile_height are assigned again,
+ * unconditionally, a few lines below, so this block has no lasting
+ * effect as written.
+ */
+ if (!job->msaa && info->src.resource->nr_samples > 1) {
+ job->msaa = true;
+ job->tile_width = 32;
+ job->tile_height = 32;
+ }
+
+ job->draw_min_x = info->dst.box.x;
+ job->draw_min_y = info->dst.box.y;
+ job->draw_max_x = info->dst.box.x + info->dst.box.width;
+ job->draw_max_y = info->dst.box.y + info->dst.box.height;
+ job->draw_width = dst_surf->width;
+ job->draw_height = dst_surf->height;
+
+ job->tile_width = tile_width;
+ job->tile_height = tile_height;
+ job->msaa = msaa;
+ job->needs_flush = true;
+ job->resolve |= PIPE_CLEAR_COLOR;
+
+ vc5_job_submit(vc5, job);
+
+ /* Drop the local surface references taken above. */
+ pipe_surface_reference(&dst_surf, NULL);
+ pipe_surface_reference(&src_surf, NULL);
+
+ return true;
+}
+#endif
+
+/**
+ * Hands the context's currently bound state to the util_blitter_save_*()
+ * hooks of vc5->blitter.
+ *
+ * Called before a util_blitter operation (see vc5_render_blit()). Each piece
+ * of state is saved exactly once; the stream-output targets were previously
+ * saved twice with identical arguments.
+ */
+void
+vc5_blitter_save(struct vc5_context *vc5)
+{
+        util_blitter_save_fragment_constant_buffer_slot(vc5->blitter,
+                                                        vc5->constbuf[PIPE_SHADER_FRAGMENT].cb);
+        util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb);
+        util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx);
+        util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs);
+        util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets,
+                                     vc5->streamout.targets);
+        util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer);
+        util_blitter_save_viewport(vc5->blitter, &vc5->viewport);
+        util_blitter_save_scissor(vc5->blitter, &vc5->scissor);
+        util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs);
+        util_blitter_save_blend(vc5->blitter, vc5->blend);
+        util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa);
+        util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref);
+        util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask);
+        util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer);
+        util_blitter_save_fragment_sampler_states(vc5->blitter,
+                                                  vc5->fragtex.num_samplers,
+                                                  (void **)vc5->fragtex.samplers);
+        util_blitter_save_fragment_sampler_views(vc5->blitter,
+                                                 vc5->fragtex.num_textures,
+                                                 vc5->fragtex.textures);
+}
+
+/**
+ * Performs the blit described by \p info through util_blitter.
+ *
+ * Returns false, after printing the source and destination format names to
+ * stderr, when util_blitter_is_blit_supported() rejects the blit. Otherwise
+ * saves the bound state, runs util_blitter_blit() and returns true.
+ */
+static bool
+vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
+{
+ struct vc5_context *vc5 = vc5_context(ctx);
+
+ if (!util_blitter_is_blit_supported(vc5->blitter, info)) {
+ fprintf(stderr, "blit unsupported %s -> %s\n",
+ util_format_short_name(info->src.resource->format),
+ util_format_short_name(info->dst.resource->format));
+ return false;
+ }
+
+ vc5_blitter_save(vc5);
+ util_blitter_blit(vc5->blitter, info);
+
+ return true;
+}
+
+/* Blit entry point.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ *
+ * The tile-based fast path (vc5_tile_blit) is currently compiled out, so
+ * every blit goes through the util_blitter render path. The blit info is
+ * copied into a local because vc5_render_blit() takes a non-const pointer.
+ *
+ * NOTE(review): the result of vc5_render_blit() is ignored, so an
+ * unsupported blit is only reported on stderr.
+ */
+void
+vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
+{
+ struct pipe_blit_info info = *blit_info;
+
+#if 0
+ if (vc5_tile_blit(pctx, blit_info))
+ return;
+#endif
+
+ vc5_render_blit(pctx, &info);
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c
new file mode 100644
index 000000000..c6c06dcfd
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <err.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+
+#include "vc5_context.h"
+#include "vc5_screen.h"
+
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+static bool dump_stats = false;
+
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache);
+
+/**
+ * Prints the screen's BO allocation counters and BO-cache counters to
+ * stderr.
+ *
+ * When the cache's time list is non-empty, it also prints the free_time of
+ * the entries at the head and tail of that list, followed by the current
+ * CLOCK_MONOTONIC second.
+ */
+static void
+vc5_bo_dump_stats(struct vc5_screen *screen)
+{
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+
+        fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
+        fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024);
+        fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
+        fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024);
+
+        if (!list_empty(&cache->time_list)) {
+                struct vc5_bo *first = LIST_ENTRY(struct vc5_bo,
+                                                  cache->time_list.next,
+                                                  time_list);
+                struct vc5_bo *last = LIST_ENTRY(struct vc5_bo,
+                                                 cache->time_list.prev,
+                                                 time_list);
+
+                fprintf(stderr, " oldest cache time: %ld\n",
+                        (long)first->free_time);
+                fprintf(stderr, " newest cache time: %ld\n",
+                        (long)last->free_time);
+
+                struct timespec time;
+                clock_gettime(CLOCK_MONOTONIC, &time);
+                /* time_t is not guaranteed to be long, so cast to match
+                 * "%ld", as the two free_time prints above already do.
+                 */
+                fprintf(stderr, " now: %ld\n",
+                        (long)time.tv_sec);
+        }
+}
+
+/**
+ * Unlinks \p bo from both cache lists (by time and by size) and subtracts
+ * it from the cache's BO count and byte total. Does not free the BO.
+ */
+static void
+vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo)
+{
+ list_del(&bo->time_list);
+ list_del(&bo->size_list);
+ cache->bo_count--;
+ cache->bo_size -= bo->size;
+}
+
+/**
+ * Tries to take a cached BO of exactly \p size bytes out of the BO cache.
+ *
+ * The per-size lists are indexed by (size / 4096 - 1). Returns NULL when
+ * no list exists for that index, when the list is empty, or when the first
+ * BO on the list is still busy. On success the BO is removed from the
+ * cache, its reference count is reset to 1 and its name is set to \p name.
+ *
+ * NOTE(review): size_list_size is read before the cache lock is taken.
+ */
+static struct vc5_bo *
+vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = size / 4096 - 1;
+
+ if (cache->size_list_size <= page_index)
+ return NULL;
+
+ struct vc5_bo *bo = NULL;
+ mtx_lock(&cache->lock);
+ if (!list_empty(&cache->size_list[page_index])) {
+ bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next,
+ size_list);
+
+ /* Check that the BO has gone idle. If not, then we want to
+ * allocate something new instead, since we assume that the
+ * user will proceed to CPU map it and fill it with stuff.
+ */
+ if (!vc5_bo_wait(bo, 0, NULL)) {
+ mtx_unlock(&cache->lock);
+ return NULL;
+ }
+
+ pipe_reference_init(&bo->reference, 1);
+ vc5_bo_remove_from_cache(cache, bo);
+
+ bo->name = name;
+ }
+ mtx_unlock(&cache->lock);
+ return bo;
+}
+
+/**
+ * Allocates a BO of \p size bytes rounded up to a multiple of 4096,
+ * reusing an idle BO of that size from the screen's cache when one exists.
+ *
+ * If the create ioctl fails while the cache still holds BOs, the cache is
+ * emptied and the ioctl is retried once.
+ *
+ * \param name  label stored in the BO and printed in debug output; the
+ *              pointer itself is stored, the string is not copied.
+ * \return a BO with a reference count of 1, or NULL on failure.
+ */
+struct vc5_bo *
+vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name)
+{
+        struct vc5_bo *bo;
+        int ret;
+        /* Declared before the retry label on purpose: a declaration placed
+         * after the label is re-initialized on every "goto retry", which
+         * would defeat the retry-once guard.
+         */
+        bool cleared_and_retried = false;
+
+        size = align(size, 4096);
+
+        bo = vc5_bo_from_cache(screen, size, name);
+        if (bo) {
+                if (dump_stats) {
+                        fprintf(stderr, "Allocated %s %dkb from cache:\n",
+                                name, size / 1024);
+                        vc5_bo_dump_stats(screen);
+                }
+                return bo;
+        }
+
+        bo = CALLOC_STRUCT(vc5_bo);
+        if (!bo)
+                return NULL;
+
+        pipe_reference_init(&bo->reference, 1);
+        bo->screen = screen;
+        bo->size = size;
+        bo->name = name;
+        bo->private = true;
+
+ retry:
+        ;
+
+        struct drm_vc5_create_bo create = {
+                .size = size
+        };
+
+        ret = vc5_ioctl(screen->fd, DRM_IOCTL_VC5_CREATE_BO, &create);
+        bo->handle = create.handle;
+        bo->offset = create.offset;
+
+        if (ret != 0) {
+                /* Cached BOs may be holding the memory we need: release
+                 * them and try exactly once more.
+                 */
+                if (!list_empty(&screen->bo_cache.time_list) &&
+                    !cleared_and_retried) {
+                        cleared_and_retried = true;
+                        vc5_bo_cache_free_all(&screen->bo_cache);
+                        goto retry;
+                }
+
+                free(bo);
+                return NULL;
+        }
+
+        screen->bo_count++;
+        screen->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+                vc5_bo_dump_stats(screen);
+        }
+
+        return bo;
+}
+
+/**
+ * Disposes of \p bo using the current CLOCK_MONOTONIC second as its free
+ * time. Takes the BO cache lock around the call to
+ * vc5_bo_last_unreference_locked_timed(), which either caches or frees
+ * the BO.
+ */
+void
+vc5_bo_last_unreference(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ struct timespec time;
+ clock_gettime(CLOCK_MONOTONIC, &time);
+ mtx_lock(&screen->bo_cache.lock);
+ vc5_bo_last_unreference_locked_timed(bo, time.tv_sec);
+ mtx_unlock(&screen->bo_cache.lock);
+}
+
+/**
+ * Destroys \p bo: releases its CPU mapping if it has one, closes its GEM
+ * handle, subtracts it from the screen's BO counters and frees the struct.
+ *
+ * A failing GEM_CLOSE ioctl is reported on stderr and is otherwise ignored.
+ */
+static void
+vc5_bo_free(struct vc5_bo *bo)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ if (bo->map) {
+ /* Under the simulator, BOs named "winsys" get their map from
+ * malloc() in vc5_bo_open_handle(), so it is released with
+ * free() rather than munmap().
+ */
+ if (using_vc5_simulator && bo->name &&
+ strcmp(bo->name, "winsys") == 0) {
+ free(bo->map);
+ } else {
+ munmap(bo->map, bo->size);
+ VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+ }
+ }
+
+ struct drm_gem_close c;
+ memset(&c, 0, sizeof(c));
+ c.handle = bo->handle;
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
+ if (ret != 0)
+ fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+
+ screen->bo_count--;
+ screen->bo_size -= bo->size;
+
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s%s%dkb:\n",
+ bo->name ? bo->name : "",
+ bo->name ? " " : "",
+ bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+
+ free(bo);
+}
+
+/**
+ * Frees cached BOs whose free_time is more than two seconds before
+ * \p time.
+ *
+ * Entries are appended to the tail of the time list when they are cached,
+ * so the walk starts at the oldest entry and stops at the first one that
+ * is still fresh. The before/after statistics dumps are only emitted when
+ * at least one BO is actually freed.
+ */
+static void
+free_stale_bos(struct vc5_screen *screen, time_t time)
+{
+        struct vc5_bo_cache *cache = &screen->bo_cache;
+        bool freed_any = false;
+
+        list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+                                 time_list) {
+                /* Keep anything cached within the last two seconds. */
+                if (time - bo->free_time <= 2)
+                        break;
+
+                if (dump_stats && !freed_any) {
+                        fprintf(stderr, "Freeing stale BOs:\n");
+                        vc5_bo_dump_stats(screen);
+                }
+                freed_any = true;
+
+                vc5_bo_remove_from_cache(cache, bo);
+                vc5_bo_free(bo);
+        }
+
+        if (dump_stats && freed_any) {
+                fprintf(stderr, "Freed stale BOs:\n");
+                vc5_bo_dump_stats(screen);
+        }
+}
+
+/**
+ * Empties the BO cache: with the cache lock held, removes every BO on the
+ * time list from the cache and frees it.
+ */
+static void
+vc5_bo_cache_free_all(struct vc5_bo_cache *cache)
+{
+ mtx_lock(&cache->lock);
+ list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list,
+ time_list) {
+ vc5_bo_remove_from_cache(cache, bo);
+ vc5_bo_free(bo);
+ }
+ mtx_unlock(&cache->lock);
+}
+
+/**
+ * Disposes of a BO whose last reference has been dropped.
+ *
+ * BOs that are not private are freed immediately. Private BOs are put
+ * into the BO cache instead: stamped with \p time, appended to the time
+ * list and to the size list for their page count, and stripped of their
+ * name. Cached BOs that have become stale are then freed.
+ *
+ * NOTE(review): the "_locked" suffix and the caller in this file suggest
+ * that the cache lock must already be held - confirm for other callers.
+ */
+void
+vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time)
+{
+ struct vc5_screen *screen = bo->screen;
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+ uint32_t page_index = bo->size / 4096 - 1;
+
+ if (!bo->private) {
+ vc5_bo_free(bo);
+ return;
+ }
+
+ /* Grow the array of per-size list heads so that page_index is valid.
+ * NOTE(review): the previous array is not released here and the
+ * ralloc_array() result is not checked.
+ */
+ if (cache->size_list_size <= page_index) {
+ struct list_head *new_list =
+ ralloc_array(screen, struct list_head, page_index + 1);
+
+ /* Move old list contents over (since the array has moved, and
+ * therefore the pointers to the list heads have to change).
+ */
+ for (int i = 0; i < cache->size_list_size; i++) {
+ struct list_head *old_head = &cache->size_list[i];
+ if (list_empty(old_head))
+ list_inithead(&new_list[i]);
+ else {
+ new_list[i].next = old_head->next;
+ new_list[i].prev = old_head->prev;
+ new_list[i].next->prev = &new_list[i];
+ new_list[i].prev->next = &new_list[i];
+ }
+ }
+ /* The newly added slots start out as empty lists. */
+ for (int i = cache->size_list_size; i < page_index + 1; i++)
+ list_inithead(&new_list[i]);
+
+ cache->size_list = new_list;
+ cache->size_list_size = page_index + 1;
+ }
+
+ bo->free_time = time;
+ list_addtail(&bo->size_list, &cache->size_list[page_index]);
+ list_addtail(&bo->time_list, &cache->time_list);
+ cache->bo_count++;
+ cache->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Freed %s %dkb to cache:\n",
+ bo->name, bo->size / 1024);
+ vc5_bo_dump_stats(screen);
+ }
+ bo->name = NULL;
+
+ free_stale_bos(screen, time);
+}
+
+/**
+ * Returns the BO that wraps GEM handle \p handle on this screen.
+ *
+ * If the handle is already in the screen's handle table, the existing BO
+ * gains a reference and is returned. Otherwise a new non-private BO named
+ * "winsys" is created with a reference count of 1 and added to the table.
+ *
+ * \param winsys_stride  only used by the simulator build.
+ * \param size           size of the BO in bytes; must be non-zero.
+ * \return the BO, or NULL if the wrapper struct could not be allocated.
+ */
+static struct vc5_bo *
+vc5_bo_open_handle(struct vc5_screen *screen,
+                   uint32_t winsys_stride,
+                   uint32_t handle, uint32_t size)
+{
+        struct vc5_bo *bo;
+
+        assert(size);
+
+        mtx_lock(&screen->bo_handles_mutex);
+
+        bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+        if (bo) {
+                pipe_reference(NULL, &bo->reference);
+                goto done;
+        }
+
+        bo = CALLOC_STRUCT(vc5_bo);
+        /* Out of memory: return NULL instead of dereferencing it below. */
+        if (!bo)
+                goto done;
+
+        pipe_reference_init(&bo->reference, 1);
+        bo->screen = screen;
+        bo->handle = handle;
+        bo->size = size;
+        bo->name = "winsys";
+        bo->private = false;
+
+#ifdef USE_VC5_SIMULATOR
+        vc5_simulator_open_from_handle(screen->fd, winsys_stride,
+                                       bo->handle, bo->size);
+        bo->map = malloc(bo->size);
+#endif
+
+        util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+        mtx_unlock(&screen->bo_handles_mutex);
+        return bo;
+}
+
+/**
+ * Opens the BO identified by global GEM name \p name (DRM_IOCTL_GEM_OPEN)
+ * and wraps the resulting handle, using the size reported by the ioctl.
+ *
+ * Returns NULL, after printing the errno string to stderr, if the ioctl
+ * fails.
+ */
+struct vc5_bo *
+vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride)
+{
+ struct drm_gem_open o = {
+ .name = name
+ };
+ int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o);
+ if (ret) {
+ fprintf(stderr, "Failed to open bo %d: %s\n",
+ name, strerror(errno));
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size);
+}
+
+/**
+ * Imports the dmabuf \p fd as a GEM handle on this screen and wraps it in
+ * a BO. The BO size is taken from the end offset of the fd.
+ *
+ * Returns NULL, after printing a message to stderr, if the import or the
+ * size query fails.
+ *
+ * NOTE(review): the handle obtained from drmPrimeFDToHandle() is not
+ * released on the lseek() failure path, and the lseek() result is stored
+ * in an int.
+ */
+struct vc5_bo *
+vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride)
+{
+ uint32_t handle;
+ int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
+ int size;
+ if (ret) {
+ fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd);
+ return NULL;
+ }
+
+ /* Determine the size of the bo we were handed. */
+ size = lseek(fd, 0, SEEK_END);
+ if (size == -1) {
+ fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd);
+ return NULL;
+ }
+
+ return vc5_bo_open_handle(screen, winsys_stride, handle, size);
+}
+
+/**
+ * Exports \p bo as a dmabuf file descriptor opened with O_CLOEXEC.
+ *
+ * On success the BO is marked as no longer private and is entered into
+ * the screen's handle table under its GEM handle.
+ *
+ * \return the new file descriptor, or -1 if the export fails.
+ */
+int
+vc5_bo_get_dmabuf(struct vc5_bo *bo)
+{
+ int fd;
+ int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle,
+ O_CLOEXEC, &fd);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to export gem bo %d to dmabuf\n",
+ bo->handle);
+ return -1;
+ }
+
+ mtx_lock(&bo->screen->bo_handles_mutex);
+ bo->private = false;
+ util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+ mtx_unlock(&bo->screen->bo_handles_mutex);
+
+ return fd;
+}
+
+/**
+ * Creates a global GEM (flink) name for \p bo.
+ *
+ * On success the BO is marked as no longer private and the name is stored
+ * in \p *name.
+ *
+ * On failure the BO is left untouched. The caller still holds its
+ * reference, so the BO must not be released here: the earlier free(bo)
+ * on this path left the caller with a dangling pointer and skipped the
+ * GEM close and the screen accounting done by the normal free path.
+ *
+ * \return true on success, false if the ioctl fails (the errno string is
+ *         printed to stderr).
+ */
+bool
+vc5_bo_flink(struct vc5_bo *bo, uint32_t *name)
+{
+        struct drm_gem_flink flink = {
+                .handle = bo->handle,
+        };
+        int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink);
+        if (ret) {
+                fprintf(stderr, "Failed to flink bo %d: %s\n",
+                        bo->handle, strerror(errno));
+                return false;
+        }
+
+        bo->private = false;
+        *name = flink.name;
+
+        return true;
+}
+
+/**
+ * Issues DRM_IOCTL_VC5_WAIT_SEQNO for \p seqno with the given timeout.
+ *
+ * \return 0 unless the ioctl wrapper returns -1, in which case the negated
+ *         errno value is returned.
+ */
+static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+        struct drm_vc5_wait_seqno args = {
+                .seqno = seqno,
+                .timeout_ns = timeout_ns,
+        };
+
+        return vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &args) == -1 ?
+               -errno : 0;
+}
+
+/**
+ * Waits until \p seqno has been reached, or until \p timeout_ns expires.
+ *
+ * Returns true at once when screen->finished_seqno already covers
+ * \p seqno. Otherwise it performs the wait ioctl; on success it records
+ * \p seqno as finished and returns true, and on -ETIME it returns false.
+ * Any other error is printed and the process aborts.
+ *
+ * \param reason  optional text used for the V3D_DEBUG_PERF message.
+ */
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason)
+{
+ if (screen->finished_seqno >= seqno)
+ return true;
+
+ /* Perf debugging: probe with a zero timeout first so that a wait
+ * which is really going to block can be reported.
+ */
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on seqno %lld for %s\n",
+ (long long)seqno, reason);
+ }
+ }
+
+ int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ screen->finished_seqno = seqno;
+ return true;
+}
+
+/**
+ * Issues DRM_IOCTL_VC5_WAIT_BO for GEM handle \p handle with the given
+ * timeout.
+ *
+ * \return 0 unless the ioctl wrapper returns -1, in which case the negated
+ *         errno value is returned.
+ */
+static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+        struct drm_vc5_wait_bo args = {
+                .handle = handle,
+                .timeout_ns = timeout_ns,
+        };
+
+        return vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_BO, &args) == -1 ?
+               -errno : 0;
+}
+
+/**
+ * Waits for \p bo for up to \p timeout_ns.
+ *
+ * Returns true when the wait ioctl succeeds and false when it times out
+ * (-ETIME). Any other error is printed and the process aborts.
+ *
+ * \param reason  optional text used for the V3D_DEBUG_PERF message.
+ */
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason)
+{
+ struct vc5_screen *screen = bo->screen;
+
+ /* Perf debugging: probe with a zero timeout first so that a wait
+ * which is really going to block can be reported.
+ */
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
+ }
+
+ int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Returns a CPU mapping of \p bo without waiting on the BO.
+ *
+ * The mapping is created on first use and stored in bo->map; later calls
+ * return the stored pointer. The mmap offset is obtained from
+ * DRM_IOCTL_VC5_MMAP_BO. A failure of either the ioctl or mmap() is
+ * printed and the process aborts.
+ */
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo)
+{
+ uint64_t offset;
+ int ret;
+
+ if (bo->map)
+ return bo->map;
+
+ struct drm_vc5_mmap_bo map;
+ memset(&map, 0, sizeof(map));
+ map.handle = bo->handle;
+ ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_VC5_MMAP_BO, &map);
+ offset = map.offset;
+ if (ret != 0) {
+ fprintf(stderr, "map ioctl failure\n");
+ abort();
+ }
+
+ bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bo->screen->fd, offset);
+ if (bo->map == MAP_FAILED) {
+ fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+ bo->handle, (long long)offset, bo->size);
+ abort();
+ }
+ VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
+
+ return bo->map;
+}
+
+/**
+ * Maps \p bo for CPU access and then waits on the BO with an infinite
+ * timeout. Aborts the process if that wait reports failure.
+ *
+ * \return the CPU pointer to the BO's contents.
+ */
+void *
+vc5_bo_map(struct vc5_bo *bo)
+{
+        void *ptr = vc5_bo_map_unsynchronized(bo);
+
+        if (!vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map")) {
+                fprintf(stderr, "BO wait for map failed\n");
+                abort();
+        }
+
+        return ptr;
+}
+
+/**
+ * Screen teardown hook for the buffer manager: frees every BO held in the
+ * screen's BO cache and, when dump_stats is enabled, prints the final BO
+ * statistics.
+ */
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_bo_cache *cache = &screen->bo_cache;
+
+ vc5_bo_cache_free_all(cache);
+
+ if (dump_stats) {
+ fprintf(stderr, "BO stats after screen destroy:\n");
+ vc5_bo_dump_stats(screen);
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h
new file mode 100644
index 000000000..cca2b2287
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_bufmgr.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_BUFMGR_H
+#define VC5_BUFMGR_H
+
+#include <stdint.h>
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/list.h"
+#include "vc5_screen.h"
+
+struct vc5_context;
+
+/** A reference-counted GEM buffer object. */
+struct vc5_bo {
+ /** Reference count, manipulated through pipe_reference(). */
+ struct pipe_reference reference;
+ /** Screen that the BO was allocated or opened on. */
+ struct vc5_screen *screen;
+ /** CPU mapping of the BO, or NULL if it has not been mapped yet. */
+ void *map;
+ /** Label used in debug output; NULL while the BO sits in the cache. */
+ const char *name;
+ /** GEM handle of the BO on the screen's fd. */
+ uint32_t handle;
+ /** Size of the BO in bytes. */
+ uint32_t size;
+
+ /* Address of the BO in our page tables. */
+ uint32_t offset;
+
+ /** Entry in the linked list of buffers freed, by age. */
+ struct list_head time_list;
+ /** Entry in the per-page-count linked list of buffers freed (by age). */
+ struct list_head size_list;
+ /** Approximate second when the bo was freed. */
+ time_t free_time;
+ /**
+ * Whether only our process has a reference to the BO (meaning that
+ * it's safe to reuse it in the BO cache).
+ */
+ bool private;
+};
+
+struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size,
+ const char *name);
+void vc5_bo_last_unreference(struct vc5_bo *bo);
+void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time);
+struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name,
+ uint32_t winsys_stride);
+struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd,
+ uint32_t winsys_stride);
+bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name);
+int vc5_bo_get_dmabuf(struct vc5_bo *bo);
+
+/**
+ * Makes \p *old_bo point at \p new_bo, passing both reference counts to
+ * pipe_reference(). If that call reports that the old BO is to be
+ * destroyed, the old BO is handed to vc5_bo_last_unreference().
+ */
+static inline void
+vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo)
+{
+ if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
+ vc5_bo_last_unreference(*old_bo);
+ *old_bo = new_bo;
+}
+
+/** Takes an additional reference on \p bo and returns it. */
+static inline struct vc5_bo *
+vc5_bo_reference(struct vc5_bo *bo)
+{
+ pipe_reference(NULL, &bo->reference);
+ return bo;
+}
+
+/**
+ * Drops the caller's reference on \p *bo and sets \p *bo to NULL.
+ * Does nothing if \p *bo is already NULL.
+ *
+ * When the last reference goes away the BO is passed to
+ * vc5_bo_last_unreference(). For BOs that are not private, the reference
+ * drop and the removal from the screen's handle table are both done with
+ * bo_handles_mutex held.
+ */
+static inline void
+vc5_bo_unreference(struct vc5_bo **bo)
+{
+ struct vc5_screen *screen;
+ if (!*bo)
+ return;
+
+ if ((*bo)->private) {
+ /* Avoid the mutex for private BOs */
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc5_bo_last_unreference(*bo);
+ } else {
+ screen = (*bo)->screen;
+ mtx_lock(&screen->bo_handles_mutex);
+
+ if (pipe_reference(&(*bo)->reference, NULL)) {
+ util_hash_table_remove(screen->bo_handles,
+ (void *)(uintptr_t)(*bo)->handle);
+ vc5_bo_last_unreference(*bo);
+ }
+
+ mtx_unlock(&screen->bo_handles_mutex);
+ }
+
+ *bo = NULL;
+}
+
+/**
+ * Variant of vc5_bo_unreference() that passes a caller-supplied \p time
+ * to vc5_bo_last_unreference_locked_timed() when the last reference is
+ * dropped. Sets \p *bo to NULL; does nothing if it is already NULL.
+ *
+ * NOTE(review): unlike vc5_bo_unreference(), this does not touch the
+ * handle table or take any lock itself; presumably the caller holds the
+ * BO cache lock - confirm against callers.
+ */
+static inline void
+vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time)
+{
+ if (!*bo)
+ return;
+
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc5_bo_last_unreference_locked_timed(*bo, time);
+ *bo = NULL;
+}
+
+void *
+vc5_bo_map(struct vc5_bo *bo);
+
+void *
+vc5_bo_map_unsynchronized(struct vc5_bo *bo);
+
+bool
+vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason);
+
+bool
+vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
+
+void
+vc5_bufmgr_destroy(struct pipe_screen *pscreen);
+
+#endif /* VC5_BUFMGR_H */
+
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c
new file mode 100644
index 000000000..37d96c436
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/ralloc.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Resets \p cl to an empty command list that belongs to \p job: no base
+ * pointer, no write pointer and a size of zero.
+ */
+void
+vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl)
+{
+        cl->job = job;
+        cl->size = 0;
+        cl->base = NULL;
+        cl->next = NULL;
+}
+
+/**
+ * Makes sure \p space bytes are available in \p cl at an offset aligned
+ * to \p alignment.
+ *
+ * If the current buffer has room, cl->next is moved to the aligned offset
+ * and that offset is returned. Otherwise the current BO is unreferenced,
+ * a new BO of align(space, 4096) bytes is allocated and mapped, and 0 is
+ * returned.
+ *
+ * NOTE(review): the result of vc5_bo_alloc() is not checked before it is
+ * mapped.
+ */
+uint32_t
+vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment)
+{
+ uint32_t offset = align(cl_offset(cl), alignment);
+
+ if (offset + space <= cl->size) {
+ cl->next = cl->base + offset;
+ return offset;
+ }
+
+ vc5_bo_unreference(&cl->bo);
+ cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL");
+ cl->base = vc5_bo_map(cl->bo);
+ cl->size = cl->bo->size;
+ cl->next = cl->base;
+
+ return 0;
+}
+
+/**
+ * Makes sure \p space bytes plus a BRANCH packet fit in the CL's current
+ * buffer, switching to a new 4096-byte BO when they do not.
+ *
+ * If the CL already has a BO, a BRANCH packet pointing at the start of
+ * the new BO is emitted into the old one and the CL's reference to the
+ * old BO is dropped. If this is the CL's first BO, the new BO is added to
+ * the job. Either way the CL then writes from the start of the new BO.
+ */
+void
+vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space)
+{
+        if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size)
+                return;
+
+        struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL");
+        assert(space <= new_bo->size);
+
+        /* Chain to the new BO from the old one. */
+        if (cl->bo) {
+                cl_emit(cl, BRANCH, branch) {
+                        branch.address = cl_address(new_bo, 0);
+                }
+                vc5_bo_unreference(&cl->bo);
+        } else {
+                /* Root the first RCL/BCL BO in the job. cl->bo is NULL on
+                 * this path, so the BO to add is the one just allocated.
+                 */
+                vc5_job_add_bo(cl->job, new_bo);
+        }
+
+        cl->bo = new_bo;
+        cl->base = vc5_bo_map(cl->bo);
+        cl->size = cl->bo->size;
+        cl->next = cl->base;
+}
+
+/** Drops the CL's reference to its current BO, if it has one. */
+void
+vc5_destroy_cl(struct vc5_cl *cl)
+{
+ vc5_bo_unreference(&cl->bo);
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h
new file mode 100644
index 000000000..64ccac805
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_cl.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CL_H
+#define VC5_CL_H
+
+#include <stdint.h>
+
+#include "util/u_math.h"
+#include "util/macros.h"
+
+struct vc5_bo;
+struct vc5_job;
+struct vc5_cl;
+
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc5_cl_out;
+
+/** A reference to a BO used in the CL packing functions */
+struct vc5_cl_reloc {
+ struct vc5_bo *bo;
+ uint32_t offset;
+};
+
+static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *);
+
+#define __gen_user_data struct vc5_cl
+#define __gen_address_type struct vc5_cl_reloc
+#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
+ (reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+struct vc5_cl {
+ void *base;
+ struct vc5_job *job;
+ struct vc5_cl_out *next;
+ struct vc5_bo *bo;
+ uint32_t size;
+};
+
+void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl);
+void vc5_destroy_cl(struct vc5_cl *cl);
+void vc5_dump_cl(void *cl, uint32_t size, bool is_render);
+uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo);
+
+struct PACKED unaligned_16 { uint16_t x; };
+struct PACKED unaligned_32 { uint32_t x; };
+
+static inline uint32_t cl_offset(struct vc5_cl *cl)
+{
+ return (char *)cl->next - (char *)cl->base;
+}
+
+static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl)
+{
+ return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
+}
+
+static inline void
+cl_advance(struct vc5_cl_out **cl, uint32_t n)
+{
+ (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n);
+}
+
+static inline struct vc5_cl_out *
+cl_start(struct vc5_cl *cl)
+{
+ return cl->next;
+}
+
+static inline void
+cl_end(struct vc5_cl *cl, struct vc5_cl_out *next)
+{
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
+}
+
+
+static inline void
+put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val)
+{
+ struct unaligned_32 *p = (void *)ptr;
+ p->x = val;
+}
+
+static inline void
+put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val)
+{
+ struct unaligned_16 *p = (void *)ptr;
+ p->x = val;
+}
+
+static inline void
+cl_u8(struct vc5_cl_out **cl, uint8_t n)
+{
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
+}
+
+static inline void
+cl_u16(struct vc5_cl_out **cl, uint16_t n)
+{
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
+}
+
+static inline void
+cl_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
+}
+
+static inline void
+cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n)
+{
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
+}
+
+static inline void
+cl_aligned_reloc(struct vc5_cl *cl,
+ struct vc5_cl_out **cl_out,
+ struct vc5_bo *bo, uint32_t offset)
+{
+ cl_aligned_u32(cl_out, bo->offset + offset);
+ vc5_job_add_bo(cl->job, bo);
+}
+
+static inline void
+cl_ptr(struct vc5_cl_out **cl, void *ptr)
+{
+ *(struct vc5_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
+}
+
+static inline void
+cl_f(struct vc5_cl_out **cl, float f)
+{
+ cl_u32(cl, fui(f));
+}
+
+static inline void
+cl_aligned_f(struct vc5_cl_out **cl, float f)
+{
+ cl_aligned_u32(cl, fui(f));
+}
+
+/**
+ * Reference to a BO with its associated offset, used in the pack process.
+ */
+static inline struct vc5_cl_reloc
+cl_address(struct vc5_bo *bo, uint32_t offset)
+{
+ struct vc5_cl_reloc reloc = {
+ .bo = bo,
+ .offset = offset,
+ };
+ return reloc;
+}
+
+uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align);
+void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size);
+
+#define cl_packet_header(packet) V3D33_ ## packet ## _header
+#define cl_packet_length(packet) V3D33_ ## packet ## _length
+#define cl_packet_pack(packet) V3D33_ ## packet ## _pack
+#define cl_packet_struct(packet) V3D33_ ## packet
+
+static inline void *
+cl_get_emit_space(struct vc5_cl_out **cl, size_t size)
+{
+ void *addr = *cl;
+ cl_advance(cl, size);
+ return addr;
+}
+
+/* Macro for setting up an emit of a CL struct. A temporary unpacked struct
+ * is created under the given name, whose fields you set in the body:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
+ * flags.flat_shade_flags = 1 << 2;
+ * }
+ *
+ * or default values only can be emitted with just:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is a for loop that runs its body (either the block or the
+ * ';' after the invocation) exactly once, then packs the struct into the CL.
+ */
+#define cl_emit(cl, packet, name) \
+ for (struct cl_packet_struct(packet) name = { \
+ cl_packet_header(packet) \
+ }, \
+ *_loop_terminate = &name; \
+ __builtin_expect(_loop_terminate != NULL, 1); \
+ ({ \
+ struct vc5_cl_out *cl_out = cl_start(cl); \
+ cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out, \
+ cl_packet_length(packet))); \
+ cl_advance(&cl_out, cl_packet_length(packet)); \
+ cl_end(cl, cl_out); \
+ _loop_terminate = NULL; \
+ })) \
+
+#define cl_emit_prepacked(cl, packet) do { \
+ memcpy((cl)->next, (packet), sizeof(*(packet))); \
+ cl_advance(&(cl)->next, sizeof(*(packet))); \
+} while (0) /* copies *packet to the CL write pointer, then advances it */
+
+/**
+ * Helper function called by the XML-generated pack functions for filling in
+ * an address field in shader records.
+ *
+ * Since we have a private address space as of VC5, our BOs can have lifelong
+ * offsets, and all the kernel needs to know is which BOs need to be paged in
+ * for this exec.
+ */
+static inline void
+cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc)
+{
+ if (reloc->bo)
+ vc5_job_add_bo(cl->job, reloc->bo);
+}
+
+#endif /* VC5_CL_H */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_context.c b/lib/mesa/src/gallium/drivers/vc5/vc5_context.c
new file mode 100644
index 000000000..f80020ab3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_context.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xf86drm.h>
+#include <err.h>
+
+#include "pipe/p_defines.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+#include "pipe/p_screen.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+
+void
+vc5_flush(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ struct hash_entry *entry;
+ hash_table_foreach(vc5->jobs, entry) {
+ struct vc5_job *job = entry->data;
+ vc5_job_submit(vc5, job);
+ }
+}
+
+static void
+vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+
+ vc5_flush(pctx);
+
+ if (fence) {
+ struct pipe_screen *screen = pctx->screen;
+ struct vc5_fence *f = vc5_fence_create(vc5->screen,
+ vc5->last_emit_seqno);
+ screen->fence_reference(screen, fence, NULL);
+ *fence = (struct pipe_fence_handle *)f;
+ }
+}
+
+static void
+vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_resource *rsc = vc5_resource(prsc);
+
+ rsc->initialized_buffers = 0;
+
+ struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs,
+ prsc);
+ if (!entry)
+ return;
+
+ struct vc5_job *job = entry->data;
+ if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
+ job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+}
+
+static void
+vc5_context_destroy(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ /* Submit all queued jobs before freeing the helpers below. */
+ vc5_flush(pctx);
+
+ if (vc5->blitter)
+ util_blitter_destroy(vc5->blitter);
+
+ if (vc5->primconvert)
+ util_primconvert_destroy(vc5->primconvert);
+
+ if (vc5->uploader)
+ u_upload_destroy(vc5->uploader);
+
+ slab_destroy_child(&vc5->transfer_pool);
+ /* Release every color buffer slot, not only slot 0; unbound ones are NULL. */
+ for (unsigned i = 0; i < ARRAY_SIZE(vc5->framebuffer.cbufs); i++)
+ pipe_surface_reference(&vc5->framebuffer.cbufs[i], NULL);
+ pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL);
+ vc5_program_fini(pctx);
+
+ ralloc_free(vc5);
+}
+
+struct pipe_context *
+vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+ struct vc5_context *vc5;
+
+ vc5 = rzalloc(NULL, struct vc5_context);
+ if (!vc5)
+ return NULL;
+ struct pipe_context *pctx = &vc5->base;
+
+ /* Prevent dumping of the shaders built during context setup. */
+ uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB;
+ V3D_DEBUG &= ~V3D_DEBUG_SHADERDB;
+
+ vc5->screen = screen;
+
+ pctx->screen = pscreen;
+ pctx->priv = priv;
+ pctx->destroy = vc5_context_destroy;
+ pctx->flush = vc5_pipe_flush;
+ pctx->invalidate_resource = vc5_invalidate_resource;
+
+ vc5_draw_init(pctx);
+ vc5_state_init(pctx);
+ vc5_program_init(pctx);
+ vc5_query_init(pctx);
+ vc5_resource_context_init(pctx);
+
+ vc5_job_init(vc5);
+ vc5->fd = screen->fd;
+
+ slab_create_child(&vc5->transfer_pool, &screen->transfer_pool);
+
+ vc5->uploader = u_upload_create_default(&vc5->base);
+ vc5->base.stream_uploader = vc5->uploader;
+ vc5->base.const_uploader = vc5->uploader;
+
+ vc5->blitter = util_blitter_create(pctx);
+ if (!vc5->blitter)
+ goto fail;
+
+ vc5->primconvert = util_primconvert_create(pctx,
+ (1 << PIPE_PRIM_QUADS) - 1);
+ if (!vc5->primconvert)
+ goto fail;
+
+ V3D_DEBUG |= saved_shaderdb_flag;
+ vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1;
+
+ return &vc5->base;
+
+fail:
+ /* Undo the shader-db suppression so a failed create leaves it intact. */
+ V3D_DEBUG |= saved_shaderdb_flag;
+ pctx->destroy(pctx);
+ return NULL;
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_context.h b/lib/mesa/src/gallium/drivers/vc5/vc5_context.h
new file mode 100644
index 000000000..a1017bd1a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_context.h
@@ -0,0 +1,473 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_CONTEXT_H
+#define VC5_CONTEXT_H
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/bitset.h"
+#include "util/slab.h"
+#include "xf86drm.h"
+#include "vc5_drm.h"
+#include "vc5_screen.h"
+
+struct vc5_job;
+struct vc5_bo;
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+
+#define __user
+#include "vc5_drm.h"
+#include "vc5_bufmgr.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+
+#ifdef USE_VC5_SIMULATOR
+#define using_vc5_simulator true
+#else
+#define using_vc5_simulator false
+#endif
+
+#define VC5_DIRTY_BLEND (1 << 0)
+#define VC5_DIRTY_RASTERIZER (1 << 1)
+#define VC5_DIRTY_ZSA (1 << 2)
+#define VC5_DIRTY_FRAGTEX (1 << 3)
+#define VC5_DIRTY_VERTTEX (1 << 4)
+
+#define VC5_DIRTY_BLEND_COLOR (1 << 7)
+#define VC5_DIRTY_STENCIL_REF (1 << 8)
+#define VC5_DIRTY_SAMPLE_MASK (1 << 9)
+#define VC5_DIRTY_FRAMEBUFFER (1 << 10)
+#define VC5_DIRTY_STIPPLE (1 << 11)
+#define VC5_DIRTY_VIEWPORT (1 << 12)
+#define VC5_DIRTY_CONSTBUF (1 << 13)
+#define VC5_DIRTY_VTXSTATE (1 << 14)
+#define VC5_DIRTY_VTXBUF (1 << 15)
+#define VC5_DIRTY_SCISSOR (1 << 17)
+#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18)
+#define VC5_DIRTY_PRIM_MODE (1 << 19)
+#define VC5_DIRTY_CLIP (1 << 20)
+#define VC5_DIRTY_UNCOMPILED_VS (1 << 21)
+#define VC5_DIRTY_UNCOMPILED_FS (1 << 22)
+#define VC5_DIRTY_COMPILED_CS (1 << 23)
+#define VC5_DIRTY_COMPILED_VS (1 << 24)
+#define VC5_DIRTY_COMPILED_FS (1 << 25)
+#define VC5_DIRTY_FS_INPUTS (1 << 26)
+#define VC5_DIRTY_STREAMOUT (1 << 27)
+
+#define VC5_MAX_FS_INPUTS 64
+
+struct vc5_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t p0;
+ uint32_t p1;
+ /* Precomputed swizzles to pass in to the shader key. */
+ uint8_t swizzle[4];
+
+ uint8_t texture_shader_state[32];
+};
+
+struct vc5_sampler_state {
+ struct pipe_sampler_state base;
+ uint32_t p0;
+ uint32_t p1;
+
+ uint8_t texture_shader_state[32];
+};
+
+struct vc5_texture_stateobj {
+ struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+ unsigned num_textures;
+ struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+ struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS];
+};
+
+struct vc5_shader_uniform_info {
+ enum quniform_contents *contents;
+ uint32_t *data;
+ uint32_t count;
+};
+
+struct vc5_uncompiled_shader {
+ /** A name for this program, so you can track it in shader-db output. */
+ uint32_t program_id;
+ /** How many variants of this program were compiled, for shader-db. */
+ uint32_t compiled_variant_count;
+ struct pipe_shader_state base;
+ uint32_t num_tf_outputs;
+ struct v3d_varying_slot *tf_outputs;
+ uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
+ uint32_t num_tf_specs;
+};
+
+struct vc5_compiled_shader {
+ struct vc5_bo *bo;
+
+ union {
+ struct v3d_prog_data *base;
+ struct v3d_vs_prog_data *vs;
+ struct v3d_fs_prog_data *fs;
+ } prog_data;
+
+ /**
+ * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the
+ * uniforms have to be rewritten (and therefore the shader state
+ * reemitted).
+ */
+ uint32_t uniform_dirty_bits;
+};
+
+struct vc5_program_stateobj {
+ struct vc5_uncompiled_shader *bind_vs, *bind_fs;
+ struct vc5_compiled_shader *cs, *vs, *fs;
+};
+
+struct vc5_constbuf_stateobj {
+ struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+ uint32_t enabled_mask;
+ uint32_t dirty_mask;
+};
+
+struct vc5_vertexbuf_stateobj {
+ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ unsigned count;
+ uint32_t enabled_mask;
+ uint32_t dirty_mask;
+};
+
+struct vc5_vertex_stateobj {
+ struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES];
+ unsigned num_elements;
+
+ uint8_t attrs[12 * VC5_MAX_ATTRIBUTES];
+ struct vc5_bo *default_attribute_values;
+};
+
+struct vc5_streamout_stateobj {
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+ unsigned num_targets;
+};
+
+/* Hash table key for vc5->jobs */
+struct vc5_job_key {
+ struct pipe_surface *cbufs[4];
+ struct pipe_surface *zsbuf;
+};
+
+/**
+ * A complete bin/render job.
+ *
+ * This is all of the state necessary to submit a bin/render to the kernel.
+ * We want to be able to have multiple in progress at a time, so that we don't
+ * need to flush an existing CL just to switch to rendering to a new render
+ * target (which would mean reading back from the old render target when
+ * starting to render to it again).
+ */
+struct vc5_job {
+ struct vc5_context *vc5;
+ struct vc5_cl bcl;
+ struct vc5_cl rcl;
+ struct vc5_cl indirect;
+ struct vc5_bo *tile_alloc;
+ uint32_t shader_rec_count;
+
+ struct drm_vc5_submit_cl submit;
+
+ /**
+ * Set of all BOs referenced by the job. This will be used for making
+ * the list of BOs that the kernel will need to have paged in to
+ * execute our job.
+ */
+ struct set *bos;
+
+ struct set *write_prscs;
+
+ /* Size of the submit.bo_handles array. */
+ uint32_t bo_handles_size;
+
+ /** @{ Surfaces to submit rendering for. */
+ struct pipe_surface *cbufs[4];
+ struct pipe_surface *zsbuf;
+ /** @} */
+ /** @{
+ * Bounding box of the scissor across all queued drawing.
+ *
+ * Note that the max values are exclusive.
+ */
+ uint32_t draw_min_x;
+ uint32_t draw_min_y;
+ uint32_t draw_max_x;
+ uint32_t draw_max_y;
+ /** @} */
+ /** @{
+ * Width/height of the color framebuffer being rendered to,
+ * for VC5_TILE_RENDERING_MODE_CONFIG.
+ */
+ uint32_t draw_width;
+ uint32_t draw_height;
+ /** @} */
+ /** @{ Tile information, depending on MSAA and float color buffer. */
+ uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
+ uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */
+
+ uint32_t tile_width; /** @< Width of a tile. */
+ uint32_t tile_height; /** @< Height of a tile. */
+ /** maximum internal_bpp of all color render targets. */
+ uint32_t internal_bpp;
+
+ /** Whether the current rendering is in a 4X MSAA tile buffer. */
+ bool msaa;
+ /** @} */
+
+ /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
+ * first rendering.
+ */
+ uint32_t cleared;
+ /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
+ * (either clears or draws).
+ */
+ uint32_t resolve;
+ uint32_t clear_color[4][4];
+ float clear_z;
+ uint8_t clear_s;
+
+ /**
+ * Set if some drawing (triangles, blits, or just a glClear()) has
+ * been done to the FBO, meaning that we need to
+ * DRM_IOCTL_VC5_SUBMIT_CL.
+ */
+ bool needs_flush;
+
+ bool uses_early_z;
+
+ /**
+ * Number of draw calls (not counting full buffer clears) queued in
+ * the current job.
+ */
+ uint32_t draw_calls_queued;
+
+ struct vc5_job_key key;
+};
+
+struct vc5_context {
+ struct pipe_context base;
+
+ int fd;
+ struct vc5_screen *screen;
+
+ /** The 3D rendering job for the currently bound FBO. */
+ struct vc5_job *job;
+
+ /* Map from struct vc5_job_key to the job for that FBO.
+ */
+ struct hash_table *jobs;
+
+ /**
+ * Map from vc5_resource to a job writing to that resource.
+ *
+ * Primarily for flushing jobs rendering to textures that are now
+ * being read from.
+ */
+ struct hash_table *write_jobs;
+
+ struct slab_child_pool transfer_pool;
+ struct blitter_context *blitter;
+
+ /** bitfield of VC5_DIRTY_* */
+ uint32_t dirty;
+
+ struct primconvert_context *primconvert;
+
+ struct hash_table *fs_cache, *vs_cache;
+ uint32_t next_uncompiled_program_id;
+ uint64_t next_compiled_program_id;
+
+ struct vc5_compiler_state *compiler_state;
+
+ uint8_t prim_mode;
+
+ /** Maximum index buffer valid for the current shader_rec. */
+ uint32_t max_index;
+
+ /** Seqno of the last CL flush's job. */
+ uint64_t last_emit_seqno;
+
+ struct u_upload_mgr *uploader;
+
+ /** @{ Current pipeline state objects */
+ struct pipe_scissor_state scissor;
+ struct pipe_blend_state *blend;
+ struct vc5_rasterizer_state *rasterizer;
+ struct vc5_depth_stencil_alpha_state *zsa;
+
+ struct vc5_texture_stateobj verttex, fragtex;
+
+ struct vc5_program_stateobj prog;
+
+ struct vc5_vertex_stateobj *vtx;
+
+ struct {
+ struct pipe_blend_color f;
+ uint16_t hf[4];
+ } blend_color;
+ struct pipe_stencil_ref stencil_ref;
+ unsigned sample_mask;
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple stipple;
+ struct pipe_clip_state clip;
+ struct pipe_viewport_state viewport;
+ struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
+ struct vc5_vertexbuf_stateobj vertexbuf;
+ struct vc5_streamout_stateobj streamout;
+ /** @} */
+};
+
+struct vc5_rasterizer_state {
+ struct pipe_rasterizer_state base;
+
+ /* VC5_CONFIGURATION_BITS */
+ uint8_t config_bits[3];
+
+ float point_size;
+
+ /**
+ * Half-float (1/8/7 bits) value of polygon offset units for
+ * VC5_PACKET_DEPTH_OFFSET
+ */
+ uint16_t offset_units;
+ /**
+ * Half-float (1/8/7 bits) value of polygon offset scale for
+ * VC5_PACKET_DEPTH_OFFSET
+ */
+ uint16_t offset_factor;
+};
+
+struct vc5_depth_stencil_alpha_state {
+ struct pipe_depth_stencil_alpha_state base;
+
+ bool early_z_enable;
+
+ /** Uniforms for stencil state.
+ *
+ * Index 0 is either the front config, or the front-and-back config.
+ * Index 1 is the back config if doing separate back stencil.
+ * Index 2 is the writemask config if it's not a common mask value.
+ */
+ uint32_t stencil_uniforms[3];
+};
+
+#define perf_debug(...) do { \
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
+ fprintf(stderr, __VA_ARGS__); \
+} while (0)
+
+static inline struct vc5_context *
+vc5_context(struct pipe_context *pcontext)
+{
+ return (struct vc5_context *)pcontext;
+}
+
+static inline struct vc5_sampler_view *
+vc5_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc5_sampler_view *)psview;
+}
+
+static inline struct vc5_sampler_state *
+vc5_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc5_sampler_state *)psampler;
+}
+
+struct pipe_context *vc5_context_create(struct pipe_screen *pscreen,
+ void *priv, unsigned flags);
+void vc5_draw_init(struct pipe_context *pctx);
+void vc5_state_init(struct pipe_context *pctx);
+void vc5_program_init(struct pipe_context *pctx);
+void vc5_program_fini(struct pipe_context *pctx);
+void vc5_query_init(struct pipe_context *pctx);
+
+/* Simulator backend: vc5_ioctl() calls vc5_simulator_ioctl() instead of
+ * drmIoctl() when USE_VC5_SIMULATOR is defined. */
+void vc5_simulator_init(struct vc5_screen *screen);
+void vc5_simulator_destroy(struct vc5_screen *screen);
+int vc5_simulator_flush(struct vc5_context *vc5,
+ struct drm_vc5_submit_cl *args,
+ struct vc5_job *job);
+int vc5_simulator_ioctl(int fd, unsigned long request, void *arg);
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+ int handle, uint32_t size);
+
+static inline int
+vc5_ioctl(int fd, unsigned long request, void *arg)
+{
+ if (using_vc5_simulator)
+ return vc5_simulator_ioctl(fd, request, arg);
+ else
+ return drmIoctl(fd, request, arg);
+}
+
+void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader);
+struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5,
+ struct vc5_compiled_shader *shader,
+ struct vc5_constbuf_stateobj *cb,
+ struct vc5_texture_stateobj *texstate);
+
+void vc5_flush(struct pipe_context *pctx);
+void vc5_job_init(struct vc5_context *vc5);
+struct vc5_job *vc5_get_job(struct vc5_context *vc5,
+ struct pipe_surface **cbufs,
+ struct pipe_surface *zsbuf);
+struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5);
+void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
+void vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc);
+void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job);
+void vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+ struct pipe_resource *prsc);
+void vc5_emit_state(struct pipe_context *pctx);
+void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode);
+
+bool vc5_rt_format_supported(enum pipe_format f);
+bool vc5_tex_format_supported(enum pipe_format f);
+uint8_t vc5_get_rt_format(enum pipe_format f);
+uint8_t vc5_get_tex_format(enum pipe_format f);
+uint8_t vc5_get_tex_return_size(enum pipe_format f);
+uint8_t vc5_get_tex_return_channels(enum pipe_format f);
+const uint8_t *vc5_get_format_swizzle(enum pipe_format f);
+void vc5_get_internal_type_bpp_for_output_format(uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp);
+
+void vc5_init_query_functions(struct vc5_context *vc5);
+void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
+void vc5_blitter_save(struct vc5_context *vc5);
+void vc5_emit_rcl(struct vc5_job *job);
+
+
+#endif /* VC5_CONTEXT_H */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c b/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c
new file mode 100644
index 000000000..11d9e92a9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_draw.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blitter.h"
+#include "util/u_prim.h"
+#include "util/u_format.h"
+#include "util/u_pack_color.h"
+#include "util/u_prim_restart.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_primconvert.h"
+
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_cl.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/**
+ * Does the initial binning command list setup for drawing to a given FBO.
+ *
+ * Returns without doing anything if the job already has needs_flush set.
+ * Otherwise it reserves BCL space, allocates the tile allocation and tile
+ * state (TSDA) BOs, emits the binning configuration, FLUSH_VCD_CACHE,
+ * START_TILE_BINNING and PRIMITIVE_LIST_FORMAT packets, then sets
+ * needs_flush and copies the framebuffer size into the job's draw size.
+ */
+static void
+vc5_start_draw(struct vc5_context *vc5)
+{
+ struct vc5_job *job = vc5->job;
+
+ if (job->needs_flush)
+ return; /* needs_flush is set at the end of this function, so the setup runs once per job. */
+
+ /* Get space to emit our BCL state, using a branch to jump to a new BO
+ * if necessary.
+ */
+ vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+ job->submit.bcl_start = job->bcl.bo->offset;
+ vc5_job_add_bo(job, job->bcl.bo);
+
+ job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
+ struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen,
+ job->draw_tiles_y *
+ job->draw_tiles_x *
+ 64,
+ "TSDA"); /* NOTE(review): neither vc5_bo_alloc() result is checked before use below -- confirm it cannot return NULL. */
+
+ /* "Binning mode lists start with a Tile Binning Mode Configuration
+ * item (120)"
+ *
+ * Part1 signals the end of binning config setup, which is why PART2
+ * is emitted before PART1 here.
+ */
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) {
+ config.tile_allocation_memory_address =
+ cl_address(job->tile_alloc, 0);
+ config.tile_allocation_memory_size = job->tile_alloc->size;
+ }
+
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
+ config.tile_state_data_array_base_address =
+ cl_address(tsda, 0);
+
+ config.width_in_tiles = job->draw_tiles_x;
+ config.height_in_tiles = job->draw_tiles_y;
+
+ /* Must be >= 1 */
+ config.number_of_render_targets =
+ MAX2(vc5->framebuffer.nr_cbufs, 1);
+
+ config.multisample_mode_4x = job->msaa;
+
+ config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+ }
+
+ vc5_bo_unreference(&tsda); /* Drops the local reference only. NOTE(review): presumably cl_address() above made the job keep the BO alive -- confirm. */
+
+ /* There's definitely nothing in the VCD cache we want. */
+ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+ /* "Binning mode lists must have a Start Tile Binning item (6) after
+ * any prefix state data before the binning list proper starts."
+ */
+ cl_emit(&job->bcl, START_TILE_BINNING, bin);
+
+ cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, fmt) {
+ fmt.data_type = LIST_INDEXED;
+ fmt.primitive_type = LIST_TRIANGLES;
+ }
+
+ job->needs_flush = true;
+ job->draw_width = vc5->framebuffer.width;
+ job->draw_height = vc5->framebuffer.height;
+}
+
+/**
+ * Flushes every job that writes to a resource sampled through @stage_tex.
+ *
+ * Walks the first num_textures sampler-view slots and skips the unbound
+ * (NULL) ones.
+ */
+static void
+vc5_predraw_check_textures(struct pipe_context *pctx,
+                           struct vc5_texture_stateobj *stage_tex)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        int slot = 0;
+
+        while (slot < stage_tex->num_textures) {
+                struct pipe_sampler_view *bound = stage_tex->textures[slot];
+
+                if (bound)
+                        vc5_flush_jobs_writing_resource(vc5, bound->texture);
+
+                slot++;
+        }
+}
+
+static void
+vc5_emit_gl_shader_state(struct vc5_context *vc5,
+ const struct pipe_draw_info *info)
+{ /* Writes the uniforms, a GL_SHADER_STATE_RECORD and one
+ * GL_SHADER_STATE_ATTRIBUTE_RECORD per vertex element into the job's
+ * indirect CL, then emits a GL_SHADER_STATE packet in the BCL that
+ * points at the record. Increments job->shader_rec_count when done.
+ */
+ struct vc5_job *job = vc5->job;
+ /* VC5_DIRTY_VTXSTATE */
+ struct vc5_vertex_stateobj *vtx = vc5->vtx;
+ /* VC5_DIRTY_VTXBUF */
+ struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;
+
+ /* Upload the uniforms to the indirect CL first. Note that the
+ * coordinate shader (cs) is given the vertex stage's constant buffer
+ * and texture state, the same inputs as the vertex shader (vs).
+ */
+ struct vc5_cl_reloc fs_uniforms =
+ vc5_write_uniforms(vc5, vc5->prog.fs,
+ &vc5->constbuf[PIPE_SHADER_FRAGMENT],
+ &vc5->fragtex);
+ struct vc5_cl_reloc vs_uniforms =
+ vc5_write_uniforms(vc5, vc5->prog.vs,
+ &vc5->constbuf[PIPE_SHADER_VERTEX],
+ &vc5->verttex);
+ struct vc5_cl_reloc cs_uniforms =
+ vc5_write_uniforms(vc5, vc5->prog.cs,
+ &vc5->constbuf[PIPE_SHADER_VERTEX],
+ &vc5->verttex);
+
+ uint32_t shader_rec_offset =
+ vc5_cl_ensure_space(&job->indirect,
+ cl_packet_length(GL_SHADER_STATE_RECORD) +
+ vtx->num_elements *
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+ 32); /* Room for the record plus all attribute records; the returned offset is what GL_SHADER_STATE points at below. NOTE(review): 32 is presumably the alignment -- confirm. */
+
+ cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
+ shader.enable_clipping = true;
+ /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
+ shader.point_size_in_shaded_vertex_data =
+ (info->mode == PIPE_PRIM_POINTS &&
+ vc5->rasterizer->base.point_size_per_vertex);
+
+ /* Must be set if the shader modifies Z, discards, or modifies
+ * the sample mask. For any of these cases, the fragment
+ * shader needs to write the Z value (even just discards).
+ */
+ shader.fragment_shader_does_z_writes =
+ (vc5->prog.fs->prog_data.fs->writes_z ||
+ vc5->prog.fs->prog_data.fs->discard);
+
+ shader.number_of_varyings_in_fragment_shader =
+ vc5->prog.fs->prog_data.base->num_inputs;
+
+ shader.propagate_nans = true;
+
+ shader.coordinate_shader_code_address =
+ cl_address(vc5->prog.cs->bo, 0);
+ shader.vertex_shader_code_address =
+ cl_address(vc5->prog.vs->bo, 0);
+ shader.fragment_shader_code_address =
+ cl_address(vc5->prog.fs->bo, 0);
+
+ /* XXX: Use combined input/output size flag in the common
+ * case.
+ */
+ shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
+ shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
+ shader.coordinate_shader_input_vpm_segment_size =
+ vc5->prog.cs->prog_data.vs->vpm_input_size;
+ shader.vertex_shader_input_vpm_segment_size =
+ vc5->prog.vs->prog_data.vs->vpm_input_size;
+
+ shader.coordinate_shader_output_vpm_segment_size =
+ vc5->prog.cs->prog_data.vs->vpm_output_size;
+ shader.vertex_shader_output_vpm_segment_size =
+ vc5->prog.vs->prog_data.vs->vpm_output_size;
+
+ shader.coordinate_shader_uniforms_address = cs_uniforms;
+ shader.vertex_shader_uniforms_address = vs_uniforms;
+ shader.fragment_shader_uniforms_address = fs_uniforms;
+
+ shader.vertex_id_read_by_coordinate_shader =
+ vc5->prog.cs->prog_data.vs->uses_vid;
+ shader.instance_id_read_by_coordinate_shader =
+ vc5->prog.cs->prog_data.vs->uses_iid;
+ shader.vertex_id_read_by_vertex_shader =
+ vc5->prog.vs->prog_data.vs->uses_vid;
+ shader.instance_id_read_by_vertex_shader =
+ vc5->prog.vs->prog_data.vs->uses_iid;
+
+ shader.address_of_default_attribute_values =
+ cl_address(vtx->default_attribute_values, 0);
+ }
+
+ for (int i = 0; i < vtx->num_elements; i++) {
+ struct pipe_vertex_element *elem = &vtx->pipe[i];
+ struct pipe_vertex_buffer *vb =
+ &vertexbuf->vb[elem->vertex_buffer_index];
+ struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
+
+ struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = {
+ .stride = vb->stride,
+ .address = cl_address(rsc->bo,
+ vb->buffer_offset +
+ elem->src_offset),
+ .number_of_values_read_by_coordinate_shader =
+ vc5->prog.cs->prog_data.vs->vattr_sizes[i],
+ .number_of_values_read_by_vertex_shader =
+ vc5->prog.vs->prog_data.vs->vattr_sizes[i],
+ };
+ const uint32_t size =
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+ uint8_t attr_packed[size];
+ V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(&job->indirect,
+ attr_packed,
+ &attr_unpacked);
+ for (int j = 0; j < size; j++)
+ attr_packed[j] |= vtx->attrs[i * size + j]; /* OR in this element's bytes from vtx->attrs. NOTE(review): presumably pre-packed when the vertex state was created -- confirm. */
+ cl_emit_prepacked(&job->indirect, &attr_packed);
+ }
+
+ cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+ state.address = cl_address(job->indirect.bo, shader_rec_offset);
+ state.number_of_attribute_arrays = vtx->num_elements;
+ }
+
+ vc5_bo_unreference(&cs_uniforms.bo); /* Drop the BO references carried by the three relocs returned from vc5_write_uniforms(). */
+ vc5_bo_unreference(&vs_uniforms.bo);
+ vc5_bo_unreference(&fs_uniforms.bo);
+
+ job->shader_rec_count++;
+}
+
+/**
+ * Draw entry point (installed as pipe_context::draw_vbo by vc5_draw_init()).
+ *
+ * Hands the draw to util fallbacks when the primitive-restart index is not
+ * the all-ones value for the index size, or when the primitive mode is
+ * PIPE_PRIM_QUADS or above.  Otherwise it flushes jobs writing to sampled
+ * textures, emits dirty state and the primitive-list packet for this draw
+ * into the FBO job's BCL, and marks the Z/S and color buffers that the job
+ * has to resolve.
+ *
+ * @pctx: context the draw is issued on
+ * @info: draw parameters
+ */
+static void
+vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+
+        if (!info->count_from_stream_output && !info->indirect &&
+            !info->primitive_restart &&
+            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
+                return;
+
+        /* Fall back for weird desktop GL primitive restart values. */
+        if (info->primitive_restart &&
+            info->index_size) {
+                uint32_t mask = ~0;
+
+                switch (info->index_size) {
+                case 2:
+                        mask = 0xffff;
+                        break;
+                case 1:
+                        mask = 0xff;
+                        break;
+                }
+
+                if (info->restart_index != mask) {
+                        util_draw_vbo_without_prim_restart(pctx, info);
+                        return;
+                }
+        }
+
+        if (info->mode >= PIPE_PRIM_QUADS) {
+                util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base);
+                util_primconvert_draw_vbo(vc5->primconvert, info);
+                perf_debug("Fallback conversion for %d %s vertices\n",
+                           info->count, u_prim_name(info->mode));
+                return;
+        }
+
+        /* Before setting up the draw, flush anything writing to the textures
+         * that we read from.
+         */
+        vc5_predraw_check_textures(pctx, &vc5->verttex);
+        vc5_predraw_check_textures(pctx, &vc5->fragtex);
+
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* Get space to emit our draw call into the BCL, using a branch to
+         * jump to a new BO if necessary.
+         */
+        vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
+
+        if (vc5->prim_mode != info->mode) {
+                vc5->prim_mode = info->mode;
+                vc5->dirty |= VC5_DIRTY_PRIM_MODE;
+        }
+
+        vc5_start_draw(vc5);
+        vc5_update_compiled_shaders(vc5, info->mode);
+
+        vc5_emit_state(pctx);
+
+        if (vc5->dirty & (VC5_DIRTY_VTXBUF |
+                          VC5_DIRTY_VTXSTATE |
+                          VC5_DIRTY_PRIM_MODE |
+                          VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_COMPILED_CS |
+                          VC5_DIRTY_COMPILED_VS |
+                          VC5_DIRTY_COMPILED_FS |
+                          vc5->prog.cs->uniform_dirty_bits |
+                          vc5->prog.vs->uniform_dirty_bits |
+                          vc5->prog.fs->uniform_dirty_bits)) {
+                vc5_emit_gl_shader_state(vc5, info);
+        }
+
+        vc5->dirty = 0;
+
+        /* The Base Vertex/Base Instance packet sets those values to nonzero
+         * for the next draw call only.
+         */
+        if (info->index_bias || info->start_instance) {
+                cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
+                        base.base_instance = info->start_instance;
+                        base.base_vertex = info->index_bias;
+                }
+        }
+
+        /* The HW only processes transform feedback on primitives with the
+         * flag set.
+         */
+        uint32_t prim_tf_enable = 0;
+        if (vc5->prog.bind_vs->num_tf_outputs)
+                prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
+
+        /* Note that the primitive type fields match with OpenGL/gallium
+         * definitions, up to but not including QUADS.
+         */
+        if (info->index_size) {
+                uint32_t index_size = info->index_size;
+                uint32_t offset = info->start * index_size;
+                struct pipe_resource *prsc;
+                if (info->has_user_indices) {
+                        /* User-pointer indices are copied into an upload
+                         * buffer; u_upload_data() overwrites offset and
+                         * hands back the buffer in prsc.
+                         *
+                         * NOTE(review): the copy starts at index.user
+                         * without applying info->start -- confirm that is
+                         * intended for user indices.
+                         */
+                        prsc = NULL;
+                        u_upload_data(vc5->uploader, 0,
+                                      info->count * info->index_size, 4,
+                                      info->index.user,
+                                      &offset, &prsc);
+                } else {
+                        prsc = info->index.resource;
+                }
+                struct vc5_resource *rsc = vc5_resource(prsc);
+
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
+                                prim.index_type = ffs(info->index_size) - 1;
+                                prim.length = info->count;
+                                prim.maximum_index = (1u << 31) - 1; /* XXX */
+                                prim.address_of_indices_list =
+                                        cl_address(rsc->bo, offset);
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.enable_primitive_restarts = info->primitive_restart;
+                        }
+                }
+
+                /* Release our reference to the temporary upload buffer. */
+                if (info->has_user_indices)
+                        pipe_resource_reference(&prsc, NULL);
+        } else {
+                if (info->instance_count > 1) {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) {
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.index_of_first_vertex = info->start;
+                                prim.number_of_instances = info->instance_count;
+                                prim.instance_length = info->count;
+                        }
+                } else {
+                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) {
+                                prim.mode = info->mode | prim_tf_enable;
+                                prim.length = info->count;
+                                prim.index_of_first_vertex = info->start;
+                        }
+                }
+        }
+
+        /* Count the draw exactly once, whichever path emitted it. */
+        job->draw_calls_queued++;
+
+        if (vc5->zsa && job->zsbuf &&
+            (vc5->zsa->base.depth.enabled ||
+             vc5->zsa->base.stencil[0].enabled)) {
+                struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
+                vc5_job_add_bo(job, rsc->bo);
+
+                if (vc5->zsa->base.depth.enabled) {
+                        job->resolve |= PIPE_CLEAR_DEPTH;
+                        /* Accumulate rather than assign, so bits already
+                         * recorded on this resource (e.g. stencil) are kept,
+                         * as in the stencil case below.
+                         */
+                        rsc->initialized_buffers |= PIPE_CLEAR_DEPTH;
+
+                        if (vc5->zsa->early_z_enable)
+                                job->uses_early_z = true;
+                }
+
+                if (vc5->zsa->base.stencil[0].enabled) {
+                        job->resolve |= PIPE_CLEAR_STENCIL;
+                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
+                }
+        }
+
+        /* Every bound color buffer not yet marked gets resolved by this job
+         * and has its BO added to the job.
+         */
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
+
+                if (job->resolve & bit || !job->cbufs[i])
+                        continue;
+                struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture);
+
+                job->resolve |= bit;
+                vc5_job_add_bo(job, rsc->bo);
+        }
+
+        if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+                vc5_flush(pctx);
+}
+
+/**
+ * Clear entry point: records the clear values for the selected buffers on
+ * the FBO's job.
+ *
+ * If the job already has draws queued it is submitted first and a fresh job
+ * is fetched.  The job's draw bounds are then set to the whole framebuffer,
+ * @buffers is added to both its cleared and resolve masks, and the binning
+ * setup is run.
+ *
+ * @buffers: mask of PIPE_CLEAR_* bits selecting what to clear
+ * @color:   clear color, packed per color buffer format
+ * @depth:   value stored when PIPE_CLEAR_DEPTH is requested
+ * @stencil: value stored when PIPE_CLEAR_STENCIL is requested
+ */
+static void
+vc5_clear(struct pipe_context *pctx, unsigned buffers,
+          const union pipe_color_union *color, double depth, unsigned stencil)
+{
+        struct vc5_context *vc5 = vc5_context(pctx);
+        struct vc5_job *job = vc5_get_job_for_fbo(vc5);
+
+        /* We can't flag new buffers for clearing once we've queued draws.  We
+         * could avoid this by using the 3d engine to clear.
+         */
+        if (job->draw_calls_queued) {
+                perf_debug("Flushing rendering to process new clear.\n");
+                vc5_job_submit(vc5, job);
+                job = vc5_get_job_for_fbo(vc5);
+        }
+
+        for (int rt = 0; rt < VC5_MAX_DRAW_BUFFERS; rt++) {
+                const uint32_t rt_bit = PIPE_CLEAR_COLOR0 << rt;
+
+                if (buffers & rt_bit) {
+                        struct pipe_surface *surf = vc5->framebuffer.cbufs[rt];
+                        struct vc5_resource *color_rsc =
+                                vc5_resource(surf->texture);
+                        union util_color packed;
+
+                        /* Store the clear color in the surface's own format. */
+                        util_pack_color(color->f, surf->format, &packed);
+                        memcpy(job->clear_color[rt], packed.ui,
+                               util_format_get_blocksize(surf->format));
+
+                        color_rsc->initialized_buffers |= rt_bit;
+                }
+        }
+
+        const unsigned zs_bits = buffers & PIPE_CLEAR_DEPTHSTENCIL;
+        if (zs_bits != 0) {
+                struct vc5_resource *zs_rsc =
+                        vc5_resource(vc5->framebuffer.zsbuf->texture);
+
+                if (zs_bits & PIPE_CLEAR_STENCIL)
+                        job->clear_s = stencil;
+                if (zs_bits & PIPE_CLEAR_DEPTH)
+                        job->clear_z = depth;
+
+                zs_rsc->initialized_buffers |= zs_bits;
+        }
+
+        /* A clear covers the whole framebuffer. */
+        job->draw_max_y = vc5->framebuffer.height;
+        job->draw_max_x = vc5->framebuffer.width;
+        job->draw_min_y = 0;
+        job->draw_min_x = 0;
+        job->resolve |= buffers;
+        job->cleared |= buffers;
+
+        vc5_start_draw(vc5);
+}
+
+/* Placeholder for pipe_context::clear_render_target: every argument is
+ * ignored and a notice is written to stderr.
+ */
+static void
+vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+                        const union pipe_color_union *color,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear RT\n", stderr);
+}
+
+/* Placeholder for pipe_context::clear_depth_stencil: every argument is
+ * ignored and a notice is written to stderr.
+ */
+static void
+vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+                        unsigned buffers, double depth, unsigned stencil,
+                        unsigned x, unsigned y, unsigned w, unsigned h,
+                        bool render_condition_enabled)
+{
+        fputs("unimpl: clear DS\n", stderr);
+}
+
+/**
+ * Installs this file's draw and clear entry points on @pctx.
+ */
+void
+vc5_draw_init(struct pipe_context *pctx)
+{
+        /* Clear hooks. */
+        pctx->clear_depth_stencil = vc5_clear_depth_stencil;
+        pctx->clear_render_target = vc5_clear_render_target;
+        pctx->clear = vc5_clear;
+
+        /* Draw hook. */
+        pctx->draw_vbo = vc5_draw_vbo;
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h b/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h
new file mode 100644
index 000000000..e70cf9d56
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_drm.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _VC5_DRM_H_
+#define _VC5_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_VC5_SUBMIT_CL 0x00 /* Driver-private ioctl indices; each is added to DRM_COMMAND_BASE by the DRM_IOCTL_VC5_* macros below. */
+#define DRM_VC5_WAIT_SEQNO 0x01
+#define DRM_VC5_WAIT_BO 0x02
+#define DRM_VC5_CREATE_BO 0x03
+#define DRM_VC5_MMAP_BO 0x04
+#define DRM_VC5_GET_PARAM 0x05
+#define DRM_VC5_GET_BO_OFFSET 0x06
+
+#define DRM_IOCTL_VC5_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl) /* Full request codes: all are DRM_IOWR and each names its argument struct. */
+#define DRM_IOCTL_VC5_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno)
+#define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo)
+#define DRM_IOCTL_VC5_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo)
+#define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo)
+#define DRM_IOCTL_VC5_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_PARAM, struct drm_vc5_get_param)
+#define DRM_IOCTL_VC5_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset)
+
+/**
+ * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute an optional binner
+ * command list, and a render command list.
+ */
+struct drm_vc5_submit_cl {
+ /* Pointer to the binner command list.
+ *
+ * This is the first set of commands executed, which runs the
+ * coordinate shader to determine where primitives land on the screen,
+ * then writes out the state updates and draw calls necessary per tile
+ * to the tile allocation BO.
+ *
+ * NOTE(review): a 32-bit field, so presumably an address in the V3D
+ * address space rather than a CPU pointer -- confirm.
+ */
+ __u32 bcl_start;
+
+ /** End address of the BCL (first byte after the BCL) */
+ __u32 bcl_end;
+
+ /* Offset of the render command list.
+ *
+ * This is the second set of commands executed, which will either
+ * execute the tiles that have been set up by the BCL, or a fixed set
+ * of tiles (in the case of RCL-only blits).
+ */
+ __u32 rcl_start;
+
+ /** End address of the RCL (first byte after the RCL) */
+ __u32 rcl_end;
+
+ /* Pointer to a u32 array of the BOs that are referenced by the job.
+ */
+ __u64 bo_handles;
+
+ /* Pointer to an array of chunks of extra submit CL information. (the
+ * chunk struct is not yet defined)
+ */
+ __u64 chunks;
+
+ /* Number of BO handles passed in (size is that times 4). */
+ __u32 bo_handle_count;
+
+ __u32 chunk_count; /* NOTE(review): presumably the number of entries in the chunks array -- confirm. */
+
+ __u64 flags; /* No flag values are defined in this part of the header. */
+};
+
+/**
+ * struct drm_vc5_wait_seqno - ioctl argument for waiting for
+ * DRM_VC5_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc5_wait_seqno {
+ __u64 seqno; /* Seqno to wait for. */
+ __u64 timeout_ns; /* Timeout in nanoseconds; 0 means do not block. */
+};
+
+/**
+ * struct drm_vc5_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC5_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc5_wait_bo {
+ __u32 handle; /* NOTE(review): presumably the GEM handle of the BO to wait on -- confirm. */
+ __u32 pad; /* Explicit padding so that timeout_ns sits at an 8-byte offset. */
+ __u64 timeout_ns; /* NOTE(review): presumably the same semantics as drm_vc5_wait_seqno.timeout_ns -- confirm. */
+};
+
+/**
+ * struct drm_vc5_create_bo - ioctl argument for creating VC5 BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_create_bo {
+ __u32 size; /* Requested BO size. NOTE(review): presumably in bytes -- confirm. */
+ __u32 flags; /* No values currently defined (see above). */
+ /** Returned GEM handle for the BO. */
+ __u32 handle;
+ /**
+ * Returned offset for the BO in the V3D address space. This offset
+ * is private to the DRM fd and is valid for the lifetime of the GEM
+ * handle.
+ */
+ __u32 offset;
+};
+
+/**
+ * struct drm_vc5_mmap_bo - ioctl argument for mapping VC5 BOs.
+ *
+ * This doesn't actually perform an mmap. Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node. This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc5_mmap_bo {
+ /** Handle for the object being mapped. */
+ __u32 handle;
+ __u32 flags; /* No values currently defined (see above). */
+ /** offset into the drm node to use for subsequent mmap call. */
+ __u64 offset;
+};
+
+enum drm_vc5_param { /* NOTE(review): presumably the selector values for drm_vc5_get_param.param; the names suggest V3D configuration/identification registers -- confirm against the kernel driver. */
+ DRM_VC5_PARAM_V3D_UIFCFG,
+ DRM_VC5_PARAM_V3D_HUB_IDENT1,
+ DRM_VC5_PARAM_V3D_HUB_IDENT2,
+ DRM_VC5_PARAM_V3D_HUB_IDENT3,
+ DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+ DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+ DRM_VC5_PARAM_V3D_CORE0_IDENT2,
+};
+
+struct drm_vc5_get_param { /* Argument struct for DRM_IOCTL_VC5_GET_PARAM. */
+ __u32 param; /* NOTE(review): presumably one of enum drm_vc5_param -- confirm. */
+ __u32 pad; /* Explicit padding so that value sits at an 8-byte offset. */
+ __u64 value; /* NOTE(review): presumably filled in by the kernel with the parameter's value -- confirm. */
+};
+
+/**
+ * struct drm_vc5_get_bo_offset - ioctl argument for
+ * DRM_IOCTL_VC5_GET_BO_OFFSET.
+ *
+ * Returns the offset for the BO in the V3D address space for this DRM fd.
+ * This is the same value returned by drm_vc5_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_vc5_get_bo_offset {
+ __u32 handle; /* NOTE(review): presumably the GEM handle of the BO being queried -- confirm. */
+ __u32 offset; /* Returned offset of the BO in the V3D address space. */
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _VC5_DRM_H_ */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c b/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c
new file mode 100644
index 000000000..3cb44feff
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_emit.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_half.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/**
+ * Maps a gallium PIPE_BLENDFACTOR_* value onto the matching
+ * V3D_BLEND_FACTOR_* value.
+ *
+ * A raw value of 0 is mapped to V3D_BLEND_FACTOR_ZERO; any other value
+ * without a case below hits unreachable().
+ */
+static uint8_t
+vc5_factor(enum pipe_blendfactor factor)
+{
+        uint8_t hw_factor;
+
+        /* We may get a bad blendfactor when blending is disabled. */
+        if (!factor)
+                return V3D_BLEND_FACTOR_ZERO;
+
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ZERO:
+                hw_factor = V3D_BLEND_FACTOR_ZERO;
+                break;
+        case PIPE_BLENDFACTOR_ONE:
+                hw_factor = V3D_BLEND_FACTOR_ONE;
+                break;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_SRC_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_INV_SRC_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_DST_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_INV_DST_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_SRC_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_INV_SRC_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_DST_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_INV_DST_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_CONST_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                hw_factor = V3D_BLEND_FACTOR_INV_CONST_COLOR;
+                break;
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_CONST_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                hw_factor = V3D_BLEND_FACTOR_INV_CONST_ALPHA;
+                break;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                hw_factor = V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+                break;
+        default:
+                unreachable("Bad blend factor");
+        }
+
+        return hw_factor;
+}
+
+/**
+ * Returns, as a half float, the border color value to program for output
+ * channel @chan of @sview.
+ *
+ * When the texture's return size is not 32, @chan is first remapped through
+ * the pipe format's swizzle; GL_ARB_texture_swizzle is left to the sampler's
+ * own swizzle.  When it is 32 (swizzling done in the shader) the channel is
+ * used as given.  A resulting PIPE_SWIZZLE_0 / PIPE_SWIZZLE_1 yields the
+ * constant 0.0 / 1.0 instead of a border color component.
+ */
+static inline uint16_t
+swizzled_border_color(struct pipe_sampler_state *sampler,
+                      struct vc5_sampler_view *sview,
+                      int chan)
+{
+        const struct util_format_description *fmt_desc =
+                util_format_description(sview->base.format);
+        uint8_t src_chan = chan;
+        float value;
+
+        if (vc5_get_tex_return_size(sview->base.format) != 32)
+                src_chan = fmt_desc->swizzle[src_chan];
+
+        if (src_chan == PIPE_SWIZZLE_0)
+                value = 0.0;
+        else if (src_chan == PIPE_SWIZZLE_1)
+                value = 1.0;
+        else
+                value = sampler->border_color.f[src_chan];
+
+        return util_float_to_half(value);
+}
+
+/**
+ * Emits the TEXTURE_SHADER_STATE record for texture unit @i of @stage_tex
+ * into the job's indirect CL.
+ *
+ * The location of the record (offset plus a reference to the indirect BO) is
+ * stored in stage_tex->texture_state[i].  The record combines the fields
+ * computed here (border color, max LOD, base pointer, filters) with the
+ * bytes held in the sampler view's and sampler state's texture_shader_state
+ * arrays, which are ORed in.
+ *
+ * NOTE(review): the sampler and sampler view for @i are dereferenced
+ * unconditionally, whereas vc5_predraw_check_textures() tolerates NULL
+ * sampler views -- confirm callers never reach here with an empty slot.
+ */
+static void
+emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
+                 int i)
+{
+        struct vc5_job *job = vc5->job;
+        struct pipe_sampler_state *psampler = stage_tex->samplers[i];
+        struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+        struct pipe_sampler_view *psview = stage_tex->textures[i];
+        struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+        struct pipe_resource *prsc = psview->texture;
+        struct vc5_resource *rsc = vc5_resource(prsc);
+
+        stage_tex->texture_state[i].offset =
+                vc5_cl_ensure_space(&job->indirect,
+                                    cl_packet_length(TEXTURE_SHADER_STATE),
+                                    32);
+        vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
+                             job->indirect.bo);
+
+        struct V3D33_TEXTURE_SHADER_STATE unpacked = {
+                /* XXX */
+                .border_color_red = swizzled_border_color(psampler, sview, 0),
+                .border_color_green = swizzled_border_color(psampler, sview, 1),
+                .border_color_blue = swizzled_border_color(psampler, sview, 2),
+                .border_color_alpha = swizzled_border_color(psampler, sview, 3),
+
+                /* XXX: Disable min/maxlod for txf */
+                .max_level_of_detail = MIN2(MIN2(psampler->max_lod,
+                                                 VC5_MAX_MIP_LEVELS),
+                                            psview->u.tex.last_level),
+
+                .texture_base_pointer = cl_address(rsc->bo,
+                                                   rsc->slices[0].offset),
+        };
+
+        int min_img_filter = psampler->min_img_filter;
+        int min_mip_filter = psampler->min_mip_filter;
+        int mag_img_filter = psampler->mag_img_filter;
+
+        /* Formats with a 32-bit return size get nearest filtering for both
+         * the minification and magnification image filters, and at most
+         * nearest mip selection.
+         */
+        if (vc5_get_tex_return_size(psview->format) == 32) {
+                min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+                min_img_filter = PIPE_TEX_FILTER_NEAREST;
+                mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        }
+
+        /* The minification field encodes the mip mode in steps of two; the
+         * image filter selects within each pair.  Note that the "no mip"
+         * pair is ordered the other way round from the mipmapped pairs.
+         */
+        bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST);
+        switch (min_mip_filter) {
+        case PIPE_TEX_MIPFILTER_NONE:
+                unpacked.minification_filter = 0 + min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_NEAREST:
+                unpacked.minification_filter = 2 + !min_nearest;
+                break;
+        case PIPE_TEX_MIPFILTER_LINEAR:
+                unpacked.minification_filter = 4 + !min_nearest;
+                break;
+        }
+        unpacked.magnification_filter = (mag_img_filter ==
+                                         PIPE_TEX_FILTER_NEAREST);
+
+        uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
+        cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
+
+        /* Merge in the bytes kept on the sampler view and sampler state. */
+        for (int b = 0; b < ARRAY_SIZE(packed); b++)
+                packed[b] |= sview->texture_shader_state[b] | sampler->texture_shader_state[b];
+
+        cl_emit_prepacked(&job->indirect, &packed);
+}
+
+/* Emits texture shader state for each of the first num_textures units of
+ * @stage_tex, in ascending unit order.
+ */
+static void
+emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
+{
+        int unit = 0;
+
+        while (unit < stage_tex->num_textures) {
+                emit_one_texture(vc5, stage_tex, unit);
+                unit++;
+        }
+}
+
+void
+vc5_emit_state(struct pipe_context *pctx)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_job *job = vc5->job;
+
+ if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
+ VC5_DIRTY_RASTERIZER)) {
+ float *vpscale = vc5->viewport.scale;
+ float *vptranslate = vc5->viewport.translate;
+ float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+ float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+ float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+ float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
+
+ /* Clip to the scissor if it's enabled, but still clip to the
+ * drawable regardless since that controls where the binner
+ * tries to put things.
+ *
+ * Additionally, always clip the rendering to the viewport,
+ * since the hardware does guardband clipping, meaning
+ * primitives would rasterize outside of the view volume.
+ */
+ uint32_t minx, miny, maxx, maxy;
+ if (!vc5->rasterizer->base.scissor) {
+ minx = MAX2(vp_minx, 0);
+ miny = MAX2(vp_miny, 0);
+ maxx = MIN2(vp_maxx, job->draw_width);
+ maxy = MIN2(vp_maxy, job->draw_height);
+ } else {
+ minx = MAX2(vp_minx, vc5->scissor.minx);
+ miny = MAX2(vp_miny, vc5->scissor.miny);
+ maxx = MIN2(vp_maxx, vc5->scissor.maxx);
+ maxy = MIN2(vp_maxy, vc5->scissor.maxy);
+ }
+
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
+ clip.clip_window_left_pixel_coordinate = minx;
+ clip.clip_window_bottom_pixel_coordinate = miny;
+ clip.clip_window_height_in_pixels = maxy - miny;
+ clip.clip_window_width_in_pixels = maxx - minx;
+ clip.clip_window_height_in_pixels = maxy - miny;
+ }
+
+ job->draw_min_x = MIN2(job->draw_min_x, minx);
+ job->draw_min_y = MIN2(job->draw_min_y, miny);
+ job->draw_max_x = MAX2(job->draw_max_x, maxx);
+ job->draw_max_y = MAX2(job->draw_max_y, maxy);
+ }
+
+ if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
+ VC5_DIRTY_ZSA |
+ VC5_DIRTY_BLEND |
+ VC5_DIRTY_COMPILED_FS)) {
+ cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
+ config.enable_forward_facing_primitive =
+ !(vc5->rasterizer->base.cull_face &
+ PIPE_FACE_FRONT);
+ config.enable_reverse_facing_primitive =
+ !(vc5->rasterizer->base.cull_face &
+ PIPE_FACE_BACK);
+ /* This seems backwards, but it's what gets the
+ * clipflat test to pass.
+ */
+ config.clockwise_primitives =
+ vc5->rasterizer->base.front_ccw;
+
+ config.enable_depth_offset =
+ vc5->rasterizer->base.offset_tri;
+
+ config.rasterizer_oversample_mode =
+ vc5->rasterizer->base.multisample;
+
+ config.direct3d_provoking_vertex =
+ vc5->rasterizer->base.flatshade_first;
+
+ config.blend_enable = vc5->blend->rt[0].blend_enable;
+
+ config.early_z_updates_enable = true;
+ if (vc5->zsa->base.depth.enabled) {
+ config.z_updates_enable =
+ vc5->zsa->base.depth.writemask;
+ config.early_z_enable =
+ vc5->zsa->early_z_enable;
+ config.depth_test_function =
+ vc5->zsa->base.depth.func;
+ } else {
+ config.depth_test_function = PIPE_FUNC_ALWAYS;
+ }
+ }
+
+ }
+
+ if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
+ cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
+ depth.depth_offset_factor =
+ vc5->rasterizer->offset_factor;
+ depth.depth_offset_units =
+ vc5->rasterizer->offset_units;
+ }
+
+ cl_emit(&job->bcl, POINT_SIZE, point_size) {
+ point_size.point_size = vc5->rasterizer->point_size;
+ }
+
+ cl_emit(&job->bcl, LINE_WIDTH, line_width) {
+ line_width.line_width = vc5->rasterizer->base.line_width;
+ }
+ }
+
+ if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+ clip.viewport_half_width_in_1_256th_of_pixel =
+ vc5->viewport.scale[0] * 256.0f;
+ clip.viewport_half_height_in_1_256th_of_pixel =
+ vc5->viewport.scale[1] * 256.0f;
+ }
+
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ clip.viewport_z_offset_zc_to_zs =
+ vc5->viewport.translate[2];
+ clip.viewport_z_scale_zc_to_zs =
+ vc5->viewport.scale[2];
+ }
+ if (0 /* XXX */) {
+ cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+ clip.minimum_zw = (vc5->viewport.translate[2] -
+ vc5->viewport.scale[2]);
+ clip.maximum_zw = (vc5->viewport.translate[2] +
+ vc5->viewport.scale[2]);
+ }
+ }
+
+ cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
+ vp.viewport_centre_x_coordinate =
+ vc5->viewport.translate[0];
+ vp.viewport_centre_y_coordinate =
+ vc5->viewport.translate[1];
+ }
+ }
+
+ if (vc5->dirty & VC5_DIRTY_BLEND) {
+ struct pipe_blend_state *blend = vc5->blend;
+
+ cl_emit(&job->bcl, BLEND_CONFIG, config) {
+ struct pipe_rt_blend_state *rtblend = &blend->rt[0];
+
+ config.colour_blend_mode = rtblend->rgb_func;
+ config.colour_blend_dst_factor =
+ vc5_factor(rtblend->rgb_dst_factor);
+ config.colour_blend_src_factor =
+ vc5_factor(rtblend->rgb_src_factor);
+
+ config.alpha_blend_mode = rtblend->alpha_func;
+ config.alpha_blend_dst_factor =
+ vc5_factor(rtblend->alpha_dst_factor);
+ config.alpha_blend_src_factor =
+ vc5_factor(rtblend->alpha_src_factor);
+ }
+
+ cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
+ if (blend->independent_blend_enable) {
+ mask.render_target_0_per_colour_component_write_masks =
+ (~blend->rt[0].colormask) & 0xf;
+ mask.render_target_1_per_colour_component_write_masks =
+ (~blend->rt[1].colormask) & 0xf;
+ mask.render_target_2_per_colour_component_write_masks =
+ (~blend->rt[2].colormask) & 0xf;
+ mask.render_target_3_per_colour_component_write_masks =
+ (~blend->rt[3].colormask) & 0xf;
+ } else {
+ uint8_t colormask = (~blend->rt[0].colormask) & 0xf;
+ mask.render_target_0_per_colour_component_write_masks = colormask;
+ mask.render_target_1_per_colour_component_write_masks = colormask;
+ mask.render_target_2_per_colour_component_write_masks = colormask;
+ mask.render_target_3_per_colour_component_write_masks = colormask;
+ }
+ }
+ }
+
+ if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
+ cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
+ /* XXX: format-dependent swizzling */
+ colour.red_f16 = vc5->blend_color.hf[2];
+ colour.green_f16 = vc5->blend_color.hf[1];
+ colour.blue_f16 = vc5->blend_color.hf[0];
+ colour.alpha_f16 = vc5->blend_color.hf[3];
+ }
+ }
+
+ if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
+ struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
+ struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
+
+ cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+ config.front_config = true;
+ config.back_config = !back->enabled;
+
+ config.stencil_write_mask = front->writemask;
+ config.stencil_test_mask = front->valuemask;
+
+ config.stencil_test_function = front->func;
+ config.stencil_pass_op = front->zpass_op;
+ config.depth_test_fail_op = front->zfail_op;
+ config.stencil_test_fail_op = front->fail_op;
+
+ config.stencil_ref_value = vc5->stencil_ref.ref_value[0];
+ }
+
+ if (back->enabled) {
+ cl_emit(&job->bcl, STENCIL_CONFIG, config) {
+ config.front_config = false;
+ config.back_config = true;
+
+ config.stencil_write_mask = back->writemask;
+ config.stencil_test_mask = back->valuemask;
+
+ config.stencil_test_function = back->func;
+ config.stencil_pass_op = back->zpass_op;
+ config.depth_test_fail_op = back->zfail_op;
+ config.stencil_test_fail_op = back->fail_op;
+
+ config.stencil_ref_value =
+ vc5->stencil_ref.ref_value[1];
+ }
+ }
+ }
+
+ if (vc5->dirty & VC5_DIRTY_FRAGTEX)
+ emit_textures(vc5, &vc5->fragtex);
+
+ if (vc5->dirty & VC5_DIRTY_VERTTEX)
+ emit_textures(vc5, &vc5->verttex);
+
+ if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
+ /* XXX: Need to handle more than 24 entries. */
+ cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+ flags.varying_offset_v0 = 0;
+
+ flags.flat_shade_flags_for_varyings_v024 =
+ vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
+
+ if (vc5->rasterizer->base.flatshade) {
+ flags.flat_shade_flags_for_varyings_v024 |=
+ vc5->prog.fs->prog_data.fs->shade_model_flags[0] & 0xfffff;
+ }
+ }
+ }
+
+ if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
+ struct vc5_streamout_stateobj *so = &vc5->streamout;
+
+ if (so->num_targets) {
+ cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
+ tfe.number_of_32_bit_output_buffer_address_following =
+ so->num_targets;
+ tfe.number_of_16_bit_output_data_specs_following =
+ vc5->prog.bind_vs->num_tf_specs;
+ };
+
+ for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
+ cl_emit_prepacked(&job->bcl,
+ &vc5->prog.bind_vs->tf_specs[i]);
+ }
+
+ for (int i = 0; i < so->num_targets; i++) {
+ const struct pipe_stream_output_target *target =
+ so->targets[i];
+ struct vc5_resource *rsc =
+ vc5_resource(target->buffer);
+
+ cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
+ output.address =
+ cl_address(rsc->bo,
+ target->buffer_offset);
+ };
+
+ vc5_job_add_write_resource(vc5->job,
+ target->buffer);
+ /* XXX: buffer_size? */
+ }
+ } else {
+ /* XXX? */
+ }
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c b/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c
new file mode 100644
index 000000000..08de9bca5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_fence.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: You can wait on a BO
+ * until all rendering to it from any process has completed, or wait on a
+ * seqno for that particular seqno to be passed. The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "vc5_screen.h"
+#include "vc5_bufmgr.h"
+
+struct vc5_fence { /* fence object; this file casts it to/from struct pipe_fence_handle */
+ struct pipe_reference reference; /* refcount, initialized to 1 in vc5_fence_create() */
+ uint64_t seqno; /* seqno that vc5_fence_finish() passes to vc5_wait_seqno() */
+};
+
+/* Points *pp at pf; frees the previous fence when pipe_reference() reports it should be destroyed. */
+static void
+vc5_fence_reference(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle **pp,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc5_fence **p = (struct vc5_fence **)pp;
+        struct vc5_fence *f = (struct vc5_fence *)pf;
+        struct vc5_fence *old = *p;
+        /* Either fence may be NULL: pass NULL rather than a member address formed through it. */
+        if (pipe_reference(old ? &old->reference : NULL, f ? &f->reference : NULL))
+                free(old);
+        *p = f;
+}
+
+/* fence_finish hook: waits on the fence's seqno with the given timeout; ctx is unused. */
+static boolean
+vc5_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx,
+                 struct pipe_fence_handle *pf, uint64_t timeout_ns)
+{
+        struct vc5_fence *fence = (struct vc5_fence *)pf;
+        struct vc5_screen *vscreen = vc5_screen(pscreen);
+
+        /* The wait result is handed back to the caller unchanged. */
+        return vc5_wait_seqno(vscreen, fence->seqno, timeout_ns, "fence wait");
+}
+
+/* Allocates a fence for seqno holding one reference; returns NULL if allocation fails. */
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno)
+{
+        struct vc5_fence *fence = calloc(1, sizeof(*fence));
+        if (fence != NULL) {
+                /* screen is not consulted here. */
+                fence->seqno = seqno;
+                pipe_reference_init(&fence->reference, 1);
+        }
+
+        return fence;
+}
+
+/* Installs this file's fence_reference/fence_finish hooks on the screen. */
+void vc5_fence_init(struct vc5_screen *screen)
+{
+        screen->base.fence_finish = vc5_fence_finish;
+        screen->base.fence_reference = vc5_fence_reference;
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c b/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c
new file mode 100644
index 000000000..114f2d741
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_formats.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_formats.c
+ *
+ * Contains the table and accessors for VC5 texture and render target format
+ * support.
+ *
+ * The hardware has limited support for texture formats, and extremely limited
+ * support for render target formats. As a result, we emulate other formats
+ * in our shader code, and this stores the table for doing so.
+ */
+
+#include "util/u_format.h"
+#include "util/macros.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#define OUTPUT_IMAGE_FORMAT_NO 255
+
+struct vc5_format { /* one vc5_format_table entry, indexed by enum pipe_format */
+ /** Set if the pipe format is defined in the table. */
+ bool present;
+
+ /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO when the format can't be rendered to. */
+ uint8_t rt_type;
+
+ /** One of V3D33_TEXTURE_DATA_FORMAT_*. */
+ uint8_t tex_type;
+
+ /**
+ * Swizzle to apply to the RGBA shader output for storing to the tile
+ * buffer, to the RGBA tile buffer to produce shader input (for
+ * blending), and for turning the rgba8888 texture sampler return
+ * value into shader rgba values.
+ */
+ uint8_t swizzle[4];
+
+ /** Texture return size, 16 or 32: whether the return value is 16F/I/UI or 32F/I/UI. */
+ uint8_t return_size;
+
+ /** If return_size == 32, how many channels are returned by texturing.
+ * 16 always returns 2 pairs of 16 bit values.
+ */
+ uint8_t return_channels;
+};
+
+#define SWIZ(x,y,z,w) { /* initializer for vc5_format::swizzle from four PIPE_SWIZZLE_* suffixes */ \
+ PIPE_SWIZZLE_##x, \
+ PIPE_SWIZZLE_##y, \
+ PIPE_SWIZZLE_##z, \
+ PIPE_SWIZZLE_##w \
+}
+/* FORMAT() expands to one designated initializer; its fields are positional, in struct vc5_format order. */
+#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \
+ [PIPE_FORMAT_##pipe] = { \
+ true, /* present */ \
+ OUTPUT_IMAGE_FORMAT_##rt, /* rt_type */ \
+ TEXTURE_DATA_FORMAT_##tex, /* tex_type */ \
+ swiz, \
+ return_size, \
+ return_channels, \
+ }
+/* Shorthand names for the swizzles used by the format table. */
+#define SWIZ_X001 SWIZ(X, 0, 0, 1)
+#define SWIZ_XY01 SWIZ(X, Y, 0, 1)
+#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1)
+#define SWIZ_XYZW SWIZ(X, Y, Z, W)
+#define SWIZ_YZWX SWIZ(Y, Z, W, X)
+#define SWIZ_YZW1 SWIZ(Y, Z, W, 1)
+#define SWIZ_ZYXW SWIZ(Z, Y, X, W)
+#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1)
+#define SWIZ_XXXY SWIZ(X, X, X, Y)
+#define SWIZ_XXX1 SWIZ(X, X, X, 1)
+#define SWIZ_XXXX SWIZ(X, X, X, X)
+#define SWIZ_000X SWIZ(0, 0, 0, X)
+
+static const struct vc5_format vc5_format_table[] = { /* indexed by enum pipe_format; see get_format() */
+        FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_UNORM, RGBX8, RGBA8, SWIZ_ZYX1, 16, 0),
+        FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0),
+        FORMAT(B8G8R8X8_SRGB, SRGBX8, RGBA8, SWIZ_ZYX1, 16, 0),
+        FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_UNORM, RGBX8, RGBA8, SWIZ_XYZ1, 16, 0),
+        FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0),
+        FORMAT(B10G10R10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_ZYXW, 16, 0),
+
+        FORMAT(B4G4R4A4_UNORM, ABGR4444, RGBA4, SWIZ_YZWX, 16, 0),
+        FORMAT(B4G4R4X4_UNORM, ABGR4444, RGBA4, SWIZ_YZW1, 16, 0),
+
+        FORMAT(A1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, 0),
+        FORMAT(X1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZ1, 16, 0),
+        FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0),
+
+        FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0),
+        FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0),
+        FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0),
+
+        FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1),
+        FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, 1),
+        FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0),
+        FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1),
+
+        FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2),
+        FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2),
+
+        FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4),
+        FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4),
+
+        /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. */
+        FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1),
+        FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1),
+        FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1),
+        FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1),
+        FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1),
+        FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1),
+        FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2),
+        FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2),
+
+        FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0),
+        FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0),
+        FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0),
+        FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0),
+
+        FORMAT(R8_SINT, R8I, S8, SWIZ_X001, 16, 0),
+        FORMAT(R8_UINT, R8UI, S8, SWIZ_X001, 16, 0),
+        FORMAT(R8G8_SINT, RG8I, S16, SWIZ_XY01, 16, 0),
+        FORMAT(R8G8_UINT, RG8UI, S16, SWIZ_XY01, 16, 0),
+        FORMAT(R8G8B8A8_SINT, RGBA8I, R32F, SWIZ_XYZW, 16, 0),
+        FORMAT(R8G8B8A8_UINT, RGBA8UI, R32F, SWIZ_XYZW, 16, 0),
+
+        FORMAT(R16_SINT, R16I, S16, SWIZ_X001, 16, 0),
+        FORMAT(R16_UINT, R16UI, S16, SWIZ_X001, 16, 0),
+        FORMAT(R16G16_SINT, RG16I, R32F, SWIZ_XY01, 16, 0),
+        FORMAT(R16G16_UINT, RG16UI, R32F, SWIZ_XY01, 16, 0),
+        FORMAT(R16G16B16A16_SINT, RGBA16I, RG32F, SWIZ_XYZW, 16, 0),
+        FORMAT(R16G16B16A16_UINT, RGBA16UI, RG32F, SWIZ_XYZW, 16, 0),
+
+        FORMAT(R32_SINT, R32I, R32F, SWIZ_X001, 16, 0),
+        FORMAT(R32_UINT, R32UI, R32F, SWIZ_X001, 16, 0),
+        FORMAT(R32G32_SINT, RG32I, RG32F, SWIZ_XY01, 16, 0),
+        FORMAT(R32G32_UINT, RG32UI, RG32F, SWIZ_XY01, 16, 0),
+        FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32F, SWIZ_XYZW, 16, 0),
+        FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32F, SWIZ_XYZW, 16, 0),
+
+        FORMAT(A8_SINT, R8I, S8, SWIZ_000X, 16, 0),
+        FORMAT(A8_UINT, R8UI, S8, SWIZ_000X, 16, 0),
+        FORMAT(A16_SINT, R16I, S16, SWIZ_000X, 16, 0),
+        FORMAT(A16_UINT, R16UI, S16, SWIZ_000X, 16, 0),
+        FORMAT(A32_SINT, R32I, R32F, SWIZ_000X, 16, 0),
+        FORMAT(A32_UINT, R32UI, R32F, SWIZ_000X, 16, 0),
+
+        FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 16, 0),
+        FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0),
+
+        FORMAT(S8_UINT_Z24_UNORM, DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(X8Z24_UNORM, DEPTH_COMPONENT24, DEPTH24_X8, SWIZ_X001, 32, 1),
+        FORMAT(S8X24_UINT, NO, R32F, SWIZ_X001, 32, 1),
+        FORMAT(Z32_FLOAT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1),
+        FORMAT(Z16_UNORM, DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_X001, 32, 1),
+
+        /* Pretend we support this, but it'll be separate Z32F depth and S8. */
+        FORMAT(Z32_FLOAT_S8X24_UINT, DEPTH_COMPONENT32F, R32F, SWIZ_X001, 32, 1),
+
+        FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0),
+        FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0),
+        FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0),
+        FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0),
+        FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0),
+        FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0),
+        /* S3TC: DXT1_RGB has no alpha, but DXT3/DXT5 carry alpha, so W must not be forced to 1. */
+        FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0),
+        FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZW, 16, 0),
+        FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZW, 16, 0),
+};
+
+/* Looks up f in vc5_format_table; NULL when f is out of range or has no entry. */
+static const struct vc5_format *
+get_format(enum pipe_format f)
+{
+        const bool in_range = f < ARRAY_SIZE(vc5_format_table);
+        if (in_range && vc5_format_table[f].present)
+                return &vc5_format_table[f];
+        return NULL;
+}
+
+/* True when f has a table entry whose rt_type is not OUTPUT_IMAGE_FORMAT_NO. */
+bool
+vc5_rt_format_supported(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        if (entry == NULL)
+                return false;
+        return entry->rt_type != OUTPUT_IMAGE_FORMAT_NO;
+}
+
+/* Returns the table's rt_type for f, or 0 when f has no table entry. */
+uint8_t
+vc5_get_rt_format(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+        /* Entries that can't be rendered to yield OUTPUT_IMAGE_FORMAT_NO. */
+        if (entry != NULL)
+                return entry->rt_type;
+        return 0;
+}
+
+/* True when f has an entry in vc5_format_table. */
+bool
+vc5_tex_format_supported(enum pipe_format f)
+{
+        const bool has_entry = get_format(f) != NULL;
+        return has_entry;
+}
+
+/* Returns the table's tex_type for f, or 0 when f has no table entry. */
+uint8_t
+vc5_get_tex_format(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+        uint8_t tex_type = 0;
+        if (entry)
+                tex_type = entry->tex_type;
+        return tex_type;
+}
+
+/* Returns the table's return_size for f, or 0 when f has no table entry. */
+uint8_t
+vc5_get_tex_return_size(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+        const uint8_t size = entry ? entry->return_size : 0;
+
+        /* Table entries only ever store 16 or 32 here. */
+        return size;
+}
+
+/* Returns the table's return_channels for f, or 0 when f has no table entry. */
+uint8_t
+vc5_get_tex_return_channels(enum pipe_format f)
+{
+        const struct vc5_format *entry = get_format(f);
+
+        if (entry != NULL)
+                return entry->return_channels;
+        return 0;
+}
+
+/* Returns f's 4-entry swizzle, or a static {0, 1, 2, 3} array when f has no table entry. */
+const uint8_t *
+vc5_get_format_swizzle(enum pipe_format f)
+{
+        static const uint8_t fallback[] = {0, 1, 2, 3};
+        const struct vc5_format *entry = get_format(f);
+
+        if (entry != NULL)
+                return entry->swizzle;
+        return fallback;
+}
+
+void
+vc5_get_internal_type_bpp_for_output_format(uint32_t format, /* an OUTPUT_IMAGE_FORMAT_* value */
+ uint32_t *type, /* out: receives an INTERNAL_TYPE_* value */
+ uint32_t *bpp) /* out: receives an INTERNAL_BPP_* value */
+{
+ switch (format) {
+ case OUTPUT_IMAGE_FORMAT_RGBA8:
+ case OUTPUT_IMAGE_FORMAT_RGBX8:
+ case OUTPUT_IMAGE_FORMAT_RGB8:
+ case OUTPUT_IMAGE_FORMAT_RG8:
+ case OUTPUT_IMAGE_FORMAT_R8:
+ case OUTPUT_IMAGE_FORMAT_ABGR4444:
+ case OUTPUT_IMAGE_FORMAT_BGR565:
+ case OUTPUT_IMAGE_FORMAT_ABGR1555:
+ *type = INTERNAL_TYPE_8;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 8-bit signed integer formats. */
+ case OUTPUT_IMAGE_FORMAT_RGBA8I:
+ case OUTPUT_IMAGE_FORMAT_RG8I:
+ case OUTPUT_IMAGE_FORMAT_R8I:
+ *type = INTERNAL_TYPE_8I;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 8-bit unsigned integer formats. */
+ case OUTPUT_IMAGE_FORMAT_RGBA8UI:
+ case OUTPUT_IMAGE_FORMAT_RG8UI:
+ case OUTPUT_IMAGE_FORMAT_R8UI:
+ *type = INTERNAL_TYPE_8UI;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* sRGB, packed and RGBA16F formats all use the 16F type at 64bpp. */
+ case OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
+ case OUTPUT_IMAGE_FORMAT_SRGB:
+ case OUTPUT_IMAGE_FORMAT_RGB10_A2:
+ case OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
+ case OUTPUT_IMAGE_FORMAT_SRGBX8:
+ case OUTPUT_IMAGE_FORMAT_RGBA16F:
+ /* Note that sRGB RTs are stored in the tile buffer at 16F,
+ * and the conversion to sRGB happens at tilebuffer
+ * load/store.
+ */
+ *type = INTERNAL_TYPE_16F;
+ *bpp = INTERNAL_BPP_64;
+ break;
+
+ case OUTPUT_IMAGE_FORMAT_RG16F:
+ case OUTPUT_IMAGE_FORMAT_R16F:
+ *type = INTERNAL_TYPE_16F;
+ /* Use 64bpp to make sure the TLB doesn't throw away the alpha
+ * channel before alpha test happens.
+ */
+ *bpp = INTERNAL_BPP_64;
+ break;
+ /* 16-bit signed integer formats: only the four-channel one gets 64bpp. */
+ case OUTPUT_IMAGE_FORMAT_RGBA16I:
+ *type = INTERNAL_TYPE_16I;
+ *bpp = INTERNAL_BPP_64;
+ break;
+ case OUTPUT_IMAGE_FORMAT_RG16I:
+ case OUTPUT_IMAGE_FORMAT_R16I:
+ *type = INTERNAL_TYPE_16I;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 16-bit unsigned integer formats: only the four-channel one gets 64bpp. */
+ case OUTPUT_IMAGE_FORMAT_RGBA16UI:
+ *type = INTERNAL_TYPE_16UI;
+ *bpp = INTERNAL_BPP_64;
+ break;
+ case OUTPUT_IMAGE_FORMAT_RG16UI:
+ case OUTPUT_IMAGE_FORMAT_R16UI:
+ *type = INTERNAL_TYPE_16UI;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 32-bit signed integer formats: 128/64/32bpp for 4/2/1 channels. */
+ case OUTPUT_IMAGE_FORMAT_RGBA32I:
+ *type = INTERNAL_TYPE_32I;
+ *bpp = INTERNAL_BPP_128;
+ break;
+ case OUTPUT_IMAGE_FORMAT_RG32I:
+ *type = INTERNAL_TYPE_32I;
+ *bpp = INTERNAL_BPP_64;
+ break;
+ case OUTPUT_IMAGE_FORMAT_R32I:
+ *type = INTERNAL_TYPE_32I;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 32-bit unsigned integer formats: 128/64/32bpp for 4/2/1 channels. */
+ case OUTPUT_IMAGE_FORMAT_RGBA32UI:
+ *type = INTERNAL_TYPE_32UI;
+ *bpp = INTERNAL_BPP_128;
+ break;
+ case OUTPUT_IMAGE_FORMAT_RG32UI:
+ *type = INTERNAL_TYPE_32UI;
+ *bpp = INTERNAL_BPP_64;
+ break;
+ case OUTPUT_IMAGE_FORMAT_R32UI:
+ *type = INTERNAL_TYPE_32UI;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ /* 32-bit float formats: 128/64/32bpp for 4/2/1 channels. */
+ case OUTPUT_IMAGE_FORMAT_RGBA32F:
+ *type = INTERNAL_TYPE_32F;
+ *bpp = INTERNAL_BPP_128;
+ break;
+ case OUTPUT_IMAGE_FORMAT_RG32F:
+ *type = INTERNAL_TYPE_32F;
+ *bpp = INTERNAL_BPP_64;
+ break;
+ case OUTPUT_IMAGE_FORMAT_R32F:
+ *type = INTERNAL_TYPE_32F;
+ *bpp = INTERNAL_BPP_32;
+ break;
+
+ default:
+ /* Provide some default values, as we'll be called at RB
+ * creation time, even if an RB with this format isn't
+ * supported.
+ */
+ *type = INTERNAL_TYPE_8;
+ *bpp = INTERNAL_BPP_32;
+ break;
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_job.c b/lib/mesa/src/gallium/drivers/vc5/vc5_job.c
new file mode 100644
index 000000000..ed1a64be8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_job.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_job.c
+ *
+ * Functions for submitting VC5 render jobs to the kernel.
+ */
+
+#include <xf86drm.h>
+#include "vc5_context.h"
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "broadcom/clif/clif_dump.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/* Removes key's entry from ht by handing the search result straight to the remove call. */
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+        _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
+}
+
+static void
+vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) /* unlinks the job from all tracking, then frees it */
+{
+ struct set_entry *entry;
+ /* Drop the reference vc5_job_add_bo() took on each BO in the set. */
+ set_foreach(job->bos, entry) {
+ struct vc5_bo *bo = (struct vc5_bo *)entry->key;
+ vc5_bo_unreference(&bo);
+ }
+ /* Unlink the job from the context's job table. */
+ remove_from_ht(vc5->jobs, &job->key);
+ /* Unlink every resource recorded by vc5_job_add_write_resource(). */
+ if (job->write_prscs) {
+ struct set_entry *entry;
+
+ set_foreach(job->write_prscs, entry) {
+ const struct pipe_resource *prsc = entry->key;
+
+ remove_from_ht(vc5->write_jobs, (void *)prsc);
+ }
+ }
+ /* Unlink the color/Z surfaces' resources from write_jobs and drop the job's surface references. */
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (job->cbufs[i]) {
+ remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture);
+ pipe_surface_reference(&job->cbufs[i], NULL);
+ }
+ }
+ if (job->zsbuf) {
+ remove_from_ht(vc5->write_jobs, job->zsbuf->texture);
+ pipe_surface_reference(&job->zsbuf, NULL);
+ }
+ /* Don't leave the context pointing at the job being freed. */
+ if (vc5->job == job)
+ vc5->job = NULL;
+ /* Release the three command lists and the tile allocation BO before the job itself. */
+ vc5_destroy_cl(&job->bcl);
+ vc5_destroy_cl(&job->rcl);
+ vc5_destroy_cl(&job->indirect);
+ vc5_bo_unreference(&job->tile_alloc);
+
+ ralloc_free(job);
+}
+
+/* Allocates a zeroed job via rzalloc() on the context, with initialized CLs and an empty BO set. */
+static struct vc5_job *
+vc5_job_create(struct vc5_context *vc5)
+{
+        struct vc5_job *fresh = rzalloc(vc5, struct vc5_job);
+
+        fresh->vc5 = vc5;
+        vc5_init_cl(fresh, &fresh->bcl);
+        vc5_init_cl(fresh, &fresh->rcl);
+        vc5_init_cl(fresh, &fresh->indirect);
+
+        /* Start with inverted draw bounds (min above max), i.e. nothing drawn yet. */
+        fresh->draw_max_x = 0;
+        fresh->draw_max_y = 0;
+        fresh->draw_min_x = ~0;
+        fresh->draw_min_y = ~0;
+        /* Pointer-keyed set of the BOs this job references. */
+        fresh->bos = _mesa_set_create(fresh, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+        return fresh;
+}
+
+/* Adds bo to the job once: takes a reference and appends its handle to the submit handle array. */
+void
+vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo)
+{
+        /* NULL BOs and BOs already in the job's set are ignored. */
+        if (bo == NULL || _mesa_set_search(job->bos, bo) != NULL)
+                return;
+
+        vc5_bo_reference(bo);
+        _mesa_set_add(job->bos, bo);
+
+        uint32_t *handles = (void *)(uintptr_t)job->submit.bo_handles;
+        const bool array_full = job->submit.bo_handle_count >= job->bo_handles_size;
+        if (array_full) {
+                /* Grow to at least 4 entries, doubling after that. */
+                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
+                handles = reralloc(job, handles, uint32_t, job->bo_handles_size);
+                job->submit.bo_handles = (uintptr_t)(void *)handles;
+        }
+        handles[job->submit.bo_handle_count] = bo->handle;
+        job->submit.bo_handle_count++;
+}
+
+/* Records that job writes prsc, both in the job's own set and in the context's write_jobs table. */
+void
+vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc)
+{
+        /* The per-job set is created on first use. */
+        if (job->write_prscs == NULL)
+                job->write_prscs = _mesa_set_create(job, _mesa_hash_pointer,
+                                                    _mesa_key_pointer_equal);
+
+        _mesa_set_add(job->write_prscs, prsc);
+
+        struct hash_table *writers = job->vc5->write_jobs;
+        _mesa_hash_table_insert(writers, prsc, job);
+}
+
+/* Submits the job registered in write_jobs as the writer of prsc, if there is one. */
+void
+vc5_flush_jobs_writing_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct hash_entry *writer = _mesa_hash_table_search(vc5->write_jobs, prsc);
+
+        if (writer == NULL)
+                return;
+        vc5_job_submit(vc5, writer->data);
+}
+
+/* Submits prsc's writer job, then every job in vc5->jobs whose BO set contains prsc's BO. */
+void
+vc5_flush_jobs_reading_resource(struct vc5_context *vc5,
+                                struct pipe_resource *prsc)
+{
+        struct vc5_resource *res = vc5_resource(prsc);
+        struct hash_entry *it;
+
+        vc5_flush_jobs_writing_resource(vc5, prsc);
+
+        /* Per the original author's note, vc5->jobs stays safe to iterate
+         * even after vc5_job_submit() deletes the current entry.
+         */
+        hash_table_foreach(vc5->jobs, it) {
+                struct vc5_job *reader = it->data;
+
+                if (!_mesa_set_search(reader->bos, res->bo))
+                        continue;
+                vc5_job_submit(vc5, reader);
+        }
+}
+
+/* Sets job->tile_width/tile_height and job->internal_bpp from MSAA, bound color buffers and their max bpp. */
+static void
+vc5_job_set_tile_buffer_size(struct vc5_job *job)
+{
+        static const uint8_t tile_sizes[] = { /* (width, height) pairs */
+                64, 64,
+                64, 32,
+                32, 32,
+                32, 16,
+                16, 16,
+        };
+        int tile_size_index = 0;
+        if (job->msaa)
+                tile_size_index += 2;
+        if (job->cbufs[3] || job->cbufs[2])
+                tile_size_index += 2;
+        else if (job->cbufs[1])
+                tile_size_index++;
+
+        int max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
+        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+                if (job->cbufs[i]) {
+                        struct vc5_surface *surf = vc5_surface(job->cbufs[i]);
+                        max_bpp = MAX2(max_bpp, surf->internal_bpp);
+                }
+        }
+        job->internal_bpp = max_bpp;
+        STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
+        tile_size_index += max_bpp;
+        /* The index selects a pair, so its limit is half the array length. */
+        assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2);
+        job->tile_width = tile_sizes[tile_size_index * 2 + 0];
+        job->tile_height = tile_sizes[tile_size_index * 2 + 1];
+}
+
+/**
+ * Returns a vc5_job structure for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return the same job,
+ * otherwise make a new one. If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc5_job *
+vc5_get_job(struct vc5_context *vc5,
+ struct pipe_surface **cbufs, struct pipe_surface *zsbuf)
+{
+ /* Return the existing job for this FBO if we have one */
+ struct vc5_job_key local_key = {
+ .cbufs = {
+ cbufs[0],
+ cbufs[1],
+ cbufs[2],
+ cbufs[3],
+ },
+ .zsbuf = zsbuf,
+ };
+ struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs,
+ &local_key);
+ if (entry)
+ return entry->data;
+
+ /* Creating a new job. Make sure that any previous jobs reading or
+ * writing these buffers are flushed.
+ */
+ struct vc5_job *job = vc5_job_create(vc5);
+
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (cbufs[i]) {
+ vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture);
+ pipe_surface_reference(&job->cbufs[i], cbufs[i]);
+
+ if (cbufs[i]->texture->nr_samples > 1)
+ job->msaa = true;
+ }
+ }
+ if (zsbuf) {
+ vc5_flush_jobs_reading_resource(vc5, zsbuf->texture);
+ pipe_surface_reference(&job->zsbuf, zsbuf);
+ if (zsbuf->texture->nr_samples > 1)
+ job->msaa = true;
+ }
+ /* The tile size is derived from the surfaces and msaa flag set just above. */
+ vc5_job_set_tile_buffer_size(job);
+ /* Register this job as the writer of each attached surface's resource. */
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (cbufs[i])
+ _mesa_hash_table_insert(vc5->write_jobs,
+ cbufs[i]->texture, job);
+ }
+ if (zsbuf)
+ _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job);
+ /* Insert under the job's own copy of the key; local_key is a stack variable. */
+ memcpy(&job->key, &local_key, sizeof(local_key));
+ _mesa_hash_table_insert(vc5->jobs, &job->key, job);
+
+ return job;
+}
+
+/* Returns the job bound to the context, first binding the job for the
+ * current framebuffer (and marking all state dirty) when none is bound.
+ */
+struct vc5_job *
+vc5_get_job_for_fbo(struct vc5_context *vc5)
+{
+        if (vc5->job != NULL)
+                return vc5->job;
+
+        struct pipe_surface **color_bufs = vc5->framebuffer.cbufs;
+        struct pipe_surface *depth_buf = vc5->framebuffer.zsbuf;
+        struct vc5_job *bound = vc5_get_job(vc5, color_bufs, depth_buf);
+
+        /* Dirty bits only record changes made while vc5->job was bound, and
+         * rendering must begin with all state emitted, so mark everything
+         * dirty whenever a job gets bound.
+         */
+        vc5->dirty = ~0;
+
+        /* Surfaces whose resource has no recorded writes hold nothing worth
+         * loading before drawing, so flag them as cleared.
+         */
+        for (int rt = 0; rt < 4; rt++) {
+                if (color_bufs[rt] == NULL)
+                        continue;
+                if (!vc5_resource(color_bufs[rt]->texture)->writes)
+                        bound->cleared |= PIPE_CLEAR_COLOR0 << rt;
+        }
+
+        if (depth_buf != NULL && !vc5_resource(depth_buf->texture)->writes)
+                bound->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+
+        /* Number of tiles needed to cover the framebuffer along each axis. */
+        bound->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width,
+                                           bound->tile_width);
+        bound->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height,
+                                           bound->tile_height);
+
+        vc5->job = bound;
+
+        return bound;
+}
+
+/* Lookup callback given to clif_dump_init(): finds the job BO containing addr and stores its mapped pointer in *vaddr. */
+static bool
+vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr)
+{
+        struct vc5_job *job = data;
+        struct set_entry *it;
+
+        set_foreach(job->bos, it) {
+                struct vc5_bo *candidate = (void *)it->key;
+                /* Skip BOs whose [offset, offset + size) range excludes addr. */
+                if (addr < candidate->offset ||
+                    addr >= candidate->offset + candidate->size)
+                        continue;
+                vc5_bo_map(candidate);
+                *vaddr = candidate->map + addr - candidate->offset;
+                return true;
+        }
+        return false;
+}
+
+/* When V3D_DEBUG_CL is set, prints the job's BCL and RCL address ranges to stderr and hands them to the CLIF dumper. */
+static void
+vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job)
+{
+        if (!(V3D_DEBUG & V3D_DEBUG_CL))
+                return;
+
+        struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo,
+                                                stderr, vc5_clif_dump_lookup,
+                                                job);
+        fprintf(stderr, "BCL: 0x%08x..0x%08x\n",
+                job->submit.bcl_start, job->submit.bcl_end);
+        clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end);
+        fprintf(stderr, "RCL: 0x%08x..0x%08x\n",
+                job->submit.rcl_start, job->submit.rcl_end);
+        clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end);
+        /* Release the dumper; it used to be leaked on every dumped submit. */
+        clif_dump_destroy(clif);
+}
+
+/**
+ * Submits the job to the kernel (unless it has nothing to draw) and then frees it.
+ */
+void
+vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
+{
+ if (!job->needs_flush)
+ goto done;
+
+ /* The RCL setup would choke if the draw bounds cause no drawing, so
+ * just drop the drawing if that's the case.
+ */
+ if (job->draw_max_x <= job->draw_min_x ||
+ job->draw_max_y <= job->draw_min_y) {
+ goto done;
+ }
+
+ vc5_emit_rcl(job);
+
+ if (cl_offset(&job->bcl) > 0) {
+ vc5_cl_ensure_space_with_branch(&job->bcl, 2);
+
+ /* Increment the semaphore indicating that binning is done and
+ * unblocking the render thread. Note that this doesn't act
+ * until the FLUSH completes.
+ */
+ cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
+
+ /* The FLUSH caps all of our bin lists with a
+ * VC5_PACKET_RETURN.
+ */
+ cl_emit(&job->bcl, FLUSH, flush);
+ }
+ /* Record where each command list ends in the submit struct passed to the kernel/simulator. */
+ job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
+ job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
+
+ vc5_clif_dump(vc5, job);
+
+ if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
+ int ret;
+
+#ifndef USE_VC5_SIMULATOR
+ ret = drmIoctl(vc5->fd, DRM_IOCTL_VC5_SUBMIT_CL, &job->submit);
+#else
+ ret = vc5_simulator_flush(vc5, &job->submit, job);
+#endif
+ static bool warned = false; /* static: the warning is printed at most once */
+ if (ret && !warned) {
+ fprintf(stderr, "Draw call returned %s. "
+ "Expect corruption.\n", strerror(errno));
+ warned = true;
+ }
+ }
+ /* Throttle: when more than 5 seqnos are unfinished, wait for seqno last_emit_seqno - 5. */
+ if (vc5->last_emit_seqno - vc5->screen->finished_seqno > 5) {
+ if (!vc5_wait_seqno(vc5->screen,
+ vc5->last_emit_seqno - 5,
+ PIPE_TIMEOUT_INFINITE,
+ "job throttling")) {
+ fprintf(stderr, "Job throttling failed\n");
+ }
+ }
+ /* Every path, submitted or not, ends by freeing the job. */
+done:
+ vc5_job_free(vc5, job);
+}
+
+/* Key-equality callback for vc5->jobs: bytewise compare of two struct vc5_job_key. */
+static bool vc5_job_compare(const void *a, const void *b)
+{
+        return !memcmp(a, b, sizeof(struct vc5_job_key));
+}
+
+/* Hash callback for vc5->jobs: hashes the raw bytes of a struct vc5_job_key. */
+static uint32_t vc5_job_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct vc5_job_key));
+}
+
+/* Creates the context's two job-tracking hash tables. */
+void
+vc5_job_init(struct vc5_context *vc5)
+{
+        /* Keyed by the contents of a struct vc5_job_key. */
+        vc5->jobs = _mesa_hash_table_create(vc5, vc5_job_hash, vc5_job_compare);
+        /* Keyed by resource pointer; the value is the job writing that resource. */
+        vc5->write_jobs = _mesa_hash_table_create(vc5, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+}
+
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_program.c b/lib/mesa/src/gallium/drivers/vc5/vc5_program.c
new file mode 100644
index 000000000..8e9af1ad8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_program.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+#include "util/hash_table.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "nir/tgsi_to_nir.h"
+#include "compiler/v3d_compiler.h"
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/* Returns the varying slot (var->data.location) of the first shader output
+ * whose driver_location equals the requested one, or -1 when no output
+ * matches.
+ */
+static gl_varying_slot
+vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
+{
+        nir_foreach_variable(out_var, &s->outputs) {
+                if (out_var->data.driver_location != driver_location)
+                        continue;
+
+                return out_var->data.location;
+        }
+
+        return -1;
+}
+
+/**
+ * Translates the gallium stream output description into the per-shader
+ * transform feedback state.
+ *
+ * For each stream output buffer this collects the list of varying slots
+ * written to it (inserting padding slots for gaps in the buffer), packs one
+ * TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC into so->tf_specs for every buffer
+ * that has outputs, and finally stores the combined slot list in
+ * so->tf_outputs / so->num_tf_outputs.  Does nothing when the shader has no
+ * stream outputs.
+ *
+ * \param so             uncompiled shader; so->base.ir.nir must already be
+ *                       set, since it is used for the slot lookup and as the
+ *                       ralloc parent of so->tf_outputs.
+ * \param stream_output  stream output info from the shader CSO.
+ */
+static void
+vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
+                                   const struct pipe_stream_output_info *stream_output)
+{
+        if (!stream_output->num_outputs)
+                return;
+
+        /* NOTE(review): nothing here bounds slot_count against the size of
+         * this array; large dst_offset gaps would add many padding slots.
+         */
+        struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
+        int slot_count = 0;
+
+        for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
+                /* Number of 32-bit values emitted so far for this buffer. */
+                uint32_t buffer_offset = 0;
+                uint32_t vpm_start = slot_count;
+
+                for (int i = 0; i < stream_output->num_outputs; i++) {
+                        const struct pipe_stream_output *output =
+                                &stream_output->output[i];
+
+                        if (output->output_buffer != buffer)
+                                continue;
+
+                        /* We assume that the SO outputs appear in increasing
+                         * order in the buffer.
+                         */
+                        assert(output->dst_offset >= buffer_offset);
+
+                        /* Pad any undefined slots in the output.  The
+                         * running offset has to advance here, otherwise
+                         * every later output in this buffer would be padded
+                         * again starting from offset 0.
+                         */
+                        while (buffer_offset < output->dst_offset) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+
+                        /* The slot lookup does not depend on the component,
+                         * so do it once per output.
+                         */
+                        gl_varying_slot slot =
+                                vc5_get_slot_for_driver_location(so->base.ir.nir,
+                                                                 output->register_index);
+
+                        /* Set the coordinate shader up to output the
+                         * components of this varying.
+                         */
+                        for (int j = 0; j < output->num_components; j++) {
+                                slots[slot_count] =
+                                        v3d_slot_from_slot_and_component(slot,
+                                                                         output->start_component + j);
+                                slot_count++;
+                                buffer_offset++;
+                        }
+                }
+
+                uint32_t vpm_size = slot_count - vpm_start;
+                if (!vpm_size)
+                        continue;
+
+                struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                        /* We need the offset from the coordinate shader's VPM
+                         * output block, which has the [X, Y, Z, W, Xs, Ys]
+                         * values at the start.  Note that this will need some
+                         * shifting when PSIZ is also present.
+                         */
+                        .first_shaded_vertex_value_to_output = vpm_start + 6,
+                        .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = vpm_size - 1,
+                        .output_buffer_to_write_to = buffer,
+                };
+                V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                               (void *)&so->tf_specs[so->num_tf_specs++],
+                                                               &unpacked);
+        }
+
+        so->num_tf_outputs = slot_count;
+        so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
+                                      slot_count);
+        memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
+}
+
+/* Size callback handed to nir_lower_io: the attribute slot count of a GLSL
+ * type, as reported by glsl_count_attribute_slots(type, false).
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+        int slot_count = glsl_count_attribute_slots(type, false);
+
+        return slot_count;
+}
+
+/**
+ * Shared create_vs_state / create_fs_state hook.
+ *
+ * Allocates a vc5_uncompiled_shader, obtains a NIR shader for it (either the
+ * NIR passed in the CSO or a conversion of its TGSI tokens), runs the
+ * driver's generic NIR lowering/optimization passes and records the
+ * transform feedback outputs.  Hardware-specific variants are not compiled
+ * here.
+ *
+ * Returns the new vc5_uncompiled_shader, or NULL if its allocation fails.
+ */
+static void *
+vc5_shader_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader);
+ if (!so)
+ return NULL;
+
+ /* Per-context id, used below only to label debug output. */
+ so->program_id = vc5->next_uncompiled_program_id++;
+
+ nir_shader *s;
+
+ if (cso->type == PIPE_SHADER_IR_NIR) {
+ /* The backend takes ownership of the NIR shader on state
+ * creation.
+ */
+ s = cso->ir.nir;
+
+ NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
+ (nir_lower_io_options)0);
+ } else {
+ assert(cso->type == PIPE_SHADER_IR_TGSI);
+
+ if (V3D_DEBUG & V3D_DEBUG_TGSI) {
+ fprintf(stderr, "prog %d TGSI:\n",
+ so->program_id);
+ tgsi_dump(cso->tokens, 0);
+ fprintf(stderr, "\n");
+ }
+ s = tgsi_to_nir(cso->tokens, &v3d_nir_options);
+ }
+
+ NIR_PASS_V(s, nir_opt_global_to_local);
+ NIR_PASS_V(s, nir_lower_regs_to_ssa);
+ NIR_PASS_V(s, nir_normalize_cubemap_coords);
+
+ NIR_PASS_V(s, nir_lower_load_const_to_scalar);
+
+ v3d_optimize_nir(s);
+
+ NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local);
+
+ /* Garbage collect dead instructions */
+ nir_sweep(s);
+
+ /* Whatever IR came in, the stored state is always NIR. */
+ so->base.type = PIPE_SHADER_IR_NIR;
+ so->base.ir.nir = s;
+
+ /* Must come after so->base.ir.nir is set: the helper reads it. */
+ vc5_set_transform_feedback_outputs(so, &cso->stream_output);
+
+ if (V3D_DEBUG & (V3D_DEBUG_NIR |
+ v3d_debug_flag_for_shader_stage(s->info.stage))) {
+ fprintf(stderr, "%s prog %d NIR:\n",
+ gl_shader_stage_name(s->info.stage),
+ so->program_id);
+ nir_print_shader(s, stderr);
+ fprintf(stderr, "\n");
+ }
+
+ return so;
+}
+
+/**
+ * Returns the compiled shader variant matching \p key, compiling and caching
+ * it on a miss.
+ *
+ * Fragment shader keys are looked up in vc5->fs_cache, everything else in
+ * vc5->vs_cache.  On a miss the NIR from key->shader_state is compiled for
+ * its stage, the resulting instructions are copied into a newly allocated
+ * BO, and the variant is inserted into the cache under a copy of the key
+ * that is ralloc'ed as a child of the variant.
+ *
+ * \param vc5  context owning the caches.
+ * \param key  pointer to the base of a v3d_fs_key or v3d_vs_key, matching
+ *             the stage of key->shader_state.
+ * \return the cached or newly compiled variant.
+ */
+static struct vc5_compiled_shader *
+vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key)
+{
+        struct vc5_uncompiled_shader *shader_state = key->shader_state;
+        nir_shader *s = shader_state->base.ir.nir;
+
+        struct hash_table *ht;
+        uint32_t key_size;
+        if (s->info.stage == MESA_SHADER_FRAGMENT) {
+                ht = vc5->fs_cache;
+                key_size = sizeof(struct v3d_fs_key);
+        } else {
+                ht = vc5->vs_cache;
+                key_size = sizeof(struct v3d_vs_key);
+        }
+
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        if (entry)
+                return entry->data;
+
+        struct vc5_compiled_shader *shader =
+                rzalloc(NULL, struct vc5_compiled_shader);
+
+        int program_id = shader_state->program_id;
+        int variant_id =
+                p_atomic_inc_return(&shader_state->compiled_variant_count);
+        uint64_t *qpu_insts;
+        uint32_t shader_size;
+
+        switch (s->info.stage) {
+        case MESA_SHADER_VERTEX:
+                shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data);
+
+                qpu_insts = v3d_compile_vs(vc5->screen->compiler,
+                                           (struct v3d_vs_key *)key,
+                                           shader->prog_data.vs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        case MESA_SHADER_FRAGMENT:
+                shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data);
+
+                qpu_insts = v3d_compile_fs(vc5->screen->compiler,
+                                           (struct v3d_fs_key *)key,
+                                           shader->prog_data.fs, s,
+                                           program_id, variant_id,
+                                           &shader_size);
+                break;
+        default:
+                unreachable("bad stage");
+        }
+
+        vc5_set_shader_uniform_dirty_flags(shader);
+
+        /* NOTE(review): neither the compile result nor the BO allocation is
+         * checked for failure before being used here.
+         */
+        shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader");
+        vc5_bo_map(shader->bo);
+        memcpy(shader->bo->map, qpu_insts, shader_size);
+
+        free(qpu_insts);
+
+        /* The cache keeps its own copy of the key, because callers build
+         * their keys on the stack.  The copy is a struct v3d_key (the same
+         * type as the parameter), owned by the variant so that freeing the
+         * variant frees the key too.
+         */
+        struct v3d_key *dup_key;
+        dup_key = ralloc_size(shader, key_size);
+        memcpy(dup_key, key, key_size);
+        _mesa_hash_table_insert(ht, dup_key, shader);
+
+        return shader;
+}
+
+/**
+ * Fills in the parts of a shader key that are common to all stages: the
+ * per-texture-unit sampling state taken from \p texstate and the user clip
+ * plane enables taken from the rasterizer state.  Texture units without a
+ * bound sampler view are left untouched.
+ */
+static void
+vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key,
+                     struct vc5_texture_stateobj *texstate)
+{
+        for (int unit = 0; unit < texstate->num_textures; unit++) {
+                struct pipe_sampler_view *view = texstate->textures[unit];
+                struct vc5_sampler_view *vc5_view = vc5_sampler_view(view);
+                struct pipe_sampler_state *state = texstate->samplers[unit];
+
+                if (!view)
+                        continue;
+
+                key->tex[unit].return_size =
+                        vc5_get_tex_return_size(view->format);
+
+                /* 16-bit returns always use 2 channels (so most state
+                 * changes need no recompile); anything else scales the
+                 * return count with the format's channels.
+                 */
+                if (key->tex[unit].return_size != 16) {
+                        key->tex[unit].return_channels =
+                                vc5_get_tex_return_channels(view->format);
+                } else {
+                        key->tex[unit].return_channels = 2;
+                }
+
+                if (vc5_get_tex_return_size(view->format) != 32) {
+                        /* The sampler state handles the swizzle in this
+                         * case, so the key carries the identity swizzle.
+                         */
+                        key->tex[unit].swizzle[0] = PIPE_SWIZZLE_X;
+                        key->tex[unit].swizzle[1] = PIPE_SWIZZLE_Y;
+                        key->tex[unit].swizzle[2] = PIPE_SWIZZLE_Z;
+                        key->tex[unit].swizzle[3] = PIPE_SWIZZLE_W;
+                } else {
+                        memcpy(key->tex[unit].swizzle,
+                               vc5_view->swizzle,
+                               sizeof(vc5_view->swizzle));
+                }
+
+                if (view->texture->nr_samples > 1) {
+                        key->tex[unit].msaa_width = view->texture->width0;
+                        key->tex[unit].msaa_height = view->texture->height0;
+                } else {
+                        /* The view is known to be non-NULL at this point. */
+                        key->tex[unit].compare_mode = state->compare_mode;
+                        key->tex[unit].compare_func = state->compare_func;
+                        key->tex[unit].wrap_s = state->wrap_s;
+                        key->tex[unit].wrap_t = state->wrap_t;
+                }
+        }
+
+        key->ucp_enables = vc5->rasterizer->base.clip_plane_enable;
+}
+
+/**
+ * Re-selects the compiled fragment shader variant for the current state.
+ *
+ * Returns early when none of the state that feeds the FS key is dirty.
+ * Otherwise it builds a v3d_fs_key from the bound state, looks up (or
+ * compiles) the matching variant, and, if the variant changed, sets
+ * VC5_DIRTY_COMPILED_FS plus the flat-shade and FS-inputs dirty bits when
+ * the corresponding program data differs from the previous variant.
+ *
+ * \param vc5        context whose vc5->prog.fs is updated.
+ * \param prim_mode  PIPE_PRIM_* of the draw, used for the points/lines key
+ *                   bits.
+ */
+static void
+vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+        struct vc5_job *job = vc5->job;
+        struct v3d_fs_key local_key;
+        struct v3d_fs_key *key = &local_key;
+
+        if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+                            VC5_DIRTY_BLEND |
+                            VC5_DIRTY_FRAMEBUFFER |
+                            VC5_DIRTY_ZSA |
+                            VC5_DIRTY_RASTERIZER |
+                            VC5_DIRTY_SAMPLE_MASK |
+                            VC5_DIRTY_FRAGTEX |
+                            VC5_DIRTY_UNCOMPILED_FS))) {
+                return;
+        }
+
+        /* The key is hashed and compared as raw bytes, so it must be fully
+         * zeroed before the individual fields are set.
+         */
+        memset(key, 0, sizeof(*key));
+        vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex);
+        key->base.shader_state = vc5->prog.bind_fs;
+        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
+        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
+                         prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->clamp_color = vc5->rasterizer->base.clamp_fragment_color;
+        if (vc5->blend->logicop_enable) {
+                key->logicop_func = vc5->blend->logicop_func;
+        } else {
+                key->logicop_func = PIPE_LOGICOP_COPY;
+        }
+        if (job->msaa) {
+                key->msaa = vc5->rasterizer->base.multisample;
+                key->sample_coverage = (vc5->rasterizer->base.multisample &&
+                                        vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1);
+                key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage;
+                key->sample_alpha_to_one = vc5->blend->alpha_to_one;
+        }
+
+        key->depth_enabled = (vc5->zsa->base.depth.enabled ||
+                              vc5->zsa->base.stencil[0].enabled);
+        if (vc5->zsa->base.alpha.enabled) {
+                key->alpha_test = true;
+                key->alpha_test_func = vc5->zsa->base.alpha.func;
+        }
+
+        /* gl_FragColor's propagation to however many bound color buffers
+         * there are means that the buffer count needs to be in the key.
+         */
+        key->nr_cbufs = vc5->framebuffer.nr_cbufs;
+
+        for (int i = 0; i < key->nr_cbufs; i++) {
+                struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i];
+
+                /* A color buffer slot below nr_cbufs may be unbound; it
+                 * contributes no per-buffer key bits.
+                 */
+                if (!cbuf)
+                        continue;
+
+                const struct util_format_description *desc =
+                        util_format_description(cbuf->format);
+
+                if (desc->swizzle[0] == PIPE_SWIZZLE_Z)
+                        key->swap_color_rb |= 1 << i;
+                if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
+                    desc->channel[0].size == 32) {
+                        key->f32_color_rb |= 1 << i;
+                }
+        }
+
+        if (key->is_points) {
+                key->point_sprite_mask =
+                        vc5->rasterizer->base.sprite_coord_enable;
+                key->point_coord_upper_left =
+                        (vc5->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
+
+        key->light_twoside = vc5->rasterizer->base.light_twoside;
+
+        struct vc5_compiled_shader *old_fs = vc5->prog.fs;
+        vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base);
+        if (vc5->prog.fs == old_fs)
+                return;
+
+        vc5->dirty |= VC5_DIRTY_COMPILED_FS;
+
+        if (old_fs &&
+            (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+             old_fs->prog_data.fs->flat_shade_flags ||
+             (vc5->rasterizer->base.flatshade &&
+              vc5->prog.fs->prog_data.fs->shade_model_flags !=
+              old_fs->prog_data.fs->shade_model_flags))) {
+                vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,
+                             old_fs->prog_data.fs->input_slots,
+                             sizeof(vc5->prog.fs->prog_data.fs->input_slots))) {
+                vc5->dirty |= VC5_DIRTY_FS_INPUTS;
+        }
+}
+
+/**
+ * Re-selects the compiled vertex shader and coordinate shader variants for
+ * the current state.
+ *
+ * Returns early when none of the state feeding the VS key is dirty.  The
+ * same key is used twice: first as built, to look up the vertex shader, and
+ * then with is_coord set and its inputs replaced by the transform feedback
+ * outputs, to look up the coordinate shader.
+ *
+ * Reads vc5->prog.fs, so the fragment shader must have been selected
+ * before this is called.
+ */
+static void
+vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode)
+{
+ struct v3d_vs_key local_key;
+ struct v3d_vs_key *key = &local_key;
+
+ if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE |
+ VC5_DIRTY_RASTERIZER |
+ VC5_DIRTY_VERTTEX |
+ VC5_DIRTY_VTXSTATE |
+ VC5_DIRTY_UNCOMPILED_VS |
+ VC5_DIRTY_FS_INPUTS))) {
+ return;
+ }
+
+ /* Keys are hashed and compared as raw bytes, so zero everything first. */
+ memset(key, 0, sizeof(*key));
+ vc5_setup_shared_key(vc5, &key->base, &vc5->verttex);
+ key->base.shader_state = vc5->prog.bind_vs;
+ key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs;
+ STATIC_ASSERT(sizeof(key->fs_inputs) ==
+ sizeof(vc5->prog.fs->prog_data.fs->input_slots));
+ memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots,
+ sizeof(key->fs_inputs));
+ key->clamp_color = vc5->rasterizer->base.clamp_vertex_color;
+
+ key->per_vertex_point_size =
+ (prim_mode == PIPE_PRIM_POINTS &&
+ vc5->rasterizer->base.point_size_per_vertex);
+
+ struct vc5_compiled_shader *vs =
+ vc5_get_compiled_shader(vc5, &key->base);
+ if (vs != vc5->prog.vs) {
+ vc5->prog.vs = vs;
+ vc5->dirty |= VC5_DIRTY_COMPILED_VS;
+ }
+
+ key->is_coord = true;
+ /* Coord shaders only output varyings used by transform feedback. */
+ struct vc5_uncompiled_shader *shader_state = key->base.shader_state;
+ /* NOTE(review): no check is visible here that num_tf_outputs fits in
+ * key->fs_inputs, and tf_outputs is only allocated when the shader has
+ * stream outputs -- confirm both are safe for this memcpy.
+ */
+ memcpy(key->fs_inputs, shader_state->tf_outputs,
+ sizeof(*key->fs_inputs) * shader_state->num_tf_outputs);
+ /* Clear the leftover FS input slots past the TF outputs so stale
+ * entries do not end up in the byte-compared key.
+ */
+ if (shader_state->num_tf_outputs < key->num_fs_inputs) {
+ memset(&key->fs_inputs[shader_state->num_tf_outputs],
+ 0,
+ sizeof(*key->fs_inputs) * (key->num_fs_inputs -
+ shader_state->num_tf_outputs));
+ }
+ key->num_fs_inputs = shader_state->num_tf_outputs;
+
+ struct vc5_compiled_shader *cs =
+ vc5_get_compiled_shader(vc5, &key->base);
+ if (cs != vc5->prog.cs) {
+ vc5->prog.cs = cs;
+ vc5->dirty |= VC5_DIRTY_COMPILED_CS;
+ }
+}
+
+/* Selects the compiled shader variants for a draw with the given primitive
+ * mode.  The fragment shader goes first because the vertex shader key is
+ * built from vc5->prog.fs's program data.
+ */
+void
+vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode)
+{
+ vc5_update_compiled_fs(vc5, prim_mode);
+ vc5_update_compiled_vs(vc5, prim_mode);
+}
+
+/* Hash callback for the fragment shader cache: hashes the raw bytes of a
+ * struct v3d_fs_key.
+ */
+static uint32_t
+fs_cache_hash(const void *key)
+{
+        const size_t key_size = sizeof(struct v3d_fs_key);
+
+        return _mesa_hash_data(key, key_size);
+}
+
+/* Hash callback for the vertex shader cache: hashes the raw bytes of a
+ * struct v3d_vs_key.
+ */
+static uint32_t
+vs_cache_hash(const void *key)
+{
+        const size_t key_size = sizeof(struct v3d_vs_key);
+
+        return _mesa_hash_data(key, key_size);
+}
+
+/* Equality callback for the fragment shader cache: keys match when their
+ * struct v3d_fs_key bytes are identical.
+ */
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+        const size_t key_size = sizeof(struct v3d_fs_key);
+
+        return !memcmp(key1, key2, key_size);
+}
+
+/* Equality callback for the vertex shader cache: keys match when their
+ * struct v3d_vs_key bytes are identical.
+ */
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+        const size_t key_size = sizeof(struct v3d_vs_key);
+
+        return !memcmp(key1, key2, key_size);
+}
+
+/**
+ * Drops one cache entry if it was compiled from the uncompiled shader
+ * \p so: the entry is removed from \p ht, the variant's BO reference is
+ * released, \p *last_compile is cleared if it points at the variant, and
+ * the variant is freed.  Entries belonging to other shaders are left alone.
+ */
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc5_compiled_shader **last_compile,
+                             struct hash_entry *entry,
+                             struct vc5_uncompiled_shader *so)
+{
+        const struct v3d_key *entry_key = entry->key;
+
+        if (entry_key->shader_state != so)
+                return;
+
+        struct vc5_compiled_shader *variant = entry->data;
+
+        _mesa_hash_table_remove(ht, entry);
+        vc5_bo_unreference(&variant->bo);
+
+        /* Don't leave the caller's "current variant" pointer dangling. */
+        if (*last_compile == variant)
+                *last_compile = NULL;
+
+        ralloc_free(variant);
+}
+
+/**
+ * Shared delete_vs_state / delete_fs_state hook.
+ *
+ * Removes and frees every compiled variant of the shader from both variant
+ * caches, then frees the NIR shader and the uncompiled shader itself.
+ *
+ * NOTE(review): the vs_cache also holds coordinate shader variants (they
+ * are looked up through it in vc5_update_compiled_vs), but only
+ * vc5->prog.vs is cleared here -- vc5->prog.cs may be left pointing at a
+ * freed variant.  Confirm whether that can be observed.
+ */
+static void
+vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_uncompiled_shader *so = hwcso;
+
+ struct hash_entry *entry;
+ hash_table_foreach(vc5->fs_cache, entry) {
+ delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs,
+ entry, so);
+ }
+ hash_table_foreach(vc5->vs_cache, entry) {
+ delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs,
+ entry, so);
+ }
+
+ /* so->tf_outputs was ralloc'ed with the NIR shader as its parent, so
+ * it is released together with the shader.
+ */
+ ralloc_free(so->base.ir.nir);
+ free(so);
+}
+
+/* bind_fs_state hook: records the newly bound uncompiled fragment shader
+ * and flags it dirty so a variant is re-selected later.
+ */
+static void
+vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_UNCOMPILED_FS;
+        ctx->prog.bind_fs = hwcso;
+}
+
+/* bind_vs_state hook: records the newly bound uncompiled vertex shader and
+ * flags it dirty so a variant is re-selected later.
+ */
+static void
+vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->dirty |= VC5_DIRTY_UNCOMPILED_VS;
+        ctx->prog.bind_vs = hwcso;
+}
+
+/* Installs the shader state hooks on the pipe context and creates the
+ * fragment and vertex variant caches (allocated with the context as their
+ * parent).
+ */
+void
+vc5_program_init(struct pipe_context *pctx)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        /* Variant caches, keyed by the raw bytes of the per-stage key. */
+        ctx->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash,
+                                                fs_cache_compare);
+        ctx->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
+                                                vs_cache_compare);
+
+        /* Both stages share the same create/delete implementation. */
+        pctx->create_fs_state = vc5_shader_state_create;
+        pctx->create_vs_state = vc5_shader_state_create;
+        pctx->delete_fs_state = vc5_shader_state_delete;
+        pctx->delete_vs_state = vc5_shader_state_delete;
+
+        pctx->bind_vs_state = vc5_vp_state_bind;
+        pctx->bind_fs_state = vc5_fp_state_bind;
+}
+
+/* Tears down the variant caches' contents: every compiled variant in the
+ * fragment and vertex caches has its BO reference released, is freed, and
+ * has its entry removed from the table.
+ */
+void
+vc5_program_fini(struct pipe_context *pctx)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct hash_table *caches[] = { ctx->fs_cache, ctx->vs_cache };
+
+        for (unsigned c = 0; c < sizeof(caches) / sizeof(caches[0]); c++) {
+                struct hash_entry *entry;
+
+                hash_table_foreach(caches[c], entry) {
+                        struct vc5_compiled_shader *variant = entry->data;
+
+                        vc5_bo_unreference(&variant->bo);
+                        ralloc_free(variant);
+                        _mesa_hash_table_remove(caches[c], entry);
+                }
+        }
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_query.c b/lib/mesa/src/gallium/drivers/vc5/vc5_query.c
new file mode 100644
index 000000000..c114e76ee
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_query.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Stub support for occlusion queries.
+ *
+ * Since we expose support for GL 2.0, we have to expose occlusion queries,
+ * but the spec allows you to expose 0 query counter bits, so we just return 0
+ * as the result of all our queries.
+ */
+#include "vc5_context.h"
+
+/* Placeholder query object: the stub implementation keeps no per-query
+ * state, so this only holds a padding byte.
+ */
+struct vc5_query
+{
+ uint8_t pad;
+};
+
+/* create_query hook: allocates a zeroed placeholder query object.  The
+ * query type and index are ignored.  Returns NULL if the allocation fails.
+ */
+static struct pipe_query *
+vc5_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+        struct vc5_query *stub = calloc(1, sizeof(struct vc5_query));
+
+        /* struct pipe_query has no definition of its own; it is only used
+         * as an opaque handle type.
+         */
+        return (struct pipe_query *)stub;
+}
+
+/* destroy_query hook: releases the object allocated by vc5_create_query. */
+static void
+vc5_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ free(query);
+}
+
+/* begin_query hook: a no-op in the stub implementation; always reports
+ * success.
+ */
+static boolean
+vc5_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ return true;
+}
+
+/* end_query hook: a no-op in the stub implementation; always reports
+ * success.
+ */
+static bool
+vc5_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ return true;
+}
+
+/* get_query_result hook: every query reports a 64-bit result of 0 and is
+ * always considered ready, regardless of the wait flag.
+ */
+static boolean
+vc5_get_query_result(struct pipe_context *ctx, struct pipe_query *query,
+                     boolean wait, union pipe_query_result *vresult)
+{
+        vresult->u64 = 0;
+
+        return true;
+}
+
+/* set_active_query_state hook: intentionally empty, the stub queries have
+ * no state to pause or resume.
+ */
+static void
+vc5_set_active_query_state(struct pipe_context *pipe, boolean enable)
+{
+}
+
+/* Installs the stub query hooks on the pipe context. */
+void
+vc5_query_init(struct pipe_context *pctx)
+{
+        /* Object lifetime. */
+        pctx->create_query = vc5_create_query;
+        pctx->destroy_query = vc5_destroy_query;
+
+        /* Query execution and readback. */
+        pctx->begin_query = vc5_begin_query;
+        pctx->end_query = vc5_end_query;
+        pctx->get_query_result = vc5_get_query_result;
+
+        pctx->set_active_query_state = vc5_set_active_query_state;
+}
+
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c b/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c
new file mode 100644
index 000000000..4ef2d8379
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_rcl.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/**
+ * Emits the generic per-tile command list into the job's indirect CL and
+ * then points the RCL at it with START_ADDRESS_OF_GENERIC_TILE_LIST.
+ *
+ * The list optionally reloads buffers that are resolved but were not
+ * cleared, sets implicit tile coordinates, branches to the implicit tile
+ * list, stores the resolved buffers and returns from the sub-list.
+ */
+static void
+vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
+{
+ /* Emit the generic list in our indirect state -- the rcl will just
+ * have pointers into it.
+ */
+ struct vc5_cl *cl = &job->indirect;
+ vc5_cl_ensure_space(cl, 200, 1);
+ struct vc5_cl_reloc tile_list_start = cl_get_address(cl);
+
+ const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
+ PIPE_CLEAR_COLOR1 |
+ PIPE_CLEAR_COLOR2 |
+ PIPE_CLEAR_COLOR3);
+ /* Bit position of PIPE_CLEAR_COLOR0, used to shift the PIPE_CLEAR_*
+ * color bits down to a mask starting at bit 0.
+ */
+ const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+
+ uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
+
+ /* The initial reload will be queued until we get the
+ * tile coordinates.
+ */
+ if (read_but_not_cleared) {
+ cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
+ load.disable_colour_buffer_load =
+ (~read_but_not_cleared & pipe_clear_color_buffers) >>
+ first_color_buffer_bit;
+ load.enable_z_load =
+ read_but_not_cleared & PIPE_CLEAR_DEPTH;
+ load.enable_stencil_load =
+ read_but_not_cleared & PIPE_CLEAR_STENCIL;
+ }
+ }
+
+ /* Tile Coordinates triggers the reload and sets where the stores
+ * go. There must be one per store packet.
+ */
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+ uint32_t color_write_enables =
+ job->resolve >> first_color_buffer_bit;
+
+ /* The packet field is a disable mask, hence the inversion; only the
+ * low four bits are kept.
+ */
+ store.disable_color_buffer_write = (~color_write_enables) & 0xf;
+ store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
+ store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+
+ store.disable_colour_buffers_clear_on_write =
+ (job->cleared & pipe_clear_color_buffers) == 0;
+ store.disable_z_buffer_clear_on_write =
+ !(job->cleared & PIPE_CLEAR_DEPTH);
+ store.disable_stencil_buffer_clear_on_write =
+ !(job->cleared & PIPE_CLEAR_STENCIL);
+ };
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ /* Record where the list just emitted starts and ends. */
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = cl_get_address(cl);
+ }
+}
+
+/* Integer division of a by b, rounded up.  Every use of an argument is
+ * parenthesized so that expression arguments (e.g. "x + 1" as the divisor)
+ * expand with the intended precedence.  Arguments are evaluated more than
+ * once, so they must not have side effects.
+ */
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+/**
+ * Builds the render control list (RCL) for a job.
+ *
+ * Allocates the RCL, emits the tile rendering mode configuration (common
+ * config, one render target config plus clear colors per bound color
+ * buffer, optional Z/stencil config, Z/stencil clear values), the tile
+ * list and supertile setup, an initial tile buffer clear, the generic
+ * per-tile list, and finally one SUPERTILE_COORDINATES packet per supertile
+ * overlapping the job's draw bounds, followed by END_OF_RENDERING.
+ *
+ * Also sets job->submit.rcl_start and bumps the write counters of the
+ * resources that the job resolves to.
+ */
+void
+vc5_emit_rcl(struct vc5_job *job)
+{
+ /* The RCL list should be empty. */
+ assert(!job->rcl.bo);
+
+ /* Reserve room for the fixed packets plus up to 256 supertile
+ * coordinate packets (the supertile count is capped below).
+ */
+ vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 *
+ cl_packet_length(SUPERTILE_COORDINATES));
+ job->submit.rcl_start = job->rcl.bo->offset;
+ vc5_job_add_bo(job, job->rcl.bo);
+
+ /* Highest bound color buffer index + 1; lower slots may be unbound. */
+ int nr_cbufs = 0;
+ for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
+ if (job->cbufs[i])
+ nr_cbufs = i + 1;
+ }
+
+ /* Common config must be the first TILE_RENDERING_MODE_CONFIGURATION
+ * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are
+ * optional updates to the previous HW state.
+ */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
+ config) {
+ config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
+ config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
+
+ config.early_z_disable = !job->uses_early_z;
+
+ config.image_width_pixels = job->draw_width;
+ config.image_height_pixels = job->draw_height;
+
+ config.number_of_render_targets_minus_1 =
+ MAX2(nr_cbufs, 1) - 1;
+
+ config.multisample_mode_4x = job->msaa;
+
+ config.maximum_bpp_of_all_render_targets = job->internal_bpp;
+ }
+
+ for (int i = 0; i < nr_cbufs; i++) {
+ struct pipe_surface *psurf = job->cbufs[i];
+ if (!psurf)
+ continue;
+
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
+ struct vc5_surface *surf = vc5_surface(psurf);
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+ rt.address = cl_address(rsc->bo, surf->offset);
+ rt.internal_type = surf->internal_type;
+ rt.output_image_format = surf->format;
+ rt.memory_format = surf->tiling;
+ rt.internal_bpp = surf->internal_bpp;
+ rt.render_target_number = i;
+
+ if (job->resolve & PIPE_CLEAR_COLOR0 << i)
+ rsc->writes++;
+ }
+
+ /* The clear color is split across up to three packets; parts 2 and 3
+ * are only emitted for formats with larger block sizes.
+ */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
+ clear) {
+ clear.clear_color_low_32_bits = job->clear_color[i][0];
+ clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
+ clear.render_target_number = i;
+ };
+
+ if (util_format_get_blocksize(psurf->format) > 7) {
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2,
+ clear) {
+ clear.clear_color_mid_low_32_bits =
+ ((job->clear_color[i][1] >> 24) |
+ (job->clear_color[i][2] << 8));
+ clear.clear_color_mid_high_24_bits =
+ ((job->clear_color[i][2] >> 24) |
+ ((job->clear_color[i][3] & 0xffff) << 8));
+ clear.render_target_number = i;
+ };
+ }
+
+ if (util_format_get_blocksize(psurf->format) > 14) {
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3,
+ clear) {
+ clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
+ clear.render_target_number = i;
+ };
+ }
+ }
+
+ /* TODO: Don't bother emitting if we don't load/clear Z/S. */
+ if (job->zsbuf) {
+ struct pipe_surface *psurf = job->zsbuf;
+ struct vc5_surface *surf = vc5_surface(psurf);
+ struct vc5_resource *rsc = vc5_resource(psurf->texture);
+
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
+ zs.address = cl_address(rsc->bo, surf->offset);
+
+ zs.internal_type = surf->internal_type;
+ zs.output_image_format = surf->format;
+
+ struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+ /* XXX */
+ zs.padded_height_of_output_image_in_uif_blocks =
+ (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp));
+
+ assert(surf->tiling != VC5_TILING_RASTER);
+ zs.memory_format = surf->tiling;
+ }
+
+ if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
+ rsc->writes++;
+ }
+
+ /* Ends rendering mode config. */
+ cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
+ clear) {
+ clear.z_clear_value = job->clear_z;
+ clear.stencil_vg_mask_clear_value = job->clear_s;
+ };
+
+ /* Always set initial block size before the first branch, which needs
+ * to match the value from binning mode config.
+ */
+ cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+ init.use_auto_chained_tile_lists = true;
+ init.size_of_first_block_in_chained_tile_lists =
+ TILE_ALLOCATION_BLOCK_SIZE_64B;
+ }
+
+ uint32_t supertile_w = 1, supertile_h = 1;
+
+ /* If doing multicore binning, we would need to initialize each core's
+ * tile list here.
+ */
+ cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+ list.address = cl_address(job->tile_alloc, 0);
+ }
+
+ cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
+ uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
+ const uint32_t max_supertiles = 256;
+
+ /* Size up our supertiles until we get under the limit. */
+ for (;;) {
+ frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
+ supertile_w);
+ frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
+ supertile_h);
+ if (frame_w_in_supertiles * frame_h_in_supertiles <
+ max_supertiles) {
+ break;
+ }
+
+ /* Grow the smaller dimension first to keep supertiles roughly
+ * square.
+ */
+ if (supertile_w < supertile_h)
+ supertile_w++;
+ else
+ supertile_h++;
+ }
+
+ config.total_frame_width_in_tiles = job->draw_tiles_x;
+ config.total_frame_height_in_tiles = job->draw_tiles_y;
+
+ config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
+ config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
+
+ config.total_frame_width_in_supertiles = frame_w_in_supertiles;
+ config.total_frame_height_in_supertiles = frame_h_in_supertiles;
+ }
+
+ /* Start by clearing the tile buffer. */
+ cl_emit(&job->rcl, TILE_COORDINATES, coords) {
+ coords.tile_column_number = 0;
+ coords.tile_row_number = 0;
+ }
+
+ cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+
+ cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
+
+ vc5_rcl_emit_generic_per_tile_list(job);
+
+ cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+
+ /* XXX: Use Morton order */
+ /* Only the supertiles overlapping the draw bounds are rendered. The
+ * max bounds are treated as exclusive, hence the "- 1".
+ */
+ uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
+ uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
+ uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
+ uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
+ uint32_t max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
+ uint32_t max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
+
+ for (int y = min_y_supertile; y <= max_y_supertile; y++) {
+ for (int x = min_x_supertile; x <= max_x_supertile; x++) {
+ cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
+ coords.column_number_in_supertiles = x;
+ coords.row_number_in_supertiles = y;
+ }
+ }
+ }
+
+ cl_emit(&job->rcl, END_OF_RENDERING, end);
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c
new file mode 100644
index 000000000..42d58791f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.c
@@ -0,0 +1,751 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_blit.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
+
+#include "drm_fourcc.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "vc5_tiling.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#endif
+
+ /**
+  * Allocates a new BO large enough for the resource's current slice layout
+  * and installs it as rsc->bo, dropping the reference to the previous BO.
+  *
+  * Returns true on success.  On failure rsc->bo is left untouched and false
+  * is returned.
+  */
+ static bool
+ vc5_resource_bo_alloc(struct vc5_resource *rsc)
+ {
+         struct pipe_resource *prsc = &rsc->base;
+         struct pipe_screen *pscreen = prsc->screen;
+         struct vc5_bo *bo;
+         int layers = (prsc->target == PIPE_TEXTURE_3D ?
+                       prsc->depth0 : prsc->array_size);
+
+         /* The last layer needs one full miptree (slices[0] is the end of the
+          * tree, since smaller levels are laid out first), and every layer
+          * before it occupies one cube_map_stride.
+          */
+         bo = vc5_bo_alloc(vc5_screen(pscreen),
+                           rsc->slices[0].offset +
+                           rsc->slices[0].size +
+                           rsc->cube_map_stride * (MAX2(layers, 1) - 1),
+                           "resource");
+         if (!bo)
+                 return false;
+
+         DBG(V3D_DEBUG_SURFACE, "alloc %p @ 0x%08x:\n", rsc, bo->offset);
+         vc5_bo_unreference(&rsc->bo);
+         rsc->bo = bo;
+         return true;
+ }
+
+ /**
+  * pipe_context::transfer_unmap implementation.
+  *
+  * For transfers that went through a malloc'ed staging copy (trans->map),
+  * written data is tiled back into the BO and the staging copy is freed.  For
+  * transfers that went through a temporary single-sample resource
+  * (trans->ss_resource), written data is blitted back into the real resource.
+  * Finally all references held by the transfer are dropped and the transfer
+  * is returned to the context's slab pool.
+  */
+ static void
+ vc5_resource_transfer_unmap(struct pipe_context *pctx,
+                             struct pipe_transfer *ptrans)
+ {
+         struct vc5_context *vc5 = vc5_context(pctx);
+         struct vc5_transfer *trans = vc5_transfer(ptrans);
+
+         if (trans->map) {
+                 struct vc5_resource *rsc;
+                 struct vc5_resource_slice *slice;
+                 if (trans->ss_resource) {
+                         /* The staging copy shadows the temporary, which only
+                          * has a single level.
+                          */
+                         rsc = vc5_resource(trans->ss_resource);
+                         slice = &rsc->slices[0];
+                 } else {
+                         rsc = vc5_resource(ptrans->resource);
+                         slice = &rsc->slices[ptrans->level];
+                 }
+
+                 if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                         vc5_store_tiled_image(rsc->bo->map + slice->offset +
+                                               ptrans->box.z * rsc->cube_map_stride,
+                                               slice->stride,
+                                               trans->map, ptrans->stride,
+                                               slice->tiling, rsc->cpp,
+                                               rsc->base.height0,
+                                               &ptrans->box);
+                 }
+                 free(trans->map);
+         }
+
+         if (trans->ss_resource) {
+                 if (ptrans->usage & PIPE_TRANSFER_WRITE) {
+                         struct pipe_blit_info blit;
+                         memset(&blit, 0, sizeof(blit));
+
+                         blit.src.resource = trans->ss_resource;
+                         blit.src.format = trans->ss_resource->format;
+                         blit.src.box.width = trans->ss_box.width;
+                         blit.src.box.height = trans->ss_box.height;
+                         blit.src.box.depth = 1;
+
+                         blit.dst.resource = ptrans->resource;
+                         blit.dst.format = ptrans->resource->format;
+                         blit.dst.level = ptrans->level;
+                         blit.dst.box = trans->ss_box;
+
+                         blit.mask = util_format_get_mask(ptrans->resource->format);
+                         blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                         pctx->blit(pctx, &blit);
+                 }
+
+                 /* Drop the temporary for read-only maps as well; releasing
+                  * it only after a write-back leaked it on every read-only
+                  * map of a multisampled resource.
+                  */
+                 pipe_resource_reference(&trans->ss_resource, NULL);
+         }
+
+         pipe_resource_reference(&ptrans->resource, NULL);
+         slab_free(&vc5->transfer_pool, ptrans);
+ }
+
+ /**
+  * Creates a single-layer resource with the same target and format as prsc,
+  * sized to the width and height of box.  All other template fields are left
+  * zeroed.  Returns whatever the screen's resource_create() hook returns.
+  */
+ static struct pipe_resource *
+ vc5_get_temp_resource(struct pipe_context *pctx,
+                       struct pipe_resource *prsc,
+                       const struct pipe_box *box)
+ {
+         struct pipe_screen *pscreen = pctx->screen;
+         struct pipe_resource tmpl;
+
+         memset(&tmpl, 0, sizeof(tmpl));
+
+         /* Inherit what kind of resource it is from the original... */
+         tmpl.format = prsc->format;
+         tmpl.target = prsc->target;
+
+         /* ...but cover only the requested box, as a single layer. */
+         tmpl.array_size = 1;
+         tmpl.depth0 = 1;
+         tmpl.height0 = box->height;
+         tmpl.width0 = box->width;
+
+         return pscreen->resource_create(pscreen, &tmpl);
+ }
+
+ /**
+  * pipe_context::transfer_map implementation.
+  *
+  * Flushes (or sidesteps, by reallocating the BO) any jobs that conflict with
+  * the requested access, then maps the BO.  Raster resources are returned as
+  * a pointer straight into the BO mapping.  Tiled resources are returned as a
+  * malloc'ed linear staging copy that transfer_unmap tiles back.  Multisampled
+  * resources are first resolved into a temporary single-sample resource, and
+  * that temporary is what actually gets mapped.
+  *
+  * On success, returns the CPU pointer and stores the transfer in *pptrans.
+  * On failure, returns NULL, releases everything the transfer had acquired,
+  * and leaves *pptrans untouched.
+  */
+ static void *
+ vc5_resource_transfer_map(struct pipe_context *pctx,
+                           struct pipe_resource *prsc,
+                           unsigned level, unsigned usage,
+                           const struct pipe_box *box,
+                           struct pipe_transfer **pptrans)
+ {
+         struct vc5_context *vc5 = vc5_context(pctx);
+         struct vc5_resource *rsc = vc5_resource(prsc);
+         struct vc5_transfer *trans;
+         struct pipe_transfer *ptrans;
+         enum pipe_format format = prsc->format;
+         char *buf;
+
+         /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+          * being mapped.
+          */
+         if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+             !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+             !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
+             prsc->last_level == 0 &&
+             prsc->width0 == box->width &&
+             prsc->height0 == box->height &&
+             prsc->depth0 == box->depth &&
+             prsc->array_size == 1 &&
+             rsc->bo->private) {
+                 usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+         }
+
+         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+                 if (vc5_resource_bo_alloc(rsc)) {
+                         /* If it might be bound as one of our vertex buffers
+                          * or UBOs, make sure we re-emit vertex buffer state
+                          * or uniforms.
+                          */
+                         if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                 vc5->dirty |= VC5_DIRTY_VTXBUF;
+                         if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER)
+                                 vc5->dirty |= VC5_DIRTY_CONSTBUF;
+                 } else {
+                         /* If we failed to reallocate, flush users so that we
+                          * don't violate any syncing requirements.
+                          */
+                         vc5_flush_jobs_reading_resource(vc5, prsc);
+                 }
+         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+                 /* If we're writing and the buffer is being used by the CL, we
+                  * have to flush the CL first.  If we're only reading, we need
+                  * to flush if the CL has written our buffer.
+                  */
+                 if (usage & PIPE_TRANSFER_WRITE)
+                         vc5_flush_jobs_reading_resource(vc5, prsc);
+                 else
+                         vc5_flush_jobs_writing_resource(vc5, prsc);
+         }
+
+         if (usage & PIPE_TRANSFER_WRITE) {
+                 rsc->writes++;
+                 rsc->initialized_buffers = ~0;
+         }
+
+         trans = slab_alloc(&vc5->transfer_pool);
+         if (!trans)
+                 return NULL;
+
+         /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */
+
+         /* slab_alloc_st() doesn't zero: */
+         memset(trans, 0, sizeof(*trans));
+         ptrans = &trans->base;
+
+         pipe_resource_reference(&ptrans->resource, prsc);
+         ptrans->level = level;
+         ptrans->usage = usage;
+         ptrans->box = *box;
+
+         /* If the resource is multisampled, we need to resolve to single
+          * sample.  This seems like it should be handled at a higher layer.
+          */
+         if (prsc->nr_samples > 1) {
+                 trans->ss_resource = vc5_get_temp_resource(pctx, prsc, box);
+                 if (!trans->ss_resource)
+                         goto fail;
+                 assert(!trans->ss_resource->nr_samples);
+
+                 /* The ptrans->box gets modified for tile alignment, so save
+                  * the original box for unmap time.
+                  */
+                 trans->ss_box = *box;
+
+                 if (usage & PIPE_TRANSFER_READ) {
+                         struct pipe_blit_info blit;
+                         memset(&blit, 0, sizeof(blit));
+
+                         blit.src.resource = ptrans->resource;
+                         blit.src.format = ptrans->resource->format;
+                         blit.src.level = ptrans->level;
+                         blit.src.box = trans->ss_box;
+
+                         blit.dst.resource = trans->ss_resource;
+                         blit.dst.format = trans->ss_resource->format;
+                         blit.dst.box.width = trans->ss_box.width;
+                         blit.dst.box.height = trans->ss_box.height;
+                         blit.dst.box.depth = 1;
+
+                         blit.mask = util_format_get_mask(prsc->format);
+                         blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+                         pctx->blit(pctx, &blit);
+                         vc5_flush_jobs_writing_resource(vc5, blit.dst.resource);
+                 }
+
+                 /* The rest of the mapping process should use our temporary.
+                  * The temporary has a single level, so address it through
+                  * slice 0 (as transfer_unmap does); ptrans->level keeps the
+                  * caller's level for the write-back blit.
+                  */
+                 prsc = trans->ss_resource;
+                 rsc = vc5_resource(prsc);
+                 level = 0;
+                 ptrans->box.x = 0;
+                 ptrans->box.y = 0;
+                 ptrans->box.z = 0;
+         }
+
+         /* Note that the current kernel implementation is synchronous, so no
+          * need to do syncing stuff here yet.
+          */
+
+         if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                 buf = vc5_bo_map_unsynchronized(rsc->bo);
+         else
+                 buf = vc5_bo_map(rsc->bo);
+         if (!buf) {
+                 fprintf(stderr, "Failed to map bo\n");
+                 goto fail;
+         }
+
+         struct vc5_resource_slice *slice = &rsc->slices[level];
+         if (rsc->tiled) {
+                 /* No direct mappings of tiled, since we need to manually
+                  * tile/untile.  Go through the failure path so the transfer
+                  * and its references are released.
+                  */
+                 if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+                         goto fail;
+
+                 ptrans->stride = ptrans->box.width * rsc->cpp;
+                 ptrans->layer_stride = ptrans->stride * ptrans->box.height;
+
+                 trans->map = malloc(ptrans->layer_stride * ptrans->box.depth);
+                 if (!trans->map)
+                         goto fail;
+
+                 if (usage & PIPE_TRANSFER_READ) {
+                         vc5_load_tiled_image(trans->map, ptrans->stride,
+                                              buf + slice->offset +
+                                              ptrans->box.z * rsc->cube_map_stride,
+                                              slice->stride,
+                                              slice->tiling, rsc->cpp,
+                                              rsc->base.height0,
+                                              &ptrans->box);
+                 }
+
+                 *pptrans = ptrans;
+                 return trans->map;
+         }
+
+         ptrans->stride = slice->stride;
+         ptrans->layer_stride = ptrans->stride;
+
+         *pptrans = ptrans;
+         return buf + slice->offset +
+                 ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride +
+                 ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp +
+                 ptrans->box.z * rsc->cube_map_stride;
+
+ fail:
+         /* Tear the transfer down by hand instead of going through
+          * transfer_unmap: nothing was written through this mapping, so the
+          * tiled store and the multisample write-back blit must not run.
+          */
+         free(trans->map);
+         pipe_resource_reference(&trans->ss_resource, NULL);
+         pipe_resource_reference(&ptrans->resource, NULL);
+         slab_free(&vc5->transfer_pool, ptrans);
+         return NULL;
+ }
+
+ /**
+  * pipe_screen::resource_destroy implementation: drops the resource's BO
+  * reference and frees the vc5_resource wrapper itself.
+  */
+ static void
+ vc5_resource_destroy(struct pipe_screen *pscreen,
+                      struct pipe_resource *prsc)
+ {
+         struct vc5_resource *res = vc5_resource(prsc);
+
+         vc5_bo_unreference(&res->bo);
+
+         free(res);
+ }
+
+ /**
+  * pipe_screen::resource_get_handle implementation.
+  *
+  * Reports the level-0 stride and exports the BO as the handle type requested
+  * in whandle->type.  Returns FALSE for handle types other than SHARED, KMS
+  * and FD.
+  */
+ static boolean
+ vc5_resource_get_handle(struct pipe_screen *pscreen,
+                         struct pipe_context *pctx,
+                         struct pipe_resource *prsc,
+                         struct winsys_handle *whandle,
+                         unsigned usage)
+ {
+         struct vc5_resource *rsc = vc5_resource(prsc);
+         struct vc5_bo *bo = rsc->bo;
+
+         whandle->stride = rsc->slices[0].stride;
+
+         /* Once a reference to the BO leaves the driver, we can no longer
+          * apply optimizations that assume we are its only user (like BO
+          * caching).
+          */
+         bo->private = false;
+
+         if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
+                 return vc5_bo_flink(bo, &whandle->handle);
+
+         if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+                 whandle->handle = bo->handle;
+                 return TRUE;
+         }
+
+         if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
+                 whandle->handle = vc5_bo_get_dmabuf(bo);
+                 return whandle->handle != -1;
+         }
+
+         return FALSE;
+ }
+
+ /**
+ * Computes the tiling mode, offset, stride and size of every miplevel of
+ * rsc, plus rsc->cube_map_stride.
+ *
+ * Levels are walked from last_level down to 0 while the running offset
+ * grows, so the smallest level is placed first and level 0 last.
+ *
+ * caller is only used to tag the V3D_DEBUG_SURFACE debug output.
+ */
+ static void
+ vc5_setup_slices(struct vc5_resource *rsc, const char *caller)
+ {
+ struct pipe_resource *prsc = &rsc->base;
+ uint32_t width = prsc->width0;
+ uint32_t height = prsc->height0;
+ uint32_t pot_width = util_next_power_of_two(width);
+ uint32_t pot_height = util_next_power_of_two(height);
+ uint32_t offset = 0;
+ uint32_t utile_w = vc5_utile_width(rsc->cpp);
+ uint32_t utile_h = vc5_utile_height(rsc->cpp);
+ uint32_t uif_block_w = utile_w * 2;
+ uint32_t uif_block_h = utile_h * 2;
+ /* Never set to true in this function, so the "(i != 0 || !uif_top)"
+ * tests below always pass.
+ */
+ bool uif_top = false;
+
+ for (int i = prsc->last_level; i >= 0; i--) {
+ struct vc5_resource_slice *slice = &rsc->slices[i];
+
+ /* Levels 0 and 1 are minified from the real size; levels 2 and up
+ * are minified from the size rounded up to a power of two.
+ */
+ uint32_t level_width, level_height;
+ if (i < 2) {
+ level_width = u_minify(width, i);
+ level_height = u_minify(height, i);
+ } else {
+ level_width = u_minify(pot_width, i);
+ level_height = u_minify(pot_height, i);
+ }
+
+ if (!rsc->tiled) {
+ slice->tiling = VC5_TILING_RASTER;
+ if (prsc->nr_samples > 1) {
+ /* MSAA (4x) surfaces are stored as raw tile buffer contents. */
+ level_width = align(level_width, 32);
+ level_height = align(level_height, 32);
+ }
+ } else {
+ /* Pick the smallest tiled layout the level fits in, and pad
+ * the level dimensions to that layout's granularity.
+ */
+ if ((i != 0 || !uif_top) &&
+ (level_width <= utile_w ||
+ level_height <= utile_h)) {
+ slice->tiling = VC5_TILING_LINEARTILE;
+ level_width = align(level_width, utile_w);
+ level_height = align(level_height, utile_h);
+ } else if ((i != 0 || !uif_top) &&
+ level_width <= uif_block_w) {
+ slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+ level_width = align(level_width, uif_block_w);
+ level_height = align(level_height, uif_block_h);
+ } else if ((i != 0 || !uif_top) &&
+ level_width <= 2 * uif_block_w) {
+ slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+ level_width = align(level_width, 2 * uif_block_w);
+ level_height = align(level_height, uif_block_h);
+ } else {
+ slice->tiling = VC5_TILING_UIF_NO_XOR;
+
+ level_width = align(level_width,
+ 4 * uif_block_w);
+ level_height = align(level_height,
+ 4 * uif_block_h);
+ }
+ }
+
+ /* Stride is in bytes per padded row, covering all samples. */
+ slice->offset = offset;
+ slice->stride = (level_width * rsc->cpp *
+ MAX2(prsc->nr_samples, 1));
+ slice->size = level_height * slice->stride;
+
+ offset += slice->size;
+
+ if (V3D_DEBUG & V3D_DEBUG_SURFACE) {
+ static const char *const tiling_descriptions[] = {
+ [VC5_TILING_RASTER] = "R",
+ [VC5_TILING_LINEARTILE] = "LT",
+ [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1",
+ [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2",
+ [VC5_TILING_UIF_NO_XOR] = "UIF",
+ [VC5_TILING_UIF_XOR] = "UIF^",
+ };
+
+ fprintf(stderr,
+ "rsc %s %p (format %s), %dx%d: "
+ "level %d (%s) %dx%d -> %dx%d, stride %d@0x%08x\n",
+ caller, rsc,
+ util_format_short_name(prsc->format),
+ prsc->width0, prsc->height0,
+ i, tiling_descriptions[slice->tiling],
+ u_minify(prsc->width0, i),
+ u_minify(prsc->height0, i),
+ level_width, level_height,
+ slice->stride, slice->offset);
+ }
+ }
+
+ /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only
+ * needs to be aligned to utile boundaries. Since tiles are laid out
+ * from small to big in memory, we need to align the later UIF slices
+ * to UIF blocks, if they were preceded by non-UIF-block-aligned LT
+ * slices.
+ *
+ * We additionally align to 4k, which improves UIF XOR performance.
+ *
+ * This is done by shifting every level by the same amount, chosen so
+ * that level 0 starts on a 4096-byte boundary.
+ */
+ uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
+ rsc->slices[0].offset);
+ if (page_align_offset) {
+ for (int i = 0; i <= prsc->last_level; i++)
+ rsc->slices[i].offset += page_align_offset;
+ }
+
+ /* Arrays, cubes, and 3D textures have a stride which is the distance
+ * from one full mipmap tree to the next (64b aligned).
+ */
+ rsc->cube_map_stride = align(rsc->slices[0].offset +
+ rsc->slices[0].size, 64);
+ }
+
+ /**
+  * Allocates a zeroed vc5_resource, copies the template into its base
+  * pipe_resource, and initializes the reference count, owning screen and
+  * bytes-per-pixel.  Returns NULL if the allocation fails.
+  */
+ static struct vc5_resource *
+ vc5_resource_setup(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+ {
+         struct vc5_resource *res = CALLOC_STRUCT(vc5_resource);
+         struct pipe_resource *base;
+
+         if (res == NULL)
+                 return NULL;
+
+         base = &res->base;
+         *base = *tmpl;
+
+         pipe_reference_init(&base->reference, 1);
+         base->screen = pscreen;
+
+         /* Multisampled resources use a fixed 4 bytes per sample; everything
+          * else uses the block size of its format.
+          */
+         if (base->nr_samples > 1)
+                 res->cpp = sizeof(uint32_t);
+         else
+                 res->cpp = util_format_get_blocksize(tmpl->format);
+
+         assert(res->cpp);
+
+         return res;
+ }
+
+ /**
+  * Returns true if needle is one of the first count entries of haystack.
+  * A count of zero or less always yields false.
+  */
+ static bool
+ find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+ {
+         int remaining = count;
+         const uint64_t *cur = haystack;
+
+         while (remaining > 0) {
+                 if (*cur == needle)
+                         return true;
+                 cur++;
+                 remaining--;
+         }
+
+         return false;
+ }
+
+ /**
+  * pipe_screen::resource_create_with_modifiers implementation.
+  *
+  * Decides between a tiled and a linear layout based on the template and the
+  * list of acceptable modifiers, lays out the miplevels and allocates the
+  * backing BO.
+  *
+  * A single DRM_FORMAT_MOD_INVALID entry means "no preference": the driver
+  * picks the layout itself.
+  *
+  * Returns the new resource, or NULL if allocation fails or none of the
+  * requested modifiers can be honored.
+  */
+ static struct pipe_resource *
+ vc5_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                    const struct pipe_resource *tmpl,
+                                    const uint64_t *modifiers,
+                                    int count)
+ {
+         bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+         struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+         /* Use a tiled layout if we can, for better 3D performance. */
+         bool should_tile = true;
+
+         /* vc5_resource_setup() returns NULL when out of memory. */
+         if (!rsc)
+                 return NULL;
+         struct pipe_resource *prsc = &rsc->base;
+
+         /* VBOs/PBOs are untiled (and 1 height). */
+         if (tmpl->target == PIPE_BUFFER)
+                 should_tile = false;
+
+         /* Cursors are always linear, and the user can request linear as well.
+          */
+         if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                 should_tile = false;
+
+         /* Scanout BOs for simulator need to be linear for interaction with
+          * i965.
+          */
+         if (using_vc5_simulator &&
+             tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+                 should_tile = false;
+
+         /* No user-specified modifier; determine our own. */
+         if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                 linear_ok = true;
+                 rsc->tiled = should_tile;
+         } else if (should_tile &&
+                    find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                  modifiers, count)) {
+                 rsc->tiled = true;
+         } else if (linear_ok) {
+                 rsc->tiled = false;
+         } else {
+                 fprintf(stderr, "Unsupported modifier requested\n");
+                 /* Release the resource set up above instead of leaking it. */
+                 goto fail;
+         }
+
+         if (tmpl->target != PIPE_BUFFER)
+                 rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+         vc5_setup_slices(rsc, "create");
+         if (!vc5_resource_bo_alloc(rsc))
+                 goto fail;
+
+         return prsc;
+ fail:
+         vc5_resource_destroy(pscreen, prsc);
+         return NULL;
+ }
+
+ /**
+  * pipe_screen::resource_create implementation: creates the resource through
+  * vc5_resource_create_with_modifiers() with a one-entry modifier list holding
+  * DRM_FORMAT_MOD_INVALID, i.e. without a caller-specified modifier.
+  */
+ struct pipe_resource *
+ vc5_resource_create(struct pipe_screen *pscreen,
+                     const struct pipe_resource *tmpl)
+ {
+         const uint64_t no_modifier[1] = { DRM_FORMAT_MOD_INVALID };
+
+         return vc5_resource_create_with_modifiers(pscreen, tmpl,
+                                                   no_modifier, 1);
+ }
+
+ /**
+  * pipe_screen::resource_from_handle implementation.
+  *
+  * Wraps a BO imported by flink name or dma-buf fd in a new resource.  Only
+  * DRM_FORMAT_MOD_LINEAR imports at offset 0 are accepted, and the incoming
+  * stride must match the stride this driver computes for level 0.
+  *
+  * Returns the new resource, or NULL on allocation failure or when the
+  * import is unsupported.
+  */
+ static struct pipe_resource *
+ vc5_resource_from_handle(struct pipe_screen *pscreen,
+                          const struct pipe_resource *tmpl,
+                          struct winsys_handle *whandle,
+                          unsigned usage)
+ {
+         struct vc5_screen *screen = vc5_screen(pscreen);
+         struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl);
+
+         /* Check the allocation before deriving any member pointers from
+          * it.
+          */
+         if (!rsc)
+                 return NULL;
+
+         struct pipe_resource *prsc = &rsc->base;
+         struct vc5_resource_slice *slice = &rsc->slices[0];
+
+         switch (whandle->modifier) {
+         case DRM_FORMAT_MOD_LINEAR:
+                 rsc->tiled = false;
+                 break;
+         /* XXX: UIF */
+         default:
+                 fprintf(stderr,
+                         "Attempt to import unsupported modifier 0x%llx\n",
+                         (long long)whandle->modifier);
+                 goto fail;
+         }
+
+         if (whandle->offset != 0) {
+                 fprintf(stderr,
+                         "Attempt to import unsupported winsys offset %u\n",
+                         whandle->offset);
+                 goto fail;
+         }
+
+         switch (whandle->type) {
+         case DRM_API_HANDLE_TYPE_SHARED:
+                 rsc->bo = vc5_bo_open_name(screen,
+                                            whandle->handle, whandle->stride);
+                 break;
+         case DRM_API_HANDLE_TYPE_FD:
+                 rsc->bo = vc5_bo_open_dmabuf(screen,
+                                              whandle->handle, whandle->stride);
+                 break;
+         default:
+                 fprintf(stderr,
+                         "Attempt to import unsupported handle type %d\n",
+                         whandle->type);
+                 goto fail;
+         }
+
+         if (!rsc->bo)
+                 goto fail;
+
+         vc5_setup_slices(rsc, "import");
+
+         rsc->tex_format = vc5_get_tex_format(prsc->format);
+
+         DBG(V3D_DEBUG_SURFACE,
+             "rsc import %p (format %s), %dx%d: "
+             "level 0 (R) -> stride %d@0x%08x\n",
+             rsc, util_format_short_name(prsc->format),
+             prsc->width0, prsc->height0,
+             slice->stride, slice->offset);
+
+         if (whandle->stride != slice->stride) {
+                 /* Only print the warning once per process. */
+                 static bool warned = false;
+                 if (!warned) {
+                         warned = true;
+                         fprintf(stderr,
+                                 "Attempting to import %dx%d %s with "
+                                 "unsupported stride %d instead of %d\n",
+                                 prsc->width0, prsc->height0,
+                                 util_format_short_name(prsc->format),
+                                 whandle->stride,
+                                 slice->stride);
+                 }
+                 goto fail;
+         }
+
+         return prsc;
+
+ fail:
+         vc5_resource_destroy(pscreen, prsc);
+         return NULL;
+ }
+
+ /**
+  * pipe_context::create_surface implementation.
+  *
+  * Builds a single-layer surface view of one miplevel of ptex, recording the
+  * byte offset, tiling mode, render-target format and internal tile-buffer
+  * type for that level.  Returns NULL if the allocation fails.
+  */
+ static struct pipe_surface *
+ vc5_create_surface(struct pipe_context *pctx,
+                    struct pipe_resource *ptex,
+                    const struct pipe_surface *surf_tmpl)
+ {
+         struct vc5_surface *surf = CALLOC_STRUCT(vc5_surface);
+         struct vc5_resource *tex = vc5_resource(ptex);
+
+         if (surf == NULL)
+                 return NULL;
+
+         /* Only single-layer views are handled. */
+         assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+         struct pipe_surface *base = &surf->base;
+         unsigned lvl = surf_tmpl->u.tex.level;
+
+         pipe_reference_init(&base->reference, 1);
+         pipe_resource_reference(&base->texture, ptex);
+
+         base->context = pctx;
+         base->format = surf_tmpl->format;
+         base->width = u_minify(ptex->width0, lvl);
+         base->height = u_minify(ptex->height0, lvl);
+         base->u.tex.level = lvl;
+         base->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+         base->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+         /* Start of the selected level within the selected layer. */
+         surf->offset = (tex->slices[lvl].offset +
+                         base->u.tex.first_layer * tex->cube_map_stride);
+         surf->tiling = tex->slices[lvl].tiling;
+         surf->format = vc5_get_rt_format(base->format);
+
+         if (!util_format_is_depth_or_stencil(base->format)) {
+                 uint32_t bpp, type;
+                 vc5_get_internal_type_bpp_for_output_format(surf->format,
+                                                             &type, &bpp);
+                 surf->internal_type = type;
+                 surf->internal_bpp = bpp;
+         } else if (base->format == PIPE_FORMAT_Z16_UNORM) {
+                 surf->internal_type = INTERNAL_TYPE_DEPTH_16;
+         } else if (base->format == PIPE_FORMAT_Z32_FLOAT ||
+                    base->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+                 surf->internal_type = INTERNAL_TYPE_DEPTH_32F;
+         } else {
+                 surf->internal_type = INTERNAL_TYPE_DEPTH_24;
+         }
+
+         return base;
+ }
+
+ /**
+  * pipe_context::surface_destroy implementation: drops the surface's
+  * reference on its texture and frees the surface.
+  */
+ static void
+ vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+ {
+         struct pipe_resource **texture = &psurf->texture;
+
+         pipe_resource_reference(texture, NULL);
+         FREE(psurf);
+ }
+
+ /**
+ * pipe_context::flush_resource implementation. Intentionally a no-op.
+ */
+ static void
+ vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
+ {
+ /* All calls to flush_resource are followed by a flush of the context,
+ * so there's nothing to do.
+ */
+ }
+
+ /**
+  * Installs the resource-related hooks implemented in this file on the
+  * screen.
+  */
+ void
+ vc5_resource_screen_init(struct pipe_screen *pscreen)
+ {
+         /* Creation and import. */
+         pscreen->resource_create = vc5_resource_create;
+         pscreen->resource_create_with_modifiers =
+                 vc5_resource_create_with_modifiers;
+         pscreen->resource_from_handle = vc5_resource_from_handle;
+
+         /* Export and teardown. */
+         pscreen->resource_get_handle = vc5_resource_get_handle;
+         pscreen->resource_destroy = vc5_resource_destroy;
+ }
+
+ /**
+  * Installs the resource-related hooks on the context: the transfer, surface
+  * and flush functions from this file, vc5_blit, and the shared u_default_* /
+  * util_* helpers for the remaining entry points.
+  */
+ void
+ vc5_resource_context_init(struct pipe_context *pctx)
+ {
+         /* CPU access to resources. */
+         pctx->transfer_map = vc5_resource_transfer_map;
+         pctx->transfer_unmap = vc5_resource_transfer_unmap;
+         pctx->transfer_flush_region = u_default_transfer_flush_region;
+         pctx->buffer_subdata = u_default_buffer_subdata;
+         pctx->texture_subdata = u_default_texture_subdata;
+
+         /* Surface views. */
+         pctx->create_surface = vc5_create_surface;
+         pctx->surface_destroy = vc5_surface_destroy;
+
+         /* Copies, blits and flushes. */
+         pctx->resource_copy_region = util_resource_copy_region;
+         pctx->blit = vc5_blit;
+         pctx->flush_resource = vc5_flush_resource;
+ }
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h
new file mode 100644
index 000000000..e65cb8c8f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_resource.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_RESOURCE_H
+#define VC5_RESOURCE_H
+
+#include "vc5_screen.h"
+#include "util/u_transfer.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page. Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ */
+enum vc5_tiling_mode {
+ /* Untiled resources. Not valid as texture inputs. */
+ VC5_TILING_RASTER,
+
+ /* Single line of u-tiles. */
+ VC5_TILING_LINEARTILE,
+
+ /* Departure from standard 4-UIF block column format. */
+ VC5_TILING_UBLINEAR_1_COLUMN,
+
+ /* Departure from standard 4-UIF block column format. */
+ VC5_TILING_UBLINEAR_2_COLUMN,
+
+ /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+ * split 2x2 into utiles.
+ */
+ VC5_TILING_UIF_NO_XOR,
+
+ /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+ * split 2x2 into utiles.
+ */
+ VC5_TILING_UIF_XOR,
+};
+
+ /**
+ * Driver-side state for one transfer_map()/transfer_unmap() pair. The
+ * pipe_transfer must stay the first member, since vc5_transfer() casts a
+ * pipe_transfer pointer to this type.
+ */
+ struct vc5_transfer {
+ struct pipe_transfer base;
+ /* malloc'ed linear staging copy handed out for tiled resources; NULL
+ * when the mapping points straight into the BO.
+ */
+ void *map;
+
+ /* Temporary single-sample resource that stands in for a multisampled
+ * resource while it is mapped; NULL otherwise.
+ */
+ struct pipe_resource *ss_resource;
+ /* The caller's original box, saved at map time for the blit back
+ * into the multisampled resource at unmap time.
+ */
+ struct pipe_box ss_box;
+ };
+
+ /**
+ * Layout of one miplevel within a resource's BO, as computed by
+ * vc5_setup_slices().
+ */
+ struct vc5_resource_slice {
+ /* Byte offset of the level from the start of a layer's miptree. */
+ uint32_t offset;
+ /* Bytes per row of the padded level, covering all samples. */
+ uint32_t stride;
+ /* Total bytes occupied by the level: padded height times stride. */
+ uint32_t size;
+ /* Memory layout chosen for this level. */
+ enum vc5_tiling_mode tiling;
+ };
+
+ /**
+ * Driver-side surface state. The pipe_surface must stay the first member,
+ * since vc5_surface() casts a pipe_surface pointer to this type.
+ */
+ struct vc5_surface {
+ struct pipe_surface base;
+ /* Byte offset of the viewed level and layer within the resource. */
+ uint32_t offset;
+ /* Tiling mode of the viewed level. */
+ enum vc5_tiling_mode tiling;
+ /**
+ * Output image format for TILE_RENDERING_MODE_CONFIGURATION
+ */
+ uint8_t format;
+
+ /**
+ * Internal format of the tile buffer for
+ * TILE_RENDERING_MODE_CONFIGURATION.
+ */
+ uint8_t internal_type;
+
+ /**
+ * internal bpp value (0=32bpp, 2=128bpp) for color buffers in
+ * TILE_RENDERING_MODE_CONFIGURATION.
+ */
+ uint8_t internal_bpp;
+ };
+
+ /**
+ * Driver-side resource state. The pipe_resource must stay the first member,
+ * since vc5_resource() casts a pipe_resource pointer to this type.
+ */
+ struct vc5_resource {
+ struct pipe_resource base;
+ /* Backing buffer object; replaced when the whole resource is
+ * discarded on map.
+ */
+ struct vc5_bo *bo;
+ /* Per-miplevel layout, indexed by level. */
+ struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS];
+ /* Byte distance from one layer's full miptree to the next. */
+ uint32_t cube_map_stride;
+ /* Bytes per pixel (per sample for multisampled resources). */
+ int cpp;
+ /* True if the levels use one of the tiled layouts rather than
+ * raster order.
+ */
+ bool tiled;
+ /** One of V3D_TEXTURE_DATA_FORMAT_* */
+ uint8_t tex_format;
+
+ /**
+ * Number of times the resource has been written to.
+ *
+ * This is used to track whether we need to load the surface on first
+ * rendering.
+ */
+ uint64_t writes;
+
+ /**
+ * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL
+ * for which parts of the resource are defined.
+ *
+ * Used for avoiding fallback to quad clears for clearing just depth,
+ * when the stencil contents have never been initialized. Note that
+ * we're lazy and fields not present in the buffer (DEPTH in a color
+ * buffer) may get marked.
+ */
+ uint32_t initialized_buffers;
+ };
+
+ /**
+  * Downcasts a pipe_resource to the vc5_resource that embeds it as its first
+  * member.
+  */
+ static inline struct vc5_resource *
+ vc5_resource(struct pipe_resource *prsc)
+ {
+         struct vc5_resource *rsc = (struct vc5_resource *)prsc;
+
+         return rsc;
+ }
+
+ /**
+  * Downcasts a pipe_surface to the vc5_surface that embeds it as its first
+  * member.
+  */
+ static inline struct vc5_surface *
+ vc5_surface(struct pipe_surface *psurf)
+ {
+         struct vc5_surface *surf = (struct vc5_surface *)psurf;
+
+         return surf;
+ }
+
+ /**
+  * Downcasts a pipe_transfer to the vc5_transfer that embeds it as its first
+  * member.
+  */
+ static inline struct vc5_transfer *
+ vc5_transfer(struct pipe_transfer *ptrans)
+ {
+         struct vc5_transfer *trans = (struct vc5_transfer *)ptrans;
+
+         return trans;
+ }
+
+void vc5_resource_screen_init(struct pipe_screen *pscreen);
+void vc5_resource_context_init(struct pipe_context *pctx);
+struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmpl);
+
+#endif /* VC5_RESOURCE_H */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c
new file mode 100644
index 000000000..66180d27e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "os/os_misc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_hash_table.h"
+#include "util/ralloc.h"
+
+#include <xf86drm.h>
+#include "vc5_drm.h"
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_resource.h"
+#include "compiler/v3d_compiler.h"
+
+/**
+ * Returns the device name string, formatting it on first use and caching it
+ * in screen->name (allocated as a ralloc child of the screen).
+ */
+static const char *
+vc5_screen_get_name(struct pipe_screen *pscreen)
+{
+        struct vc5_screen *vc5scr = vc5_screen(pscreen);
+
+        if (vc5scr->name)
+                return vc5scr->name;
+
+        /* devinfo.ver is stored as major * 10 + minor. */
+        vc5scr->name = ralloc_asprintf(vc5scr, "VC5 V3D %d.%d",
+                                       vc5scr->devinfo.ver / 10,
+                                       vc5scr->devinfo.ver % 10);
+
+        return vc5scr->name;
+}
+
+/** Returns the constant vendor string reported for this driver. */
+static const char *
+vc5_screen_get_vendor(struct pipe_screen *pscreen)
+{
+        static const char vendor[] = "Broadcom";
+
+        return vendor;
+}
+
+/**
+ * pipe_screen::destroy hook: tears down everything the screen owns, closes
+ * the DRM fd and frees the screen itself.
+ */
+static void
+vc5_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct vc5_screen *screen = vc5_screen(pscreen);
+
+ util_hash_table_destroy(screen->bo_handles);
+ vc5_bufmgr_destroy(pscreen);
+ slab_destroy_parent(&screen->transfer_pool);
+
+ if (using_vc5_simulator)
+ vc5_simulator_destroy(screen);
+
+ v3d_compiler_free(screen->compiler);
+
+ close(screen->fd);
+ /* pscreen is the first member of the ralloc'ed vc5_screen, so this
+ * frees the screen allocation together with its ralloc children
+ * (e.g. the cached name string).
+ */
+ ralloc_free(pscreen);
+}
+
+/**
+ * pipe_screen::get_param hook: reports integer/boolean capabilities.
+ *
+ * Boolean caps return 1 (supported) or 0 (unsupported); limit-style caps
+ * return the numeric limit. Caps not listed here are reported on stderr and
+ * answered with 0.
+ */
+static int
+vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ switch (param) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_SM3:
+ case PIPE_CAP_INDEP_BLEND_ENABLE: /* XXX */
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+ case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+ case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 1;
+
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 4;
+
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 400;
+
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return 1;
+
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+ return 1;
+
+
+ /* Stream output. */
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 64;
+
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ return 7;
+
+ /* Unsupported features. */
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_FAKE_SW_MSAA:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_CULL_DISTANCE:
+ case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+ case PIPE_CAP_TGSI_VOTE:
+ case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+ case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+ case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+ case PIPE_CAP_TGSI_FS_FBFETCH:
+ case PIPE_CAP_INT64:
+ case PIPE_CAP_INT64_DIVMOD:
+ case PIPE_CAP_DOUBLES:
+ case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+ case PIPE_CAP_TGSI_BALLOT:
+ case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+ case PIPE_CAP_TGSI_CLOCK:
+ case PIPE_CAP_TGSI_TEX_TXF_LZ:
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TILE_RASTER_ORDER:
+ return 0;
+
+ /* Geometry shader output, unsupported. */
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return VC5_MAX_MIP_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 256;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 2048;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+
+ /* Queries. */
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 0;
+
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ return 2048;
+
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return 64;
+
+ case PIPE_CAP_VENDOR_ID:
+ return 0x14E4;
+ case PIPE_CAP_DEVICE_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_ACCELERATED:
+ return 1;
+ case PIPE_CAP_VIDEO_MEMORY: {
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+
+ /* Reported as total physical memory shifted down by 20 bits
+ * (i.e. bytes -> MiB, assuming the helper returns bytes).
+ */
+ return (int)(system_memory >> 20);
+ }
+ case PIPE_CAP_UMA:
+ return 1;
+
+ default:
+ fprintf(stderr, "unknown param %d\n", param);
+ return 0;
+ }
+}
+
+/**
+ * pipe_screen::get_paramf hook: reports floating-point capabilities.
+ *
+ * Unknown caps are reported on stderr and answered with 0.
+ */
+static float
+vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+        float value = 0.0f;
+
+        switch (param) {
+        case PIPE_CAPF_MAX_LINE_WIDTH:
+        case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+                value = 32.0f;
+                break;
+
+        case PIPE_CAPF_MAX_POINT_WIDTH:
+        case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+                value = 512.0f;
+                break;
+
+        /* Everything below is reported as 0. */
+        case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+        case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+        case PIPE_CAPF_GUARD_BAND_LEFT:
+        case PIPE_CAPF_GUARD_BAND_TOP:
+        case PIPE_CAPF_GUARD_BAND_RIGHT:
+        case PIPE_CAPF_GUARD_BAND_BOTTOM:
+                break;
+
+        default:
+                fprintf(stderr, "unknown paramf %d\n", param);
+                break;
+        }
+
+        return value;
+}
+
+/**
+ * pipe_screen::get_shader_param hook: reports per-shader-stage limits.
+ *
+ * Only vertex and fragment stages are handled; every cap of any other stage
+ * is answered with 0. Unknown caps are reported on stderr and answered
+ * with 0.
+ */
+static int
+vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ if (shader != PIPE_SHADER_VERTEX &&
+ shader != PIPE_SHADER_FRAGMENT) {
+ return 0;
+ }
+
+ /* this is probably not totally correct.. but it's a start: */
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ /* NOTE(review): UINT_MAX is converted to the int return type
+ * here, so callers reading the result as a signed int will not
+ * see a large positive value.
+ */
+ return UINT_MAX;
+
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_FRAGMENT)
+ return VC5_MAX_FS_INPUTS / 4;
+ else
+ return 16;
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
+ return shader == PIPE_SHADER_FRAGMENT ? 4 : 8;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ return 16 * 1024 * sizeof(float);
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return 16;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ return 0;
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_FP16:
+ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return VC5_MAX_TEXTURE_SAMPLERS;
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_NIR;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
+ case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ return 0;
+ default:
+ fprintf(stderr, "unknown shader param %d\n", param);
+ return 0;
+ }
+ /* Not reached: every switch arm above returns. */
+ return 0;
+}
+
+/**
+ * pipe_screen::is_format_supported hook.
+ *
+ * Builds up in retval the subset of the requested bind flags that this
+ * function knows how to satisfy for the format, and reports support only if
+ * that subset equals the full request. A bind flag with no check below
+ * therefore makes the whole query fail.
+ */
+static boolean
+vc5_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned usage)
+{
+ unsigned retval = 0;
+
+ /* Only single-sampled (0 or 1) or exactly VC5_MAX_SAMPLES. */
+ if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES)
+ return FALSE;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ !util_format_is_supported(format, usage)) {
+ return FALSE;
+ }
+
+ if (usage & PIPE_BIND_VERTEX_BUFFER) {
+ switch (format) {
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ case PIPE_FORMAT_R32G32_FLOAT:
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ case PIPE_FORMAT_R32G32_SNORM:
+ case PIPE_FORMAT_R32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ case PIPE_FORMAT_R32G32_SSCALED:
+ case PIPE_FORMAT_R32_SSCALED:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
+ case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ case PIPE_FORMAT_R16G16_USCALED:
+ case PIPE_FORMAT_R16_USCALED:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16_SSCALED:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ case PIPE_FORMAT_R8G8_SNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ case PIPE_FORMAT_R8G8_USCALED:
+ case PIPE_FORMAT_R8_USCALED:
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ case PIPE_FORMAT_R8G8_SSCALED:
+ case PIPE_FORMAT_R8_SSCALED:
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if ((usage & PIPE_BIND_RENDER_TARGET) &&
+ vc5_rt_format_supported(format)) {
+ retval |= PIPE_BIND_RENDER_TARGET;
+ }
+
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ vc5_tex_format_supported(format)) {
+ retval |= PIPE_BIND_SAMPLER_VIEW;
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
+ format == PIPE_FORMAT_X8Z24_UNORM ||
+ format == PIPE_FORMAT_Z16_UNORM ||
+ format == PIPE_FORMAT_Z32_FLOAT ||
+ format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (format == PIPE_FORMAT_I8_UINT ||
+ format == PIPE_FORMAT_I16_UINT ||
+ format == PIPE_FORMAT_I32_UINT)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
+
+#if 0
+ if (retval != usage) {
+ fprintf(stderr,
+ "not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=0x%x, retval=0x%x\n", util_format_name(format),
+ target, sample_count, usage, retval);
+ }
+#endif
+
+ /* Supported only when every requested bind flag was satisfied. */
+ return retval == usage;
+}
+
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+/** Hash callback for bo_handles: the key's integer value is the hash. */
+static unsigned handle_hash(void *key)
+{
+        unsigned handle = PTR_TO_UINT(key);
+
+        return handle;
+}
+
+/**
+ * Compare callback for bo_handles: returns 0 when both keys hold the same
+ * integer value and 1 otherwise.
+ */
+static int handle_compare(void *key1, void *key2)
+{
+        unsigned lhs = PTR_TO_UINT(key1);
+        unsigned rhs = PTR_TO_UINT(key2);
+
+        return lhs == rhs ? 0 : 1;
+}
+
+/**
+ * Queries the V3D core identification registers through the GET_PARAM ioctl
+ * and stores the version in screen->devinfo.ver as major * 10 + minor.
+ *
+ * Returns false (after printing a message to stderr) if either ioctl fails
+ * or the version is anything other than 33.
+ */
+static bool
+vc5_get_device_info(struct vc5_screen *screen)
+{
+        struct drm_vc5_get_param core_ident0 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+        };
+        struct drm_vc5_get_param core_ident1 = {
+                .param = DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+        };
+
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM,
+                      &core_ident0) != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n",
+                        strerror(errno));
+                return false;
+        }
+
+        if (vc5_ioctl(screen->fd, DRM_IOCTL_VC5_GET_PARAM,
+                      &core_ident1) != 0) {
+                fprintf(stderr, "Couldn't get V3D core IDENT1: %s\n",
+                        strerror(errno));
+                return false;
+        }
+
+        /* Major version: bits 31:24 of IDENT0; minor: bits 3:0 of IDENT1. */
+        uint32_t ver_major = (core_ident0.value >> 24) & 0xff;
+        uint32_t ver_minor = (core_ident1.value >> 0) & 0xf;
+        screen->devinfo.ver = ver_major * 10 + ver_minor;
+
+        if (screen->devinfo.ver == 33)
+                return true;
+
+        fprintf(stderr,
+                "V3D %d.%d not supported by this version of Mesa.\n",
+                screen->devinfo.ver / 10,
+                screen->devinfo.ver % 10);
+        return false;
+}
+
+/**
+ * pipe_screen::get_compiler_options hook: the same v3d NIR options are
+ * returned regardless of the requested IR or shader stage.
+ */
+static const void *
+vc5_screen_get_compiler_options(struct pipe_screen *pscreen,
+                                enum pipe_shader_ir ir, unsigned shader)
+{
+        const void *options = &v3d_nir_options;
+
+        return options;
+}
+
+/**
+ * Creates the vc5 pipe_screen for an open DRM fd.
+ *
+ * On success the returned screen owns \p fd (it is closed by the destroy
+ * hook). On any failure the fd is closed, everything allocated so far is
+ * released, and NULL is returned.
+ */
+struct pipe_screen *
+vc5_screen_create(int fd)
+{
+        struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen);
+        struct pipe_screen *pscreen;
+
+        /* Allocation failure: keep the "fd is consumed on failure" contract
+         * of the fail path below instead of dereferencing NULL.
+         */
+        if (!screen) {
+                close(fd);
+                return NULL;
+        }
+
+        pscreen = &screen->base;
+
+        pscreen->destroy = vc5_screen_destroy;
+        pscreen->get_param = vc5_screen_get_param;
+        pscreen->get_paramf = vc5_screen_get_paramf;
+        pscreen->get_shader_param = vc5_screen_get_shader_param;
+        pscreen->context_create = vc5_context_create;
+        pscreen->is_format_supported = vc5_screen_is_format_supported;
+
+        screen->fd = fd;
+        list_inithead(&screen->bo_cache.time_list);
+        (void)mtx_init(&screen->bo_handles_mutex, mtx_plain);
+        screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+        if (!screen->bo_handles)
+                goto fail;
+
+#if defined(USE_VC5_SIMULATOR)
+        vc5_simulator_init(screen);
+#endif
+
+        if (!vc5_get_device_info(screen))
+                goto fail;
+
+        slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16);
+
+        vc5_fence_init(screen);
+
+        v3d_process_debug_variable();
+
+        vc5_resource_screen_init(pscreen);
+
+        screen->compiler = v3d_compiler_init(&screen->devinfo);
+
+        pscreen->get_name = vc5_screen_get_name;
+        pscreen->get_vendor = vc5_screen_get_vendor;
+        pscreen->get_device_vendor = vc5_screen_get_vendor;
+        pscreen->get_compiler_options = vc5_screen_get_compiler_options;
+
+        return pscreen;
+
+fail:
+        /* The handle table is not a ralloc child of the screen, so it has
+         * to be released explicitly or it leaks.
+         */
+        if (screen->bo_handles)
+                util_hash_table_destroy(screen->bo_handles);
+        close(fd);
+        ralloc_free(pscreen);
+        return NULL;
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h
new file mode 100644
index 000000000..28925d791
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_screen.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_SCREEN_H
+#define VC5_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "os/os_thread.h"
+#include "state_tracker/drm_driver.h"
+#include "util/list.h"
+#include "util/slab.h"
+#include "broadcom/common/v3d_debug.h"
+#include "broadcom/common/v3d_device_info.h"
+
+struct vc5_bo;
+
+#define VC5_MAX_MIP_LEVELS 12
+#define VC5_MAX_TEXTURE_SAMPLERS 32
+#define VC5_MAX_SAMPLES 4
+#define VC5_MAX_DRAW_BUFFERS 4
+#define VC5_MAX_ATTRIBUTES 16
+
+struct vc5_simulator_file;
+
+/** Driver-private screen state, wrapping the gallium pipe_screen. */
+struct vc5_screen {
+ /* Must stay the first member: vc5_screen() casts a pipe_screen pointer
+ * directly to a vc5_screen pointer.
+ */
+ struct pipe_screen base;
+ /* DRM file descriptor the screen was created for. */
+ int fd;
+
+ struct v3d_device_info devinfo;
+
+ /* Device name string; NULL until something fills it in. */
+ const char *name;
+
+ /** The last seqno we've completed a wait for.
+ *
+ * This lets us slightly optimize our waits by skipping wait syscalls
+ * if we know the job's already done.
+ */
+ uint64_t finished_seqno;
+
+ struct slab_parent_pool transfer_pool;
+
+ struct vc5_bo_cache {
+ /** List of struct vc5_bo freed, by age. */
+ struct list_head time_list;
+ /** List of struct vc5_bo freed, per size, by age. */
+ struct list_head *size_list;
+ /* NOTE(review): presumably the number of entries in size_list
+ * -- confirm against the BO cache code.
+ */
+ uint32_t size_list_size;
+
+ mtx_t lock;
+
+ uint32_t bo_size;
+ uint32_t bo_count;
+ } bo_cache;
+
+ const struct v3d_compiler *compiler;
+
+ /* Hash table of BO handles; bo_handles_mutex is declared alongside it
+ * (presumably to guard it -- confirm against the users).
+ */
+ struct util_hash_table *bo_handles;
+ mtx_t bo_handles_mutex;
+
+ /* NOTE(review): same names as the bo_cache counters above; presumably
+ * these cover all live BOs rather than just cached ones -- confirm.
+ */
+ uint32_t bo_size;
+ uint32_t bo_count;
+
+ /* Per-fd simulator state (opaque outside the simulator code). */
+ struct vc5_simulator_file *sim_file;
+};
+
+/** Downcasts a gallium pipe_screen to the vc5 screen that embeds it. */
+static inline struct vc5_screen *
+vc5_screen(struct pipe_screen *screen)
+{
+        struct vc5_screen *vc5scr = (struct vc5_screen *)screen;
+
+        return vc5scr;
+}
+
+struct pipe_screen *vc5_screen_create(int fd);
+
+void
+vc5_fence_init(struct vc5_screen *screen);
+
+struct vc5_fence *
+vc5_fence_create(struct vc5_screen *screen, uint64_t seqno);
+
+#endif /* VC5_SCREEN_H */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c b/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c
new file mode 100644
index 000000000..fc6a38d37
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_simulator.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc5_simulator.c
+ *
+ * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ *
+ * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator. Generally, VC5 driver BOs have a
+ * GEM-side copy of their contents and a simulator-side memory area that the
+ * GEM contents get copied into during simulation. Once simulation is done,
+ * the simulator's data is copied back out to the GEM BOs, so that rendering
+ * appears on the screen as if actual hardware rendering had been done.
+ *
+ * One of the limitations of this code is that we shouldn't really need a
+ * GEM-side BO for non-window-system BOs. However, we do need unique BO
+ * handles for each of our GEM bos so that this file can look up its state
+ * from the handle passed in at submit ioctl time (also, a couple of places
+ * outside of this file still call ioctls directly on the fd).
+ *
+ * Another limitation is that BO import doesn't work unless the underlying
+ * window system's BO size matches what VC5 is going to use, which of course
+ * doesn't work out in practice. This means that for now, only DRI3 (VC5
+ * makes the winsys BOs) is supported, not DRI2 (window system makes the
+ * winsys BOs).
+ */
+
+#ifdef USE_VC5_SIMULATOR
+
+#include <sys/mman.h>
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/u_memory.h"
+#include "util/u_mm.h"
+
+#define HW_REGISTER_RO(x) (x)
+#define HW_REGISTER_RW(x) (x)
+#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
+
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#define V3D_TECH_VERSION 3
+#define V3D_REVISION 3
+#define V3D_SUB_REV 0
+#define V3D_HIDDEN_REV 0
+#undef unreachable
+#include "v3d_hw_auto.h"
+
+/** Global (across GEM fds) state for the simulator */
+static struct vc5_simulator_state {
+ mtx_t mutex;
+
+ struct v3d_hw *v3d;
+
+ /* Base virtual address of the heap. */
+ void *mem;
+ /* Base hardware address of the heap. */
+ uint32_t mem_base;
+ /* Size of the heap. */
+ size_t mem_size;
+
+ struct mem_block *heap;
+ struct mem_block *overflow;
+
+ /** Mapping from (GEM fd + 1) to struct vc5_simulator_file * */
+ struct hash_table *fd_map;
+
+ int refcount;
+} sim_state = {
+ .mutex = _MTX_INITIALIZER_NP,
+};
+
+/** Per-GEM-fd state for the simulator. */
+struct vc5_simulator_file {
+ int fd;
+
+ /** Mapping from GEM handle to struct vc5_simulator_bo * */
+ struct hash_table *bo_map;
+
+ struct mem_block *gmp;
+ /* GMP permission table, accessed as an array of 32-bit words holding
+ * sixteen 2-bit entries each (one entry per 1 << GMP_ALIGN2 bytes).
+ */
+ void *gmp_vaddr;
+};
+
+/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */
+struct vc5_simulator_bo {
+ /* Per-fd simulator state this BO was created under. */
+ struct vc5_simulator_file *file;
+
+ /** Area for this BO within sim_state.mem */
+ struct mem_block *block;
+ /* BO size rounded up to 4096 bytes; the 4-byte sentinel that follows
+ * the BO in simulator memory is not included.
+ */
+ uint32_t size;
+ /* CPU address of the BO's area inside the simulator heap. */
+ void *vaddr;
+
+ /* When non-NULL, a mapping that is munmap()ed with the BO. */
+ void *winsys_map;
+ uint32_t winsys_stride;
+
+ /* GEM handle; 0 means the BO is not in the file's bo_map. */
+ int handle;
+};
+
+/** Packs an integer into a pointer-sized hash table key. */
+static void *
+int_to_key(int key)
+{
+        uintptr_t bits = (uintptr_t)key;
+
+        return (void *)bits;
+}
+
+/**
+ * Looks up the per-fd simulator state, or NULL if the fd is unknown.
+ *
+ * fd_map is keyed by fd + 1 (presumably so that fd 0 does not produce a
+ * NULL key).
+ */
+static struct vc5_simulator_file *
+vc5_get_simulator_file_for_fd(int fd)
+{
+        void *key = int_to_key(fd + 1);
+        struct hash_entry *hit = _mesa_hash_table_search(sim_state.fd_map,
+                                                         key);
+
+        if (hit == NULL)
+                return NULL;
+
+        return hit->data;
+}
+
+/* A marker placed just after each BO, then checked after rendering to make
+ * sure it's still there.
+ */
+#define BO_SENTINEL 0xfedcba98
+
+/* 128kb */
+#define GMP_ALIGN2 17
+
+/**
+ * Sets the range of GPU virtual address space to have the given GMP
+ * permissions (bit 0 = read, bit 1 = write, write-only forbidden).
+ *
+ * The table at file->gmp_vaddr packs one 2-bit entry per
+ * (1 << GMP_ALIGN2)-byte page, sixteen entries per 32-bit word. \p offset
+ * must be page aligned; \p size is rounded up to whole pages.
+ */
+static void
+set_gmp_flags(struct vc5_simulator_file *file,
+              uint32_t offset, uint32_t size, uint32_t flag)
+{
+        assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0);
+        int gmp_offset = offset >> GMP_ALIGN2;
+        int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2;
+        uint32_t *gmp = file->gmp_vaddr;
+
+        assert(flag <= 0x3);
+
+        for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) {
+                uint32_t bitshift = (i % 16) * 2;
+
+                /* The mask must be unsigned: for the last entry of a word
+                 * (shift of 30) a signed 0x3 << 30 overflows int, which is
+                 * undefined behaviour.
+                 */
+                gmp[i / 16] &= ~(0x3u << bitshift);
+                gmp[i / 16] |= flag << bitshift;
+        }
+}
+
+/**
+ * Allocates space in simulator memory for a BO and returns the tracking
+ * struct for it.
+ *
+ * The size is rounded up to 4096 bytes, the area is filled with 0xd0 and a
+ * BO_SENTINEL word is stored right after it. BOs with a non-zero handle are
+ * also entered into the file's handle lookup table.
+ */
+static struct vc5_simulator_bo *
+vc5_create_simulator_bo(int fd, int handle, unsigned size)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+        struct vc5_simulator_bo *bo = rzalloc(sim_file,
+                                              struct vc5_simulator_bo);
+        size = align(size, 4096);
+
+        bo->file = sim_file;
+        bo->handle = handle;
+
+        /* Four extra bytes hold the sentinel that follows the BO. */
+        mtx_lock(&sim_state.mutex);
+        bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0);
+        mtx_unlock(&sim_state.mutex);
+        assert(bo->block);
+
+        /* Grant read + write access to the BO's pages. */
+        set_gmp_flags(sim_file, bo->block->ofs, size, 0x3);
+
+        bo->size = size;
+        bo->vaddr = sim_state.mem + bo->block->ofs - sim_state.mem_base;
+        memset(bo->vaddr, 0xd0, size);
+
+        *(uint32_t *)(bo->vaddr + bo->size) = BO_SENTINEL;
+
+        /* A handle of 0 is used for vc5_gem.c internal allocations that
+         * don't need to go in the lookup table.
+         */
+        if (handle == 0)
+                return bo;
+
+        mtx_lock(&sim_state.mutex);
+        _mesa_hash_table_insert(sim_file->bo_map, int_to_key(handle), bo);
+        mtx_unlock(&sim_state.mutex);
+
+        return bo;
+}
+
+/**
+ * Releases a simulator BO: unmaps its winsys mapping (if any), revokes the
+ * GMP permissions for its pages, returns its heap block, removes it from the
+ * file's handle table (non-zero handles only) and frees the tracking struct.
+ */
+static void
+vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo)
+{
+        struct vc5_simulator_file *owner = sim_bo->file;
+
+        if (sim_bo->winsys_map != NULL)
+                munmap(sim_bo->winsys_map, sim_bo->size);
+
+        set_gmp_flags(owner, sim_bo->block->ofs, sim_bo->size, 0x0);
+
+        mtx_lock(&sim_state.mutex);
+        u_mmFreeMem(sim_bo->block);
+        if (sim_bo->handle != 0) {
+                void *key = int_to_key(sim_bo->handle);
+                struct hash_entry *hit =
+                        _mesa_hash_table_search(owner->bo_map, key);
+
+                _mesa_hash_table_remove(owner->bo_map, hit);
+        }
+        mtx_unlock(&sim_state.mutex);
+
+        ralloc_free(sim_bo);
+}
+
+/**
+ * Looks up the simulator BO registered for a GEM handle on this file, or
+ * NULL if there is none. The table search runs under sim_state.mutex.
+ */
+static struct vc5_simulator_bo *
+vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle)
+{
+        struct hash_entry *hit;
+
+        mtx_lock(&sim_state.mutex);
+        hit = _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle));
+        mtx_unlock(&sim_state.mutex);
+
+        if (hit == NULL)
+                return NULL;
+
+        return hit->data;
+}
+
+/**
+ * Copies the contents of every BO referenced by the job from its GEM
+ * mapping into its simulator memory area. Always returns 0.
+ */
+static int
+vc5_simulator_pin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *entry;
+
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *gem_bo = (struct vc5_bo *)entry->key;
+                struct vc5_simulator_bo *shadow =
+                        vc5_get_simulator_bo(sim_file, gem_bo->handle);
+
+                /* Make sure gem_bo->map is valid before reading from it. */
+                vc5_bo_map(gem_bo);
+                memcpy(shadow->vaddr, gem_bo->map, gem_bo->size);
+        }
+
+        return 0;
+}
+
+/**
+ * Copies the contents of every BO referenced by the job from simulator
+ * memory back into its GEM mapping, after asserting that the sentinel
+ * following the BO is still intact. Always returns 0.
+ */
+static int
+vc5_simulator_unpin_bos(int fd, struct vc5_job *job)
+{
+        struct vc5_simulator_file *sim_file =
+                vc5_get_simulator_file_for_fd(fd);
+        struct set_entry *entry;
+
+        set_foreach(job->bos, entry) {
+                struct vc5_bo *gem_bo = (struct vc5_bo *)entry->key;
+                struct vc5_simulator_bo *shadow =
+                        vc5_get_simulator_bo(sim_file, gem_bo->handle);
+
+                /* A clobbered sentinel means something wrote past the BO. */
+                assert(*(uint32_t *)(shadow->vaddr +
+                                     shadow->size) == BO_SENTINEL);
+
+                vc5_bo_map(gem_bo);
+                memcpy(gem_bo->map, shadow->vaddr, gem_bo->size);
+        }
+
+        return 0;
+}
+
+#if 0
+/**
+ * Writes a hang-state style dump ("vc5-dri-<n>.dump") of an exec: a version
+ * word, the state header, the per-BO table, then the BO contents.
+ *
+ * This function is currently compiled out by the surrounding #if 0.
+ *
+ * NOTE(review): the asprintf() result is unchecked and filename is never
+ * freed; the fopen() failure path returns without freeing state or bo_state.
+ */
+static void
+vc5_dump_to_file(struct vc5_exec_info *exec)
+{
+ static int dumpno = 0;
+ struct drm_vc5_get_hang_state *state;
+ struct drm_vc5_get_hang_state_bo *bo_state;
+ unsigned int dump_version = 0;
+
+ if (!(vc5_debug & VC5_DEBUG_DUMP))
+ return;
+
+ state = calloc(1, sizeof(*state));
+
+ int unref_count = 0;
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ unref_count++;
+ }
+
+ /* Add one more for the overflow area that isn't wrapped in a BO. */
+ state->bo_count = exec->bo_count + unref_count + 1;
+ bo_state = calloc(state->bo_count, sizeof(*bo_state));
+
+ char *filename = NULL;
+ asprintf(&filename, "vc5-dri-%d.dump", dumpno++);
+ FILE *f = fopen(filename, "w+");
+ if (!f) {
+ fprintf(stderr, "Couldn't open %s: %s", filename,
+ strerror(errno));
+ return;
+ }
+
+ fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+ state->ct0ca = exec->ct0ca;
+ state->ct0ea = exec->ct0ea;
+ state->ct1ca = exec->ct1ca;
+ state->ct1ea = exec->ct1ea;
+ state->start_bin = exec->ct0ca;
+ state->start_render = exec->ct1ca;
+ fwrite(state, sizeof(*state), 1, f);
+
+ int i;
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ bo_state[i].handle = i; /* Not used by the parser. */
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ }
+
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ bo_state[i].handle = 0;
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ i++;
+ }
+
+ /* Add the static overflow memory area. */
+ bo_state[i].handle = exec->bo_count;
+ bo_state[i].paddr = sim_state.overflow->ofs;
+ bo_state[i].size = sim_state.overflow->size;
+ i++;
+
+ fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+ for (int i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ /* The overflow area is dumped as zeros (a fresh calloc buffer), not
+ * as its actual contents.
+ */
+ void *overflow = calloc(1, sim_state.overflow->size);
+ fwrite(overflow, 1, sim_state.overflow->size, f);
+ free(overflow);
+
+ free(state);
+ free(bo_state);
+ fclose(f);
+}
+#endif
+
+#define V3D_WRITE(reg, val) v3d_hw_write_reg(sim_state.v3d, reg, val)
+#define V3D_READ(reg) v3d_hw_read_reg(sim_state.v3d, reg)
+
+/**
+ * Pulses the flush bit in the GCA cache control register: writes the
+ * current value with the bit set, then with it cleared. Does nothing when
+ * the simulated hardware reports no GCA.
+ */
+static void
+vc5_flush_l3(void)
+{
+        if (v3d_hw_has_gca(sim_state.v3d)) {
+                uint32_t ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
+
+                V3D_WRITE(V3D_GCA_CACHE_CTRL,
+                          ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
+                V3D_WRITE(V3D_GCA_CACHE_CTRL,
+                          ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
+        }
+}
+
+/* Invalidates the L2 cache by writing the clear and enable bits to its
+ * control register.  This is a read-only cache.
+ */
+static void
+vc5_flush_l2(void)
+{
+        v3d_hw_write_reg(sim_state.v3d, V3D_CTL_0_L2CACTL,
+                         V3D_CTL_0_L2CACTL_L2CCLR_SET |
+                         V3D_CTL_0_L2CACTL_L2CENA_SET);
+}
+
+/* Invalidates texture L2 cachelines.
+ *
+ * The L2TFLSTA/L2TFLEND registers are written with 0 and ~0 before the
+ * flush is kicked off (presumably the start/end of the address range to
+ * flush -- confirm against the register documentation).
+ */
+static void
+vc5_flush_l2t(void)
+{
+        v3d_hw_write_reg(sim_state.v3d, V3D_CTL_0_L2TFLSTA, 0);
+        v3d_hw_write_reg(sim_state.v3d, V3D_CTL_0_L2TFLEND, ~0);
+
+        /* Trigger the flush with flush mode field set to 0. */
+        v3d_hw_write_reg(sim_state.v3d, V3D_CTL_0_L2TCACTL,
+                         V3D_CTL_0_L2TCACTL_L2TFLS_SET |
+                         (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+}
+
+/* Invalidates the slice caches by setting every bit of the slice cache
+ * control register.  These are read-only caches.
+ */
+static void
+vc5_flush_slices(void)
+{
+        v3d_hw_write_reg(sim_state.v3d, V3D_CTL_0_SLCACTL, ~0);
+}
+
+/* Flushes/invalidates every cache level the simulator models, in the
+ * order L3, L2, texture L2, slice caches.
+ */
+static void
+vc5_flush_caches(void)
+{
+        vc5_flush_l3();      /* GCA, if present */
+        vc5_flush_l2();      /* read-only L2 */
+        vc5_flush_l2t();     /* texture L2 */
+        vc5_flush_slices();  /* per-slice caches */
+}
+
+/**
+ * Runs one submitted job on the simulator, synchronously.
+ *
+ * If color buffer 0 has a winsys mapping, its rows are copied into the
+ * driver-side BO mapping before the job runs and copied back afterwards.
+ * In between, the job's BOs are pinned, the GMP and caches are reset, and
+ * the bin and render control lists from \p submit are executed to
+ * completion.
+ *
+ * Returns 0 on success, or the nonzero result of pinning/unpinning the
+ * job's BOs.
+ */
+int
+vc5_simulator_flush(struct vc5_context *vc5,
+ struct drm_vc5_submit_cl *submit, struct vc5_job *job)
+{
+ struct vc5_screen *screen = vc5->screen;
+ int fd = screen->fd;
+ struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd);
+ struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]);
+ struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL;
+ struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL;
+ /* NOTE(review): csim_bo is dereferenced whenever ctex is non-NULL, so
+  * this assumes the BO lookup above cannot fail -- confirm.
+  */
+ uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0;
+ uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
+ /* Only the bytes common to both row pitches are copied per row. */
+ uint32_t row_len = MIN2(sim_stride, winsys_stride);
+ int ret;
+
+ /* Copy the winsys buffer contents in, one row at a time since the two
+  * strides may differ.
+  */
+ if (ctex && csim_bo->winsys_map) {
+#if 0
+ fprintf(stderr, "%dx%d %d %d %d\n",
+ ctex->base.b.width0, ctex->base.b.height0,
+ winsys_stride,
+ sim_stride,
+ ctex->bo->size);
+#endif
+
+ for (int y = 0; y < ctex->base.height0; y++) {
+ memcpy(ctex->bo->map + y * sim_stride,
+ csim_bo->winsys_map + y * winsys_stride,
+ row_len);
+ }
+ }
+
+ ret = vc5_simulator_pin_bos(fd, job);
+ if (ret)
+ return ret;
+
+ //vc5_dump_to_file(&exec);
+
+ /* Completely reset the GMP. */
+ v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CFG,
+ V3D_GMP_0_CFG_PROTENABLE_SET);
+ v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_TABLE_ADDR, file->gmp->ofs);
+ v3d_hw_write_reg(sim_state.v3d, V3D_GMP_0_CLEAR_LOAD, ~0);
+ /* Spin until the GMP status no longer reports the config as busy. */
+ while (v3d_hw_read_reg(sim_state.v3d, V3D_GMP_0_STATUS) &
+ V3D_GMP_0_STATUS_CFG_BUSY_SET) {
+ ;
+ }
+
+ vc5_flush_caches();
+
+ /* Queue the bin control list on thread 0. */
+ v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QBA, submit->bcl_start);
+ v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT0QEA, submit->bcl_end);
+
+ /* Wait for bin to complete before firing render, as it seems the
+ * simulator doesn't implement the semaphores.
+ */
+ while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0CA) !=
+ v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT0EA)) {
+ v3d_hw_tick(sim_state.v3d);
+ }
+
+ /* Queue the render control list on thread 1. */
+ v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QBA, submit->rcl_start);
+ v3d_hw_write_reg(sim_state.v3d, V3D_CLE_0_CT1QEA, submit->rcl_end);
+
+ /* Tick the simulator until the current address has reached the end
+  * address in both the CLE_0 and CLE_1 register sets.
+  */
+ while (v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1CA) !=
+ v3d_hw_read_reg(sim_state.v3d, V3D_CLE_0_CT1EA) ||
+ v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1CA) !=
+ v3d_hw_read_reg(sim_state.v3d, V3D_CLE_1_CT1EA)) {
+ v3d_hw_tick(sim_state.v3d);
+ }
+
+ ret = vc5_simulator_unpin_bos(fd, job);
+ if (ret)
+ return ret;
+
+ /* Copy the rendered rows back out to the winsys buffer. */
+ if (ctex && csim_bo->winsys_map) {
+ for (int y = 0; y < ctex->base.height0; y++) {
+ memcpy(csim_bo->winsys_map + y * winsys_stride,
+ ctex->bo->map + y * sim_stride,
+ row_len);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Map the underlying GEM object from the real hardware GEM handle.
+ *
+ * Asks the kernel for the dumb-buffer mmap offset of sim_bo->handle and
+ * maps sim_bo->size bytes of it read/write and shared.  Any failure is
+ * fatal: a message is printed to stderr and the process aborts, so the
+ * returned pointer is always a usable mapping.
+ */
+static void *
+vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo)
+{
+        struct drm_mode_map_dumb req = {
+                .handle = sim_bo->handle,
+        };
+
+        if (drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &req) != 0) {
+                fprintf(stderr, "map ioctl failure\n");
+                abort();
+        }
+
+        void *ptr = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE,
+                         MAP_SHARED, fd, req.offset);
+        if (ptr == MAP_FAILED) {
+                fprintf(stderr,
+                        "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
+                        sim_bo->handle, (long long)req.offset,
+                        (int)sim_bo->size);
+                abort();
+        }
+
+        return ptr;
+}
+
+/**
+ * Do fixups after a BO has been opened from a handle.
+ *
+ * Creates the simulator-side tracking for the handle, records the winsys
+ * stride, and maps the host BO so its contents can be copied to and from
+ * the simulator.
+ *
+ * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
+ * time, but we're still using drmPrimeFDToHandle() so we have this helper to
+ * be called afterward instead.
+ */
+void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride,
+                                    int handle, uint32_t size)
+{
+        struct vc5_simulator_bo *bo = vc5_create_simulator_bo(fd, handle, size);
+
+        bo->winsys_stride = winsys_stride;
+        bo->winsys_map = vc5_simulator_map_winsys_bo(fd, bo);
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation.
+ *
+ * Making a VC5 BO is just a matter of making a corresponding BO on the host.
+ * The host BO is a dumb buffer 128 bytes wide (8 bpp, width 128) with enough
+ * rows to cover args->size.  On success args->handle receives the host
+ * handle and args->offset the BO's offset in simulator memory.
+ *
+ * Returns the result of DRM_IOCTL_MODE_CREATE_DUMB.  On failure no simulator
+ * BO is registered and args is left untouched.
+ */
+static int
+vc5_simulator_create_bo_ioctl(int fd, struct drm_vc5_create_bo *args)
+{
+        struct drm_mode_create_dumb create = {
+                .width = 128,
+                .bpp = 8,
+                .height = (args->size + 127) / 128,
+        };
+
+        int ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+        if (ret != 0) {
+                /* create.handle/create.size are meaningless here; don't
+                 * track a simulator BO for a handle we never received.
+                 */
+                return ret;
+        }
+        assert(create.size >= args->size);
+
+        args->handle = create.handle;
+
+        struct vc5_simulator_bo *sim_bo =
+                vc5_create_simulator_bo(fd, create.handle, args->size);
+
+        args->offset = sim_bo->block->ofs;
+
+        return ret;
+}
+
+/**
+ * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation.
+ *
+ * We just pass this straight through to dumb mmap: the offset the kernel
+ * reports for args->handle is stored in args->offset, and the ioctl's
+ * return value is handed back unchanged.
+ */
+static int
+vc5_simulator_mmap_bo_ioctl(int fd, struct drm_vc5_mmap_bo *args)
+{
+        struct drm_mode_map_dumb req = {
+                .handle = args->handle,
+        };
+        int ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &req);
+
+        args->offset = req.offset;
+        return ret;
+}
+
+/**
+ * Simulated DRM_IOCTL_GEM_CLOSE: drops the simulator's tracking for the
+ * handle, then forwards the close to the real device and returns its result.
+ */
+static int
+vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
+{
+        struct vc5_simulator_file *sim_file = vc5_get_simulator_file_for_fd(fd);
+
+        /* Free the simulator's internal tracking. */
+        vc5_free_simulator_bo(vc5_get_simulator_bo(sim_file, args->handle));
+
+        /* Pass the call on down. */
+        return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args);
+}
+
+/**
+ * Simulated DRM_IOCTL_VC5_GET_PARAM.
+ *
+ * Looks up the register that backs args->param, reads it from the simulator
+ * and stores the result in args->value, returning 0.  Parameters that are
+ * out of range or have no register in the table (entries left at 0 by the
+ * designated initializer) are fatal: the offending parameter number is
+ * printed and the process aborts.
+ */
+static int
+vc5_simulator_get_param_ioctl(int fd, struct drm_vc5_get_param *args)
+{
+        static const uint32_t reg_map[] = {
+                [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
+                [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
+                [DRM_VC5_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
+        };
+
+        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
+                args->value = v3d_hw_read_reg(sim_state.v3d,
+                                              reg_map[args->param]);
+                return 0;
+        }
+
+        /* Report the parameter that was asked for, not the (output-only)
+         * value field.
+         */
+        fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+                (long long)args->param);
+        abort();
+}
+
+/**
+ * Entry point that stands in for drmIoctl() when running on the simulator.
+ *
+ * VC5-specific requests are emulated, GEM open/flink are forwarded to the
+ * real device, and anything else is treated as a driver bug: the request
+ * number is printed and the process aborts.
+ */
+int
+vc5_simulator_ioctl(int fd, unsigned long request, void *args)
+{
+        switch (request) {
+        /* Requests implemented by the simulator. */
+        case DRM_IOCTL_VC5_CREATE_BO:
+                return vc5_simulator_create_bo_ioctl(fd, args);
+        case DRM_IOCTL_VC5_MMAP_BO:
+                return vc5_simulator_mmap_bo_ioctl(fd, args);
+        case DRM_IOCTL_VC5_GET_PARAM:
+                return vc5_simulator_get_param_ioctl(fd, args);
+        case DRM_IOCTL_GEM_CLOSE:
+                return vc5_simulator_gem_close_ioctl(fd, args);
+
+        case DRM_IOCTL_VC5_WAIT_BO:
+        case DRM_IOCTL_VC5_WAIT_SEQNO:
+                /* We do all of the vc5 rendering synchronously, so we just
+                 * return immediately on the wait ioctls.  This ignores any
+                 * native rendering to the host BO, so it does mean we race on
+                 * front buffer rendering.
+                 */
+                return 0;
+
+        /* Requests handed straight to the host kernel. */
+        case DRM_IOCTL_GEM_OPEN:
+        case DRM_IOCTL_GEM_FLINK:
+                return drmIoctl(fd, request, args);
+
+        default:
+                fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request);
+                abort();
+        }
+}
+
+/**
+ * Reference-counted, one-time setup of the process-wide simulator state.
+ *
+ * The first caller creates the simulated hardware, asks it for 256MB of
+ * memory, sets up the heap allocator over that memory and creates the
+ * fd -> simulator-file map.  Later callers only bump the refcount.
+ *
+ * Everything, including the fd_map creation, happens with sim_state.mutex
+ * held so that a concurrent caller that sees a nonzero refcount can rely on
+ * the state being fully initialized once it gets the lock.
+ */
+static void
+vc5_simulator_init_global(void)
+{
+        mtx_lock(&sim_state.mutex);
+        if (sim_state.refcount++) {
+                mtx_unlock(&sim_state.mutex);
+                return;
+        }
+
+        sim_state.v3d = v3d_hw_auto_new(NULL);
+        v3d_hw_alloc_mem(sim_state.v3d, 256 * 1024 * 1024);
+        sim_state.mem_base =
+                v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size,
+                               &sim_state.mem);
+
+        sim_state.heap = u_mmInit(0, sim_state.mem_size);
+
+        /* Make a block of 0xd0 at address 0 to make sure we don't screw up
+         * and land there.
+         */
+        struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0);
+        memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096);
+
+        /* Created under the lock: other threads insert into and look up
+         * this table as soon as they observe refcount > 0.
+         */
+        sim_state.fd_map =
+                _mesa_hash_table_create(NULL,
+                                        _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+        mtx_unlock(&sim_state.mutex);
+}
+
+/**
+ * Per-screen simulator setup.
+ *
+ * Ensures the global simulator state exists, allocates the screen's
+ * simulator-file record (owned by \p screen via ralloc) with its BO map,
+ * registers it in the global fd map under screen->fd + 1, and carves out
+ * this file's GMP table from simulator memory.
+ */
+void
+vc5_simulator_init(struct vc5_screen *screen)
+{
+        vc5_simulator_init_global();
+
+        struct vc5_simulator_file *sim_file =
+                rzalloc(screen, struct vc5_simulator_file);
+        screen->sim_file = sim_file;
+
+        sim_file->bo_map = _mesa_hash_table_create(sim_file,
+                                                   _mesa_hash_pointer,
+                                                   _mesa_key_pointer_equal);
+
+        mtx_lock(&sim_state.mutex);
+        _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1),
+                                sim_file);
+        mtx_unlock(&sim_state.mutex);
+
+        /* NOTE(review): 8096 may have been meant as 8192, and this heap
+         * allocation is made without sim_state.mutex held -- confirm both.
+         */
+        sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0);
+        sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs -
+                               sim_state.mem_base);
+}
+
+/**
+ * Drops one reference on the global simulator state.  When the last
+ * reference goes away the fd map and the heap allocator are destroyed.
+ */
+void
+vc5_simulator_destroy(struct vc5_screen *screen)
+{
+        mtx_lock(&sim_state.mutex);
+
+        sim_state.refcount--;
+        if (sim_state.refcount == 0) {
+                _mesa_hash_table_destroy(sim_state.fd_map, NULL);
+                u_mmDestroy(sim_state.heap);
+                /* No memsetting the struct, because it contains the mutex. */
+                sim_state.mem = NULL;
+        }
+
+        mtx_unlock(&sim_state.mutex);
+}
+
+#endif /* USE_VC5_SIMULATOR */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_state.c b/lib/mesa/src/gallium/drivers/vc5/vc5_state.c
new file mode 100644
index 000000000..eebf94b4b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_state.c
@@ -0,0 +1,749 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_half.h"
+#include "util/u_helpers.h"
+
+#include "vc5_context.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+/* Returns a heap-allocated byte-for-byte copy of the \p size bytes at
+ * \p src, or NULL if the allocation fails.  The caller owns the result and
+ * releases it with free().
+ */
+static void *
+vc5_generic_cso_state_create(const void *src, uint32_t size)
+{
+        void *copy = calloc(1, size);
+
+        if (copy)
+                memcpy(copy, src, size);
+
+        return copy;
+}
+
+/* Generic CSO destructor: the state object is a single allocation, so
+ * freeing it is all there is to do.  \p pctx is unused.
+ */
+static void
+vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+        free(hwcso);
+}
+
+/* Stores the blend color both as given (floats) and converted per channel
+ * to half-float, then flags the blend color as dirty.
+ */
+static void
+vc5_set_blend_color(struct pipe_context *pctx,
+                    const struct pipe_blend_color *blend_color)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->blend_color.f = *blend_color;
+
+        for (int chan = 0; chan < 4; chan++) {
+                const float value = blend_color->color[chan];
+
+                ctx->blend_color.hf[chan] = util_float_to_half(value);
+        }
+
+        ctx->dirty |= VC5_DIRTY_BLEND_COLOR;
+}
+
+/* Copies the stencil reference values into the context and marks them
+ * dirty.
+ */
+static void
+vc5_set_stencil_ref(struct pipe_context *pctx,
+                    const struct pipe_stencil_ref *stencil_ref)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->stencil_ref = *stencil_ref;
+        ctx->dirty |= VC5_DIRTY_STENCIL_REF;
+}
+
+/* Copies the user clip state into the context and marks it dirty. */
+static void
+vc5_set_clip_state(struct pipe_context *pctx,
+                   const struct pipe_clip_state *clip)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->clip = *clip;
+        ctx->dirty |= VC5_DIRTY_CLIP;
+}
+
+/* Stores the sample mask, keeping only the low VC5_MAX_SAMPLES bits, and
+ * marks it dirty.
+ */
+static void
+vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        const unsigned valid_bits = (1 << VC5_MAX_SAMPLES) - 1;
+
+        ctx->sample_mask = sample_mask & valid_bits;
+        ctx->dirty |= VC5_DIRTY_SAMPLE_MASK;
+}
+
+/* Returns the upper 16 bits of fui(f), i.e. the value is truncated rather
+ * than rounded.  (Assumes fui() yields the raw 32-bit pattern of the float;
+ * the "187" in the name presumably refers to a 1/8/7-bit sign/exponent/
+ * mantissa layout -- confirm.)
+ */
+static uint16_t
+float_to_187_half(float f)
+{
+        const uint32_t bits = fui(f);
+
+        return bits >> 16;
+}
+
+/* Creates the driver's rasterizer CSO: a copy of the gallium state plus a
+ * clamped point size and, when triangle offset is enabled, the offset
+ * units/factor converted with float_to_187_half().  Returns NULL if the
+ * allocation fails.
+ */
+static void *
+vc5_create_rasterizer_state(struct pipe_context *pctx,
+                            const struct pipe_rasterizer_state *cso)
+{
+        struct vc5_rasterizer_state *rast =
+                CALLOC_STRUCT(vc5_rasterizer_state);
+
+        if (!rast)
+                return NULL;
+
+        rast->base = *cso;
+
+        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+         * BCM21553).
+         */
+        rast->point_size = MAX2(cso->point_size, .125f);
+
+        /* Offset fields stay zeroed unless triangle offset is enabled. */
+        if (!cso->offset_tri)
+                return rast;
+
+        rast->offset_units = float_to_187_half(cso->offset_units);
+        rast->offset_factor = float_to_187_half(cso->offset_scale);
+
+        return rast;
+}
+
+/* Blend state is baked into shaders, so the CSO is nothing more than a
+ * private copy of the gallium blend state (NULL on allocation failure).
+ */
+static void *
+vc5_create_blend_state(struct pipe_context *pctx,
+                       const struct pipe_blend_state *cso)
+{
+        return vc5_generic_cso_state_create(cso, sizeof(*cso));
+}
+
+/* Creates the depth/stencil/alpha CSO: a copy of the gallium state plus a
+ * precomputed early_z_enable flag.  Returns NULL if the allocation fails.
+ */
+static void *
+vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx,
+                                     const struct pipe_depth_stencil_alpha_state *cso)
+{
+        struct vc5_depth_stencil_alpha_state *zsa =
+                CALLOC_STRUCT(vc5_depth_stencil_alpha_state);
+
+        if (!zsa)
+                return NULL;
+
+        zsa->base = *cso;
+
+        if (cso->depth.enabled) {
+                /* We only handle early Z in the < direction because otherwise
+                 * we'd have to runtime guess which direction to set in the
+                 * render config.
+                 */
+                const bool less_func =
+                        (cso->depth.func == PIPE_FUNC_LESS ||
+                         cso->depth.func == PIPE_FUNC_LEQUAL);
+
+                /* Either stencil face being enabled with a zfail op other
+                 * than KEEP rules early Z out.  The second face is only
+                 * considered when the first one is enabled.
+                 */
+                const bool back_keeps =
+                        (!cso->stencil[1].enabled ||
+                         cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP);
+                const bool stencil_keeps =
+                        (!cso->stencil[0].enabled ||
+                         (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
+                          back_keeps));
+
+                zsa->early_z_enable = less_func && stencil_keeps;
+        }
+
+        return zsa;
+}
+
+/* Copies the polygon stipple pattern into the context and marks it dirty. */
+static void
+vc5_set_polygon_stipple(struct pipe_context *pctx,
+                        const struct pipe_poly_stipple *stipple)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->stipple = *stipple;
+        ctx->dirty |= VC5_DIRTY_STIPPLE;
+}
+
+/* Stores a single scissor rectangle and marks it dirty.  Only the first
+ * entry of \p scissor is read; \p start_slot and \p num_scissors are
+ * ignored.
+ */
+static void
+vc5_set_scissor_states(struct pipe_context *pctx,
+                       unsigned start_slot,
+                       unsigned num_scissors,
+                       const struct pipe_scissor_state *scissor)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->scissor = scissor[0];
+        ctx->dirty |= VC5_DIRTY_SCISSOR;
+}
+
+/* Stores a single viewport and marks it dirty.  Only the first entry of
+ * \p viewport is read; \p start_slot and \p num_viewports are ignored.
+ */
+static void
+vc5_set_viewport_states(struct pipe_context *pctx,
+                        unsigned start_slot,
+                        unsigned num_viewports,
+                        const struct pipe_viewport_state *viewport)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->viewport = viewport[0];
+        ctx->dirty |= VC5_DIRTY_VIEWPORT;
+}
+
+/* Updates the bound vertex buffers for slots [start_slot, start_slot +
+ * count) through the shared gallium helper, recomputes the buffer count
+ * from the highest enabled slot, and marks the vertex buffers dirty.
+ */
+static void
+vc5_set_vertex_buffers(struct pipe_context *pctx,
+                       unsigned start_slot, unsigned count,
+                       const struct pipe_vertex_buffer *vb)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_vertexbuf_stateobj *vbufs = &ctx->vertexbuf;
+
+        util_set_vertex_buffers_mask(vbufs->vb, &vbufs->enabled_mask, vb,
+                                     start_slot, count);
+        vbufs->count = util_last_bit(vbufs->enabled_mask);
+
+        ctx->dirty |= VC5_DIRTY_VTXBUF;
+}
+
+/* Makes \p hwcso the current blend state and marks blend state dirty. */
+static void
+vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->blend = hwcso;
+        ctx->dirty |= VC5_DIRTY_BLEND;
+}
+
+/* Makes \p hwcso the current rasterizer state.  The flat-shade flags are
+ * additionally marked dirty when both the old and new states exist and
+ * disagree on flatshade.
+ */
+static void
+vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_rasterizer_state *incoming = hwcso;
+
+        if (ctx->rasterizer && incoming) {
+                if (ctx->rasterizer->base.flatshade !=
+                    incoming->base.flatshade)
+                        ctx->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
+        ctx->rasterizer = hwcso;
+        ctx->dirty |= VC5_DIRTY_RASTERIZER;
+}
+
+/* Makes \p hwcso the current depth/stencil/alpha state and marks it dirty. */
+static void
+vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->zsa = hwcso;
+        ctx->dirty |= VC5_DIRTY_ZSA;
+}
+
+/**
+ * Creates the vertex elements CSO.
+ *
+ * Copies the gallium vertex elements, packs one
+ * GL_SHADER_STATE_ATTRIBUTE_RECORD per element into so->attrs, and allocates
+ * a BO holding a default 4-component value for each of VC5_MAX_ATTRIBUTES
+ * attributes.  Unsupported formats abort the process.
+ *
+ * Returns the new state object, or NULL if its allocation fails.
+ *
+ * NOTE(review): the default_attribute_values BO allocated here is not
+ * released by vc5_generic_cso_state_delete(), which only free()s the
+ * object; the result of vc5_bo_alloc()/vc5_bo_map() is also used unchecked.
+ */
+static void *
+vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct vc5_context *vc5 = vc5_context(pctx);
+ struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj);
+
+ if (!so)
+ return NULL;
+
+ /* NOTE(review): assumes num_elements fits in so->pipe -- confirm. */
+ memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+ so->num_elements = num_elements;
+
+ for (int i = 0; i < so->num_elements; i++) {
+ const struct pipe_vertex_element *elem = &elements[i];
+ const struct util_format_description *desc =
+ util_format_description(elem->src_format);
+ /* The first channel's size and type stand in for the whole format. */
+ uint32_t r_size = desc->channel[0].size;
+
+ struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = {
+ /* vec_size == 0 means 4 */
+ .vec_size = desc->nr_channels & 3,
+ .signed_int_type = (desc->channel[0].type ==
+ UTIL_FORMAT_TYPE_SIGNED),
+
+ .normalized_int_type = desc->channel[0].normalized,
+ .read_as_int_uint = desc->channel[0].pure_integer,
+ .instance_divisor = elem->instance_divisor,
+ };
+
+ /* Pick the hardware attribute type from channel type and bit size. */
+ switch (desc->channel[0].type) {
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if (r_size == 32) {
+ attr_unpacked.type = ATTRIBUTE_FLOAT;
+ } else {
+ assert(r_size == 16);
+ attr_unpacked.type = ATTRIBUTE_HALF_FLOAT;
+ }
+ break;
+
+ case UTIL_FORMAT_TYPE_SIGNED:
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ switch (r_size) {
+ case 32:
+ attr_unpacked.type = ATTRIBUTE_INT;
+ break;
+ case 16:
+ attr_unpacked.type = ATTRIBUTE_SHORT;
+ break;
+ case 10:
+ attr_unpacked.type = ATTRIBUTE_INT2_10_10_10;
+ break;
+ case 8:
+ attr_unpacked.type = ATTRIBUTE_BYTE;
+ break;
+ default:
+ fprintf(stderr,
+ "format %s unsupported\n",
+ desc->name);
+ attr_unpacked.type = ATTRIBUTE_BYTE;
+ abort();
+ }
+ break;
+
+ default:
+ fprintf(stderr,
+ "format %s unsupported\n",
+ desc->name);
+ abort();
+ }
+
+ /* Records are stored back to back, one packet length apart. */
+ const uint32_t size =
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+ V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(NULL,
+ (uint8_t *)&so->attrs[i * size],
+ &attr_unpacked);
+ }
+
+ /* Set up the default attribute values in case any of the vertex
+ * elements use them.
+ */
+ so->default_attribute_values = vc5_bo_alloc(vc5->screen,
+ VC5_MAX_ATTRIBUTES *
+ 4 * sizeof(float),
+ "default attributes");
+ uint32_t *attrs = vc5_bo_map(so->default_attribute_values);
+ for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) {
+ attrs[i * 4 + 0] = 0;
+ attrs[i * 4 + 1] = 0;
+ attrs[i * 4 + 2] = 0;
+ /* The fourth component defaults to one: integer 1 for pure-integer
+  * formats, the bit pattern of 1.0f otherwise.
+  */
+ if (i < so->num_elements &&
+ util_format_is_pure_integer(so->pipe[i].src_format)) {
+ attrs[i * 4 + 3] = 1;
+ } else {
+ attrs[i * 4 + 3] = fui(1.0);
+ }
+ }
+
+ return so;
+}
+
+/* Makes \p hwcso the current vertex elements state and marks it dirty. */
+static void
+vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+
+        ctx->vtx = hwcso;
+        ctx->dirty |= VC5_DIRTY_VTXSTATE;
+}
+
+/* Binds (or, when \p cb is NULL, unbinds) constant buffer \p index of the
+ * given shader stage and keeps the stage's enabled/dirty masks in sync.
+ * The context-wide constbuf dirty flag is only raised when binding.
+ */
+static void
+vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+                        const struct pipe_constant_buffer *cb)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_constbuf_stateobj *bufs = &ctx->constbuf[shader];
+
+        util_copy_constant_buffer(&bufs->cb[index], cb);
+
+        if (likely(cb)) {
+                bufs->enabled_mask |= 1 << index;
+                bufs->dirty_mask |= 1 << index;
+                ctx->dirty |= VC5_DIRTY_CONSTBUF;
+        } else {
+                /* Note that the state tracker can unbind constant buffers
+                 * by passing NULL here.
+                 */
+                bufs->enabled_mask &= ~(1 << index);
+                bufs->dirty_mask &= ~(1 << index);
+        }
+}
+
+/* Replaces the context's framebuffer state with \p framebuffer, taking
+ * references on the new color/depth surfaces and dropping the references
+ * held on any previously bound color buffers beyond the new count.  The
+ * current job pointer is cleared and the framebuffer is marked dirty.
+ */
+static void
+vc5_set_framebuffer_state(struct pipe_context *pctx,
+                          const struct pipe_framebuffer_state *framebuffer)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct pipe_framebuffer_state *bound = &ctx->framebuffer;
+        const unsigned new_count = framebuffer->nr_cbufs;
+        unsigned slot = 0;
+
+        ctx->job = NULL;
+
+        /* Reference the incoming color buffers... */
+        while (slot < new_count) {
+                pipe_surface_reference(&bound->cbufs[slot],
+                                       framebuffer->cbufs[slot]);
+                slot++;
+        }
+        /* ...and release whatever was bound past the new count. */
+        while (slot < bound->nr_cbufs) {
+                pipe_surface_reference(&bound->cbufs[slot], NULL);
+                slot++;
+        }
+
+        bound->nr_cbufs = new_count;
+
+        pipe_surface_reference(&bound->zsbuf, framebuffer->zsbuf);
+
+        bound->width = framebuffer->width;
+        bound->height = framebuffer->height;
+
+        ctx->dirty |= VC5_DIRTY_FRAMEBUFFER;
+}
+
+/* Returns the texture state object for the fragment or vertex stage.
+ *
+ * Note the side effect: the matching texture dirty flag is set on every
+ * call.  Any other shader stage is fatal.
+ */
+static struct vc5_texture_stateobj *
+vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader)
+{
+        switch (shader) {
+        case PIPE_SHADER_FRAGMENT:
+                vc5->dirty |= VC5_DIRTY_FRAGTEX;
+                return &vc5->fragtex;
+        case PIPE_SHADER_VERTEX:
+                vc5->dirty |= VC5_DIRTY_VERTTEX;
+                return &vc5->verttex;
+        default:
+                fprintf(stderr, "Unknown shader target %d\n", shader);
+                abort();
+        }
+}
+
+/* Maps a gallium PIPE_TEX_WRAP_* mode to the hardware wrap mode number:
+ * 0 = repeat, 1 = clamp to edge, 2 = mirrored repeat, 3 = clamp to border.
+ * Plain CLAMP has no direct equivalent, so it becomes clamp-to-edge when
+ * \p using_nearest is set and clamp-to-border otherwise.
+ */
+static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
+{
+        switch (pipe_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        case PIPE_TEX_WRAP_CLAMP:
+                if (using_nearest)
+                        return 1;
+                return 3;
+        default:
+                unreachable("Unknown wrap mode");
+        }
+}
+
+
+/* Creates the sampler CSO.
+ *
+ * The gallium sampler state is copied to the start of the object, and two
+ * pieces of hardware state are pre-packed from it: texture uniform
+ * parameter 0 (the S/T/R wrap modes) and a TEXTURE_SHADER_STATE packet
+ * carrying the minimum LOD (clamped to >= 0), depth compare function and
+ * LOD bias.  Returns NULL if the allocation fails.
+ */
+static void *
+vc5_create_sampler_state(struct pipe_context *pctx,
+                         const struct pipe_sampler_state *cso)
+{
+        struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state);
+
+        if (!so)
+                return NULL;
+
+        memcpy(so, cso, sizeof(*cso));
+
+        /* The image filters are PIPE_TEX_FILTER_* values; the MIPFILTER
+         * enum only applies to min_mip_filter.
+         */
+        bool either_nearest =
+                (cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
+                 cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
+                .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest),
+                .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest),
+                .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest),
+        };
+        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
+                                                         (uint8_t *)&so->p0,
+                                                         &p0_unpacked);
+
+        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
+                cl_packet_header(TEXTURE_SHADER_STATE),
+
+                .min_level_of_detail = MAX2(cso->min_lod, 0.0),
+                .depth_compare_function = cso->compare_func,
+                .fixed_bias = cso->lod_bias,
+        };
+        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
+                      cl_packet_length(TEXTURE_SHADER_STATE));
+        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
+                                             &state_unpacked);
+
+        return so;
+}
+
+/* Binds \p nr sampler states for a shader stage, starting at slot 0.
+ * Slots previously in use beyond \p nr are cleared, and the stage's
+ * sampler count becomes one past the highest non-NULL entry supplied.
+ */
+static void
+vc5_sampler_states_bind(struct pipe_context *pctx,
+                        enum pipe_shader_type shader, unsigned start,
+                        unsigned nr, void **hwcso)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_texture_stateobj *tex = vc5_get_stage_tex(ctx, shader);
+
+        assert(start == 0);
+
+        unsigned highest_used = 0;
+        unsigned slot;
+
+        for (slot = 0; slot < nr; slot++) {
+                void *sampler = hwcso[slot];
+
+                if (sampler)
+                        highest_used = slot + 1;
+                tex->samplers[slot] = sampler;
+        }
+
+        /* Drop stale bindings left over from a larger previous bind. */
+        while (slot < tex->num_samplers)
+                tex->samplers[slot++] = NULL;
+
+        tex->num_samplers = highest_used;
+}
+
+/* Maps a gallium PIPE_SWIZZLE_* selector to the hardware swizzle number:
+ * constant zero -> 0, constant one -> 1, and the X/Y/Z/W channel selectors
+ * -> their gallium value plus 2.
+ */
+static uint32_t
+translate_swizzle(unsigned char pipe_swizzle)
+{
+        switch (pipe_swizzle) {
+        case PIPE_SWIZZLE_X:
+        case PIPE_SWIZZLE_Y:
+        case PIPE_SWIZZLE_Z:
+        case PIPE_SWIZZLE_W:
+                return 2 + pipe_swizzle;
+        case PIPE_SWIZZLE_0:
+                return 0;
+        case PIPE_SWIZZLE_1:
+                return 1;
+        default:
+                unreachable("unknown swizzle");
+        }
+}
+
+/**
+ * Creates a sampler view of \p prsc described by the template \p cso.
+ *
+ * Besides copying the template, this pre-packs two pieces of hardware
+ * state: texture uniform parameter 1 (which return words the texture unit
+ * should produce) and a TEXTURE_SHADER_STATE packet (dimensions, format,
+ * sRGB, base level, array stride, swizzle and UIF flags).  The composed
+ * format+view swizzle is kept in so->swizzle.
+ *
+ * The view holds a reference on \p prsc.  Returns NULL if the allocation
+ * fails.
+ */
+static struct pipe_sampler_view *
+vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
+ const struct pipe_sampler_view *cso)
+{
+ struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view);
+ struct vc5_resource *rsc = vc5_resource(prsc);
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ /* Take the view's reference on the resource; so->base.texture is
+  * pointed at it further down.
+  */
+ pipe_reference(NULL, &prsc->reference);
+
+ struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
+ };
+
+ /* Word 0 is always returned.  16-bit return formats always use words
+  * 0 and 1; otherwise one word is returned per channel.
+  */
+ unpacked.return_word_0_of_texture_data = true;
+ if (vc5_get_tex_return_size(cso->format) == 16) {
+ unpacked.return_word_1_of_texture_data = true;
+ } else {
+ int chans = vc5_get_tex_return_channels(cso->format);
+
+ if (chans > 1)
+ unpacked.return_word_1_of_texture_data = true;
+ if (chans > 2)
+ unpacked.return_word_2_of_texture_data = true;
+ if (chans > 3)
+ unpacked.return_word_3_of_texture_data = true;
+ }
+
+ V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
+ (uint8_t *)&so->p1,
+ &unpacked);
+
+ /* Compute the sampler view's swizzle up front. This will be plugged
+ * into either the sampler (for 16-bit returns) or the shader's
+ * texture key (for 32)
+ */
+ uint8_t view_swizzle[4] = {
+ cso->swizzle_r,
+ cso->swizzle_g,
+ cso->swizzle_b,
+ cso->swizzle_a
+ };
+ const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format);
+ util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);
+
+ /* Fix up the fields copied from the template so the view stands alone. */
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
+ cl_packet_header(TEXTURE_SHADER_STATE),
+
+ .image_width = prsc->width0,
+ .image_height = prsc->height0,
+ .image_depth = prsc->depth0,
+
+ .texture_type = rsc->tex_format,
+ .srgb = util_format_is_srgb(cso->format),
+
+ .base_level = cso->u.tex.first_level,
+ .array_stride_64_byte_aligned = rsc->cube_map_stride / 64,
+ };
+
+ /* Note: Contrary to the docs, the swizzle still applies even
+ * if the return size is 32. It's just that you probably want
+ * to swizzle in the shader, because you need the Y/Z/W
+ * channels to be defined.
+ */
+ if (vc5_get_tex_return_size(cso->format) != 32) {
+ state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]);
+ state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]);
+ state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]);
+ state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]);
+ } else {
+ /* 32-bit returns: program an identity swizzle in the hardware. */
+ state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
+ state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
+ state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
+ state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
+ }
+
+ /* XXX: While we need to use this flag to enable tiled
+ * resource sharing (even a small shared buffer should be UIF,
+ * not UBLINEAR or raster), this is also at the moment
+ * patching up the fact that our resource layout's decisions
+ * about XOR don't quite match the HW's.
+ */
+ switch (rsc->slices[0].tiling) {
+ case VC5_TILING_UIF_NO_XOR:
+ case VC5_TILING_UIF_XOR:
+ state_unpacked.level_0_is_strictly_uif = true;
+ state_unpacked.level_0_xor_enable = false;
+ break;
+ default:
+ break;
+ }
+
+ /* The packed buffer must be exactly one TEXTURE_SHADER_STATE packet. */
+ STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
+ cl_packet_length(TEXTURE_SHADER_STATE));
+ cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
+ &state_unpacked);
+
+ return &so->base;
+}
+
+/* Destroys a sampler view: drops its reference on the underlying texture
+ * and frees the view itself.
+ */
+static void
+vc5_sampler_view_destroy(struct pipe_context *pctx,
+                         struct pipe_sampler_view *view)
+{
+        /* Passing NULL releases the reference and clears view->texture. */
+        pipe_resource_reference(&view->texture, NULL);
+
+        free(view);
+}
+
+/* Binds \p nr sampler views for a shader stage, starting at slot 0.
+ * References are taken on the new views, slots previously in use beyond
+ * \p nr are released, and the stage's texture count becomes one past the
+ * highest non-NULL view supplied.
+ */
+static void
+vc5_set_sampler_views(struct pipe_context *pctx,
+                      enum pipe_shader_type shader,
+                      unsigned start, unsigned nr,
+                      struct pipe_sampler_view **views)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_texture_stateobj *tex = vc5_get_stage_tex(ctx, shader);
+        unsigned highest_used = 0;
+        unsigned slot;
+
+        assert(start == 0);
+
+        for (slot = 0; slot < nr; slot++) {
+                struct pipe_sampler_view *view = views[slot];
+
+                if (view)
+                        highest_used = slot + 1;
+                pipe_sampler_view_reference(&tex->textures[slot], view);
+        }
+
+        /* Unreference whatever is left from a larger previous bind. */
+        while (slot < tex->num_textures) {
+                pipe_sampler_view_reference(&tex->textures[slot], NULL);
+                slot++;
+        }
+
+        tex->num_textures = highest_used;
+}
+
+/* Creates a stream output target covering \p buffer_size bytes of \p prsc
+ * starting at \p buffer_offset.  The target starts with a reference count
+ * of 1 and holds a reference on the buffer.  Returns NULL if the allocation
+ * fails.
+ */
+static struct pipe_stream_output_target *
+vc5_create_stream_output_target(struct pipe_context *pctx,
+                                struct pipe_resource *prsc,
+                                unsigned buffer_offset,
+                                unsigned buffer_size)
+{
+        struct pipe_stream_output_target *so_target =
+                CALLOC_STRUCT(pipe_stream_output_target);
+
+        if (!so_target)
+                return NULL;
+
+        pipe_reference_init(&so_target->reference, 1);
+        pipe_resource_reference(&so_target->buffer, prsc);
+
+        so_target->context = pctx;
+        so_target->buffer_offset = buffer_offset;
+        so_target->buffer_size = buffer_size;
+
+        return so_target;
+}
+
+/* Destroys a stream output target: drops its reference on the buffer and
+ * frees the target.
+ */
+static void
+vc5_stream_output_target_destroy(struct pipe_context *pctx,
+                                 struct pipe_stream_output_target *target)
+{
+        /* Passing NULL releases the reference and clears target->buffer. */
+        pipe_resource_reference(&target->buffer, NULL);
+
+        free(target);
+}
+
+/* Binds \p num_targets stream output targets, releasing any previously
+ * bound targets beyond that count, and marks stream output dirty.
+ * \p offsets is not used.
+ */
+static void
+vc5_set_stream_output_targets(struct pipe_context *pctx,
+                              unsigned num_targets,
+                              struct pipe_stream_output_target **targets,
+                              const unsigned *offsets)
+{
+        struct vc5_context *ctx = vc5_context(pctx);
+        struct vc5_streamout_stateobj *so = &ctx->streamout;
+        unsigned slot = 0;
+
+        assert(num_targets <= ARRAY_SIZE(so->targets));
+
+        while (slot < num_targets) {
+                pipe_so_target_reference(&so->targets[slot], targets[slot]);
+                slot++;
+        }
+
+        /* Unreference targets left over from a larger previous bind. */
+        while (slot < so->num_targets) {
+                pipe_so_target_reference(&so->targets[slot], NULL);
+                slot++;
+        }
+
+        so->num_targets = num_targets;
+
+        ctx->dirty |= VC5_DIRTY_STREAMOUT;
+}
+
+/* Plugs this file's state handling into the pipe_context vtable. */
+void
+vc5_state_init(struct pipe_context *pctx)
+{
+        /* Parameter-style state that is simply copied into the context. */
+        pctx->set_blend_color = vc5_set_blend_color;
+        pctx->set_stencil_ref = vc5_set_stencil_ref;
+        pctx->set_clip_state = vc5_set_clip_state;
+        pctx->set_sample_mask = vc5_set_sample_mask;
+        pctx->set_polygon_stipple = vc5_set_polygon_stipple;
+        pctx->set_scissor_states = vc5_set_scissor_states;
+        pctx->set_viewport_states = vc5_set_viewport_states;
+
+        /* Buffer and framebuffer bindings. */
+        pctx->set_constant_buffer = vc5_set_constant_buffer;
+        pctx->set_framebuffer_state = vc5_set_framebuffer_state;
+        pctx->set_vertex_buffers = vc5_set_vertex_buffers;
+
+        /* Constant state objects: create / bind / delete.  All of them are
+         * destroyed through the generic free()-based destructor.
+         */
+        pctx->create_blend_state = vc5_create_blend_state;
+        pctx->bind_blend_state = vc5_blend_state_bind;
+        pctx->delete_blend_state = vc5_generic_cso_state_delete;
+
+        pctx->create_rasterizer_state = vc5_create_rasterizer_state;
+        pctx->bind_rasterizer_state = vc5_rasterizer_state_bind;
+        pctx->delete_rasterizer_state = vc5_generic_cso_state_delete;
+
+        pctx->create_depth_stencil_alpha_state =
+                vc5_create_depth_stencil_alpha_state;
+        pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind;
+        pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete;
+
+        pctx->create_vertex_elements_state = vc5_vertex_state_create;
+        pctx->bind_vertex_elements_state = vc5_vertex_state_bind;
+        pctx->delete_vertex_elements_state = vc5_generic_cso_state_delete;
+
+        pctx->create_sampler_state = vc5_create_sampler_state;
+        pctx->bind_sampler_states = vc5_sampler_states_bind;
+        pctx->delete_sampler_state = vc5_generic_cso_state_delete;
+
+        /* Sampler views. */
+        pctx->create_sampler_view = vc5_create_sampler_view;
+        pctx->set_sampler_views = vc5_set_sampler_views;
+        pctx->sampler_view_destroy = vc5_sampler_view_destroy;
+
+        /* Stream output (transform feedback) targets. */
+        pctx->create_stream_output_target = vc5_create_stream_output_target;
+        pctx->set_stream_output_targets = vc5_set_stream_output_targets;
+        pctx->stream_output_target_destroy = vc5_stream_output_target_destroy;
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c
new file mode 100644
index 000000000..279774e55
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc5_tiling.c
+ *
+ * Handles information about the VC5 tiling formats, and loading and storing
+ * from them.
+ */
+
+#include <stdint.h>
+#include "vc5_screen.h"
+#include "vc5_context.h"
+#include "vc5_tiling.h"
+
+ /**
+ * Geometry of one macroblock for a given pixel size; mb_layouts[] holds one
+ * entry per MB_LAYOUT_* index.
+ */
+ struct mb_layout {
+ /** Height, in pixels, of a macroblock (2x2 utiles, a UIF block). */
+ uint32_t height;
+ /** Width, in pixels, of a macroblock (2x2 utiles, a UIF block). */
+ uint32_t width;
+ /**
+ * Row stride, in pixels, used to address a pixel inside one utile of
+ * the macroblock. get_mb_layout() asserts that it equals
+ * vc5_utile_width(cpp).
+ */
+ uint32_t tile_row_stride;
+ };
+
+ /**
+ * Indices into mb_layouts[], one per supported pixel size.
+ *
+ * get_mb_layout() computes the index as ffs(cpp) - 1, so the order of these
+ * values has to follow cpp = 1, 2, 4, 8, 16.
+ */
+ enum {
+ MB_LAYOUT_8BPP,
+ MB_LAYOUT_16BPP,
+ MB_LAYOUT_32BPP,
+ MB_LAYOUT_64BPP,
+ MB_LAYOUT_128BPP,
+ };
+
+ /**
+ * Macroblock geometry for each pixel size, indexed by MB_LAYOUT_*.
+ *
+ * For every entry width * height * cpp is 256 bytes.
+ */
+ static const struct mb_layout mb_layouts[] = {
+ [MB_LAYOUT_8BPP] = { .height = 16, .width = 16, .tile_row_stride = 8 },
+ [MB_LAYOUT_16BPP] = { .height = 8, .width = 16, .tile_row_stride = 8 },
+ [MB_LAYOUT_32BPP] = { .height = 8, .width = 8, .tile_row_stride = 4 },
+ [MB_LAYOUT_64BPP] = { .height = 4, .width = 8, .tile_row_stride = 4 },
+ [MB_LAYOUT_128BPP] = { .height = 4, .width = 4, .tile_row_stride = 2 },
+ };
+
+ /**
+ * Returns the macroblock layout for a pixel size of \p cpp bytes.
+ *
+ * The table index is ffs(cpp) - 1, so \p cpp is expected to be one of 1, 2,
+ * 4, 8 or 16. The index is not range-checked here; the asserts below only
+ * cross-check the selected entry against vc5_utile_width().
+ */
+ static const struct mb_layout *
+ get_mb_layout(int cpp)
+ {
+ const struct mb_layout *layout = &mb_layouts[ffs(cpp) - 1];
+
+ /* Sanity check the table. XXX: We should de-duplicate. */
+ assert(layout->width == vc5_utile_width(cpp) * 2);
+ assert(layout->tile_row_stride == vc5_utile_width(cpp));
+
+ return layout;
+ }
+
+ /**
+ * Return the width in pixels of a 64-byte microtile.
+ *
+ * \param cpp bytes per pixel; must be 1, 2, 4, 8 or 16, any other value
+ * reaches unreachable().
+ */
+ uint32_t
+ vc5_utile_width(int cpp)
+ {
+ switch (cpp) {
+ case 1:
+ case 2:
+ return 8;
+ case 4:
+ case 8:
+ return 4;
+ case 16:
+ return 2;
+ default:
+ unreachable("unknown cpp");
+ }
+ }
+
+ /**
+ * Return the height in pixels of a 64-byte microtile.
+ *
+ * \param cpp bytes per pixel; must be 1, 2, 4, 8 or 16, any other value
+ * reaches unreachable().
+ */
+ uint32_t
+ vc5_utile_height(int cpp)
+ {
+ switch (cpp) {
+ case 1:
+ return 8;
+ case 2:
+ case 4:
+ return 4;
+ case 8:
+ case 16:
+ return 2;
+ default:
+ unreachable("unknown cpp");
+ }
+ }
+
+/**
+ * Returns the byte address for a given pixel within a utile.
+ *
+ * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4
+ * arrangement.
+ */
+static inline uint32_t
+vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
+{
+ uint32_t utile_w = vc5_utile_width(cpp);
+ uint32_t utile_h = vc5_utile_height(cpp);
+
+ assert(x < utile_w && y < utile_h);
+
+ return x * cpp + y * utile_w * cpp;
+}
+
+/**
+ * Returns the byte offset for a given pixel in a LINEARTILE layout.
+ *
+ * LINEARTILE is a single line of utiles in either the X or Y direction.
+ */
+static inline uint32_t
+vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+{
+ uint32_t utile_w = vc5_utile_width(cpp);
+ uint32_t utile_h = vc5_utile_height(cpp);
+ uint32_t utile_index_x = x / utile_w;
+ uint32_t utile_index_y = y / utile_h;
+
+ assert(utile_index_x == 0 || utile_index_y == 0);
+
+ return (64 * (utile_index_x + utile_index_y) +
+ vc5_get_utile_pixel_offset(cpp,
+ x & (utile_w - 1),
+ y & (utile_h - 1)));
+}
+
+/**
+ * Returns the byte offset for a given pixel in a UBLINEAR layout.
+ *
+ * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
+ * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
+ */
+static inline uint32_t
+vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
+ int ublinear_number)
+{
+ uint32_t utile_w = vc5_utile_width(cpp);
+ uint32_t utile_h = vc5_utile_height(cpp);
+ uint32_t ub_w = utile_w * 2;
+ uint32_t ub_h = utile_h * 2;
+ uint32_t ub_x = x / ub_w;
+ uint32_t ub_y = y / ub_h;
+
+ return (256 * (ub_y * ublinear_number +
+ ub_x) +
+ ((x & utile_w) ? 64 : 0) +
+ ((y & utile_h) ? 128 : 0) +
+ + vc5_get_utile_pixel_offset(cpp,
+ x & (utile_w - 1),
+ y & (utile_h - 1)));
+}
+
+ /**
+ * UBLINEAR pixel offset for the 2-column variant.
+ *
+ * Adapter with the get_pixel_offset callback signature; \p image_h is unused.
+ */
+ static inline uint32_t
+ vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+ {
+ return vc5_get_ublinear_pixel_offset(cpp, x, y, 2);
+ }
+
+ /**
+ * UBLINEAR pixel offset for the 1-column variant.
+ *
+ * Adapter with the get_pixel_offset callback signature; \p image_h is unused.
+ */
+ static inline uint32_t
+ vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+ {
+ return vc5_get_ublinear_pixel_offset(cpp, x, y, 1);
+ }
+
+/**
+ * Returns the byte offset for a given pixel in a UIF layout.
+ *
+ * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
+ * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
+ * 4x4 groups, and those 4x4 groups are then stored in raster order.
+ */
+static inline uint32_t
+vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+{
+ const struct mb_layout *layout = get_mb_layout(cpp);
+ uint32_t mb_width = layout->width;
+ uint32_t mb_height = layout->height;
+ uint32_t log2_mb_width = ffs(mb_width) - 1;
+ uint32_t log2_mb_height = ffs(mb_height) - 1;
+
+ /* Macroblock X, y */
+ uint32_t mb_x = x >> log2_mb_width;
+ uint32_t mb_y = y >> log2_mb_height;
+ /* X, y within the macroblock */
+ uint32_t mb_pixel_x = x - (mb_x << log2_mb_width);
+ uint32_t mb_pixel_y = y - (mb_y << log2_mb_height);
+
+ uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height;
+ uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4;
+
+ uint32_t mb_base_addr = mb_id * 256;
+
+ bool top = mb_pixel_y < mb_height / 2;
+ bool left = mb_pixel_x < mb_width / 2;
+
+ /* Docs have this in pixels, we do bytes here. */
+ uint32_t mb_tile_offset = (!top * 128 + !left * 64);
+
+ uint32_t mb_tile_y = mb_pixel_y & ~(mb_height / 2);
+ uint32_t mb_tile_x = mb_pixel_x & ~(mb_width / 2);
+ uint32_t mb_tile_pixel_id = (mb_tile_y *
+ layout->tile_row_stride +
+ mb_tile_x);
+
+ uint32_t mb_tile_addr = mb_tile_pixel_id * cpp;
+
+ uint32_t mb_pixel_address = (mb_base_addr +
+ mb_tile_offset +
+ mb_tile_addr);
+
+ return mb_pixel_address;
+}
+
+ /**
+ * Copies the pixels of \p box, one pixel at a time, between a tiled image
+ * and a linear buffer.
+ *
+ * \param gpu base of the tiled image; pixels are addressed through
+ * \p get_pixel_offset
+ * \param gpu_stride not used by this function
+ * \param cpu linear buffer; row y of the box starts at cpu + y * cpu_stride
+ * and pixel x of that row is at x * cpp within it
+ * \param cpu_stride distance in bytes between rows of \p cpu
+ * \param cpp bytes per pixel
+ * \param image_h image height, forwarded to \p get_pixel_offset
+ * \param box region of the tiled image to transfer
+ * \param get_pixel_offset returns the byte offset of image pixel (x, y)
+ * inside \p gpu
+ * \param is_load true copies tiled -> linear, false copies linear -> tiled
+ */
+ static inline void
+ vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+                                void *cpu, uint32_t cpu_stride,
+                                int cpp, uint32_t image_h,
+                                const struct pipe_box *box,
+                                uint32_t (*get_pixel_offset)(uint32_t cpp,
+                                                             uint32_t image_h,
+                                                             uint32_t x, uint32_t y),
+                                bool is_load)
+ {
+         /* Byte-typed views of the two buffers: ISO C does not define
+          * arithmetic on void pointers.
+          */
+         uint8_t *gpu_bytes = gpu;
+         uint8_t *cpu_bytes = cpu;
+
+         for (uint32_t y = 0; y < box->height; y++) {
+                 uint8_t *cpu_row = cpu_bytes + y * cpu_stride;
+
+                 for (int x = 0; x < box->width; x++) {
+                         uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
+                                                                  box->x + x,
+                                                                  box->y + y);
+                         uint8_t *cpu_pixel = cpu_row + x * cpp;
+                         uint8_t *gpu_pixel = gpu_bytes + pixel_offset;
+
+                         /* Debug trace, compiled but never executed. */
+                         if (false) {
+                                 fprintf(stderr, "%3d,%3u -> %u\n",
+                                         box->x + x, box->y + y,
+                                         pixel_offset);
+                         }
+
+                         if (is_load)
+                                 memcpy(cpu_pixel, gpu_pixel, cpp);
+                         else
+                                 memcpy(gpu_pixel, cpu_pixel, cpp);
+                 }
+         }
+ }
+
+ /**
+ * Dispatches to vc5_move_pixels_general_percpp() with \p cpp replaced by a
+ * literal constant (presumably so that the compiler can specialize the
+ * inlined copy loop for each pixel size).
+ *
+ * All other arguments are forwarded unchanged. There is no default case: for
+ * a \p cpp other than 1, 2, 4, 8 or 16 the switch falls through and nothing
+ * is copied.
+ */
+ static inline void
+ vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+ {
+ switch (cpp) {
+ case 1:
+ vc5_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 1, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 2:
+ vc5_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 2, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 4:
+ vc5_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 4, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 8:
+ vc5_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 8, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 16:
+ vc5_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 16, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ }
+ }
+
+ /**
+ * Moves the pixels of \p box between a tiled image (\p gpu) and a linear
+ * buffer (\p cpu), selecting the pixel-offset function that matches
+ * \p tiling_format.
+ *
+ * Handles VC5_TILING_UIF_NO_XOR, VC5_TILING_UBLINEAR_2_COLUMN,
+ * VC5_TILING_UBLINEAR_1_COLUMN and VC5_TILING_LINEARTILE; any other tiling
+ * mode reaches unreachable().
+ *
+ * \param is_load true copies tiled -> linear, false copies linear -> tiled
+ */
+ static inline void
+ vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ enum vc5_tiling_mode tiling_format,
+ int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box,
+ bool is_load)
+ {
+ switch (tiling_format) {
+ case VC5_TILING_UIF_NO_XOR:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_uif_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_2_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_2_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_UBLINEAR_1_COLUMN:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_ublinear_1_column_pixel_offset,
+ is_load);
+ break;
+ case VC5_TILING_LINEARTILE:
+ vc5_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ vc5_get_lt_pixel_offset,
+ is_load);
+ break;
+ default:
+ unreachable("Unsupported tiling format");
+ break;
+ }
+ }
+
+/**
+ * Loads pixel data from the start (microtile-aligned) box in \p src to the
+ * start of \p dst according to the given tiling format.
+ */
+void
+vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(src, src_stride,
+ dst, dst_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ true);
+}
+
+/**
+ * Stores pixel data from the start of \p src into a (microtile-aligned) box in
+ * \p dst according to the given tiling format.
+ */
+void
+vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ vc5_move_tiled_image(dst, dst_stride,
+ src, src_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ false);
+}
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h
new file mode 100644
index 000000000..d3cf48c45
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_tiling.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+ #ifndef VC5_TILING_H
+ #define VC5_TILING_H
+
+ /*
+ * This header includes nothing itself: uint32_t, bool, ATTRIBUTE_CONST,
+ * enum vc5_tiling_mode and struct pipe_box have to be declared by whoever
+ * includes it.
+ */
+
+ /* Width and height, in pixels, of a 64-byte microtile at cpp bytes/pixel. */
+ uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST;
+ uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST;
+ /*
+ * NOTE(review): the next three functions are declared here but have no
+ * definition in vc5_tiling.c -- confirm that they are implemented elsewhere.
+ */
+ bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
+ void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
+ void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
+ /* Copies the pixels of box from the tiled image src into the linear dst. */
+ void vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box);
+ /* Copies linear pixels from src into the box of the tiled image dst. */
+ void vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum vc5_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box);
+
+ #endif /* VC5_TILING_H */
diff --git a/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c b/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c
new file mode 100644
index 000000000..0c8bee517
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/vc5/vc5_uniforms.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc5_context.h"
+#include "compiler/v3d_compiler.h"
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+#if 0
+
+#define SWIZ(x,y,z,w) { \
+ PIPE_SWIZZLE_##x, \
+ PIPE_SWIZZLE_##y, \
+ PIPE_SWIZZLE_##z, \
+ PIPE_SWIZZLE_##w \
+}
+
+ /*
+ * Packs the border color of sampler \p unit into a single 32-bit word and
+ * appends it to the uniform stream.
+ *
+ * This function sits inside an "#if 0" region and is currently not compiled.
+ */
+ static void
+ write_texture_border_color(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t unit)
+ {
+ struct pipe_sampler_state *sampler = texstate->samplers[unit];
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+ struct vc5_resource *rsc = vc5_resource(texture->texture);
+ union util_color uc;
+
+ const struct util_format_description *tex_format_desc =
+ util_format_description(texture->format);
+
+ float border_color[4];
+ for (int i = 0; i < 4; i++)
+ border_color[i] = sampler->border_color.f[i];
+ /* Only the first three channels are converted; alpha is left as is. */
+ if (util_format_is_srgb(texture->format)) {
+ for (int i = 0; i < 3; i++)
+ border_color[i] =
+ util_format_linear_to_srgb_float(border_color[i]);
+ }
+
+ /* Turn the border color into the layout of channels that it would
+ * have when stored as texture contents.
+ */
+ float storage_color[4];
+ util_format_unswizzle_4f(storage_color,
+ border_color,
+ tex_format_desc->swizzle);
+
+ /* Now, pack so that when the vc5_format-sampled texture contents are
+ * replaced with our border color, the vc5_get_format_swizzle()
+ * swizzling will get the right channels.
+ */
+ if (util_format_is_depth_or_stencil(texture->format)) {
+ uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+ sampler->border_color.f[0]) << 8;
+ } else {
+ switch (rsc->vc5_format) {
+ default:
+ case VC5_TEXTURE_TYPE_RGBA8888:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ break;
+ case VC5_TEXTURE_TYPE_RGBA4444:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+ break;
+ case VC5_TEXTURE_TYPE_RGB565:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ break;
+ case VC5_TEXTURE_TYPE_ALPHA:
+ uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+ break;
+ case VC5_TEXTURE_TYPE_LUMALPHA:
+ uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+ (float_to_ubyte(storage_color[0]) << 0));
+ break;
+ }
+ }
+
+ cl_aligned_u32(uniforms, uc.ui[0]);
+ }
+#endif
+
+ /**
+ * Returns the reciprocal of a texture's base-level width or height as a
+ * 32-bit uniform word.
+ *
+ * \param texstate texture state holding the sampler views
+ * \param contents QUNIFORM_TEXRECT_SCALE_X selects width0; any other value
+ * selects height0
+ * \param data index into texstate->textures
+ */
+ static uint32_t
+ get_texrect_scale(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+ {
+ struct pipe_sampler_view *texture = texstate->textures[data];
+ uint32_t dim;
+
+ if (contents == QUNIFORM_TEXRECT_SCALE_X)
+ dim = texture->texture->width0;
+ else
+ dim = texture->texture->height0;
+
+ /* fui() presumably returns the bit pattern of the float -- confirm. */
+ return fui(1.0f / dim);
+ }
+
+ /**
+ * Returns one size-related property of the texture bound at index \p data.
+ *
+ * Width, height and depth are the base dimensions passed through u_minify()
+ * with the view's first_level; the array size is returned as stored and the
+ * level count is last_level - first_level + 1.
+ *
+ * \param contents one of the QUNIFORM_TEXTURE_{WIDTH,HEIGHT,DEPTH,ARRAY_SIZE,
+ * LEVELS} values; anything else reaches unreachable()
+ */
+ static uint32_t
+ get_texture_size(struct vc5_texture_stateobj *texstate,
+ enum quniform_contents contents,
+ uint32_t data)
+ {
+ struct pipe_sampler_view *texture = texstate->textures[data];
+
+ switch (contents) {
+ case QUNIFORM_TEXTURE_WIDTH:
+ return u_minify(texture->texture->width0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_HEIGHT:
+ return u_minify(texture->texture->height0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_DEPTH:
+ return u_minify(texture->texture->depth0,
+ texture->u.tex.first_level);
+ case QUNIFORM_TEXTURE_ARRAY_SIZE:
+ return texture->texture->array_size;
+ case QUNIFORM_TEXTURE_LEVELS:
+ return (texture->u.tex.last_level -
+ texture->u.tex.first_level) + 1;
+ default:
+ unreachable("Bad texture size field");
+ }
+ }
+
+ /**
+ * Allocates a BO of prog_data.base->ubo_size bytes and fills it with the
+ * ranges of \p gallium_uniforms listed in prog_data.base->ubo_ranges.
+ *
+ * \param vc5 context whose screen the BO is allocated from
+ * \param shader compiled shader describing the UBO size and ranges
+ * \param gallium_uniforms source of the uniform data; each range copies
+ * size bytes from src_offset in it to dst_offset in the BO
+ * \return the new BO, or NULL when the shader's ubo_size is zero. The caller
+ * in this file releases it with vc5_bo_unreference().
+ */
+ static struct vc5_bo *
+ vc5_upload_ubo(struct vc5_context *vc5,
+                struct vc5_compiled_shader *shader,
+                const uint32_t *gallium_uniforms)
+ {
+         if (!shader->prog_data.base->ubo_size)
+                 return NULL;
+
+         struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen,
+                                           shader->prog_data.base->ubo_size,
+                                           "ubo");
+
+         /* Offsets are in bytes, so address both buffers through byte
+          * pointers: ISO C does not define arithmetic on void pointers.
+          */
+         uint8_t *dst = vc5_bo_map(ubo);
+         const uint8_t *src = (const uint8_t *)gallium_uniforms;
+
+         for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
+                 memcpy(dst + shader->prog_data.base->ubo_ranges[i].dst_offset,
+                        src + shader->prog_data.base->ubo_ranges[i].src_offset,
+                        shader->prog_data.base->ubo_ranges[i].size);
+         }
+
+         return ubo;
+ }
+
+/**
+ * Writes the P0 (CFG_MODE=1) texture parameter.
+ *
+ * Some bits of this field are dependent on the type of sample being done by
+ * the shader, while other bits are dependent on the sampler state. We OR the
+ * two together here.
+ */
+static void
+write_texture_p0(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t unit,
+ uint32_t shader_data)
+{
+ struct pipe_sampler_state *psampler = texstate->samplers[unit];
+ struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
+
+ cl_aligned_u32(uniforms, shader_data | sampler->p0);
+}
+
+ /**
+ * Writes the P1 (CFG_MODE=1) texture parameter for texture \p unit.
+ *
+ * The word is the packed V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 record,
+ * carrying the address in texstate->texture_state[unit], ORed with the
+ * sampler view's p1 bits. One 32-bit word is appended to \p uniforms.
+ */
+ static void
+ write_texture_p1(struct vc5_job *job,
+ struct vc5_cl_out **uniforms,
+ struct vc5_texture_stateobj *texstate,
+ uint32_t unit)
+ {
+ struct pipe_sampler_view *psview = texstate->textures[unit];
+ struct vc5_sampler_view *sview = vc5_sampler_view(psview);
+
+ struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
+ .texture_state_record_base_address = texstate->texture_state[unit],
+ };
+
+ uint32_t packed;
+ V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
+ (uint8_t *)&packed,
+ &unpacked);
+
+ cl_aligned_u32(uniforms, packed | sview->p1);
+ }
+
+ /**
+ * Writes the uniform stream for \p shader into the job's indirect CL.
+ *
+ * One 32-bit word is emitted per entry of the shader's uniform list, in list
+ * order, using the current context, constant buffer and texture state.
+ *
+ * \param vc5 context supplying the job and the viewport, clip, stencil, ZSA
+ * and sample-mask state
+ * \param shader compiled shader whose uniform list is written
+ * \param cb constant buffers; cb[0].user_buffer supplies QUNIFORM_UNIFORM
+ * values and the uploaded UBO, other slots are referenced by BO
+ * \param texstate texture and sampler state for the texture uniforms
+ * \return the address of the start of the stream. A reference is taken on
+ * the BO backing it; presumably the caller has to release it.
+ */
+ struct vc5_cl_reloc
+ vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader,
+                    struct vc5_constbuf_stateobj *cb,
+                    struct vc5_texture_stateobj *texstate)
+ {
+         struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
+         struct vc5_job *job = vc5->job;
+         const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+         struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms);
+
+         /* We always need to return some space for uniforms, because the HW
+          * will be prefetching, even if we don't read any in the program.
+          */
+         vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);
+
+         struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect);
+         vc5_bo_reference(uniform_stream.bo);
+
+         struct vc5_cl_out *uniforms =
+                 cl_start(&job->indirect);
+
+         for (int i = 0; i < uinfo->count; i++) {
+
+                 switch (uinfo->contents[i]) {
+                 case QUNIFORM_CONSTANT:
+                         cl_aligned_u32(&uniforms, uinfo->data[i]);
+                         break;
+                 case QUNIFORM_UNIFORM:
+                         cl_aligned_u32(&uniforms,
+                                        gallium_uniforms[uinfo->data[i]]);
+                         break;
+                 case QUNIFORM_VIEWPORT_X_SCALE:
+                         cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f);
+                         break;
+                 case QUNIFORM_VIEWPORT_Y_SCALE:
+                         cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f);
+                         break;
+
+                 case QUNIFORM_VIEWPORT_Z_OFFSET:
+                         cl_aligned_f(&uniforms, vc5->viewport.translate[2]);
+                         break;
+                 case QUNIFORM_VIEWPORT_Z_SCALE:
+                         cl_aligned_f(&uniforms, vc5->viewport.scale[2]);
+                         break;
+
+                 case QUNIFORM_USER_CLIP_PLANE:
+                         /* data encodes plane * 4 + component. */
+                         cl_aligned_f(&uniforms,
+                                      vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+                         break;
+
+                 case QUNIFORM_TEXTURE_CONFIG_P1:
+                         write_texture_p1(job, &uniforms, texstate,
+                                          uinfo->data[i]);
+                         break;
+
+ #if 0
+                 case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                         write_texture_first_level(job, &uniforms, texstate,
+                                                   uinfo->data[i]);
+                         break;
+ #endif
+
+                 case QUNIFORM_TEXRECT_SCALE_X:
+                 case QUNIFORM_TEXRECT_SCALE_Y:
+                         cl_aligned_u32(&uniforms,
+                                        get_texrect_scale(texstate,
+                                                          uinfo->contents[i],
+                                                          uinfo->data[i]));
+                         break;
+
+                 case QUNIFORM_TEXTURE_WIDTH:
+                 case QUNIFORM_TEXTURE_HEIGHT:
+                 case QUNIFORM_TEXTURE_DEPTH:
+                 case QUNIFORM_TEXTURE_ARRAY_SIZE:
+                 case QUNIFORM_TEXTURE_LEVELS:
+                         cl_aligned_u32(&uniforms,
+                                        get_texture_size(texstate,
+                                                         uinfo->contents[i],
+                                                         uinfo->data[i]));
+                         break;
+
+                 case QUNIFORM_STENCIL:
+                         /* The reference value is only merged in for the
+                          * first two stencil uniforms.
+                          */
+                         cl_aligned_u32(&uniforms,
+                                        vc5->zsa->stencil_uniforms[uinfo->data[i]] |
+                                        (uinfo->data[i] <= 1 ?
+                                         (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+                                         0));
+                         break;
+
+                 case QUNIFORM_ALPHA_REF:
+                         cl_aligned_f(&uniforms,
+                                      vc5->zsa->base.alpha.ref_value);
+                         break;
+
+                 case QUNIFORM_SAMPLE_MASK:
+                         cl_aligned_u32(&uniforms, vc5->sample_mask);
+                         break;
+
+                 case QUNIFORM_UBO_ADDR:
+                         /* Index 0 is the BO uploaded above from the user
+                          * buffer; other indices are buffer resources.
+                          */
+                         if (uinfo->data[i] == 0) {
+                                 cl_aligned_reloc(&job->indirect, &uniforms,
+                                                  ubo, 0);
+                         } else {
+                                 int ubo_index = uinfo->data[i];
+                                 struct vc5_resource *rsc =
+                                         vc5_resource(cb->cb[ubo_index].buffer);
+
+                                 cl_aligned_reloc(&job->indirect, &uniforms,
+                                                  rsc->bo,
+                                                  cb->cb[ubo_index].buffer_offset);
+                         }
+                         break;
+
+                 case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                 case QUNIFORM_TEXTURE_MSAA_ADDR:
+                 case QUNIFORM_TEXTURE_BORDER_COLOR:
+                         /* XXX: Not implemented yet.  Still emit a
+                          * placeholder word: every entry of the list owns one
+                          * dword of the stream (see the space reserved
+                          * above), so skipping the write would shift all of
+                          * the following uniforms.
+                          */
+                         cl_aligned_u32(&uniforms, 0);
+                         break;
+
+                 default:
+                         assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
+
+                         write_texture_p0(job, &uniforms, texstate,
+                                          uinfo->contents[i] -
+                                          QUNIFORM_TEXTURE_CONFIG_P0_0,
+                                          uinfo->data[i]);
+                         break;
+
+                 }
+ #if 0
+                 uint32_t written_val = *((uint32_t *)uniforms - 1);
+                 fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
+                         shader, i, __gen_address_offset(&uniform_stream) + i * 4,
+                         written_val, uif(written_val));
+ #endif
+         }
+
+         cl_end(&job->indirect, uniforms);
+
+         /* Drop the reference returned by vc5_upload_ubo(). */
+         vc5_bo_unreference(&ubo);
+
+         return uniform_stream;
+ }
+
+ /**
+ * Computes which VC5_DIRTY_* state bits the uniforms of \p shader depend on
+ * and stores the mask in shader->uniform_dirty_bits.
+ *
+ * Every entry of the shader's uniform list contributes the dirty bit(s) of
+ * the state its value is read from; constants contribute nothing.
+ */
+ void
+ vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader)
+ {
+ uint32_t dirty = 0;
+
+ for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) {
+ switch (shader->prog_data.base->uniforms.contents[i]) {
+ case QUNIFORM_CONSTANT:
+ break;
+ case QUNIFORM_UNIFORM:
+ case QUNIFORM_UBO_ADDR:
+ dirty |= VC5_DIRTY_CONSTBUF;
+ break;
+
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ dirty |= VC5_DIRTY_VIEWPORT;
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ dirty |= VC5_DIRTY_CLIP;
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ case QUNIFORM_TEXTURE_FIRST_LEVEL:
+ case QUNIFORM_TEXTURE_MSAA_ADDR:
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ case QUNIFORM_TEXTURE_WIDTH:
+ case QUNIFORM_TEXTURE_HEIGHT:
+ case QUNIFORM_TEXTURE_DEPTH:
+ case QUNIFORM_TEXTURE_ARRAY_SIZE:
+ case QUNIFORM_TEXTURE_LEVELS:
+ /* We could flag this on just the stage we're
+ * compiling for, but it's not passed in.
+ */
+ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+ break;
+
+ /* NOTE(review): vc5_write_uniforms() also reads
+ * vc5->stencil_ref for QUNIFORM_STENCIL -- confirm whether
+ * a stencil-reference dirty bit should be set here too.
+ */
+ case QUNIFORM_STENCIL:
+ case QUNIFORM_ALPHA_REF:
+ dirty |= VC5_DIRTY_ZSA;
+ break;
+
+ case QUNIFORM_SAMPLE_MASK:
+ dirty |= VC5_DIRTY_SAMPLE_MASK;
+ break;
+
+ default:
+ /* Everything else must be a texture P0 parameter. */
+ assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
+ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+ break;
+ }
+ }
+
+ shader->uniform_dirty_bits = dirty;
+ }
diff --git a/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp b/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp
index 5d1a0e52c..3f89644d0 100644
--- a/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp
+++ b/lib/mesa/src/gallium/state_trackers/clover/api/event.cpp
@@ -126,7 +126,8 @@ clSetEventCallback(cl_event d_ev, cl_int type,
void *user_data) try {
auto &ev = obj(d_ev);
- if (!pfn_notify || type != CL_COMPLETE)
+ if (!pfn_notify ||
+ (type != CL_COMPLETE && type != CL_SUBMITTED && type != CL_RUNNING))
throw error(CL_INVALID_VALUE);
// Create a temporary soft event that depends on ev, with
diff --git a/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp b/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp
index f7046253b..34559042a 100644
--- a/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/lib/mesa/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -295,6 +295,9 @@ clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
&mem, obj_origin, obj_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -325,6 +328,9 @@ clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
ptr, {}, obj_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -362,6 +368,9 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
&mem, obj_origin, obj_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -399,6 +408,9 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
ptr, host_origin, host_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -504,6 +516,9 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
&img, src_origin, src_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -538,6 +553,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
ptr, {}, src_pitch,
region));
+ if (blocking)
+ hev().wait_signalled();
+
ret_object(rd_ev, hev);
return CL_SUCCESS;
@@ -667,7 +685,11 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
- ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
+ auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
+ if (blocking)
+ hev().wait_signalled();
+
+ ret_object(rd_ev, hev);
ret_error(r_errcode, CL_SUCCESS);
return map;
@@ -695,7 +717,11 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
- ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
+ auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
+ if (blocking)
+ hev().wait_signalled();
+
+ ret_object(rd_ev, hev);
ret_error(r_errcode, CL_SUCCESS);
return map;
diff --git a/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp b/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp
index 8275e16a4..cd5d78660 100644
--- a/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp
+++ b/lib/mesa/src/gallium/state_trackers/clover/core/event.cpp
@@ -27,7 +27,7 @@ using namespace clover;
event::event(clover::context &ctx, const ref_vector<event> &deps,
action action_ok, action action_fail) :
- context(ctx), wait_count(1), _status(0),
+ context(ctx), _wait_count(1), _status(0),
action_ok(action_ok), action_fail(action_fail) {
for (auto &ev : deps)
ev.chain(*this);
@@ -41,22 +41,19 @@ event::trigger_self() {
std::lock_guard<std::mutex> lock(mutex);
std::vector<intrusive_ref<event>> evs;
- if (!--wait_count)
+ if (!--_wait_count)
std::swap(_chain, evs);
+ cv.notify_all();
return evs;
}
void
event::trigger() {
- auto evs = trigger_self();
-
- if (signalled()) {
+ if (wait_count() == 1)
action_ok(*this);
- cv.notify_all();
- }
- for (event &ev : evs)
+ for (event &ev : trigger_self())
ev.trigger();
}
@@ -73,18 +70,21 @@ event::abort_self(cl_int status) {
void
event::abort(cl_int status) {
- auto evs = abort_self(status);
-
action_fail(*this);
- for (event &ev : evs)
+ for (event &ev : abort_self(status))
ev.abort(status);
}
+unsigned
+event::wait_count() const {
+ std::lock_guard<std::mutex> lock(mutex);
+ return _wait_count;
+}
+
bool
event::signalled() const {
- std::lock_guard<std::mutex> lock(mutex);
- return !wait_count;
+ return !wait_count();
}
cl_int
@@ -99,20 +99,25 @@ event::chain(event &ev) {
std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock);
std::lock(lock, lock_ev);
- if (wait_count) {
- ev.wait_count++;
+ if (_wait_count) {
+ ev._wait_count++;
_chain.push_back(ev);
}
ev.deps.push_back(*this);
}
void
+event::wait_signalled() const {
+ std::unique_lock<std::mutex> lock(mutex);
+ cv.wait(lock, [=]{ return !_wait_count; });
+}
+
+void
event::wait() const {
for (event &ev : deps)
ev.wait();
- std::unique_lock<std::mutex> lock(mutex);
- cv.wait(lock, [=]{ return !wait_count; });
+ wait_signalled();
}
hard_event::hard_event(command_queue &q, cl_command_type command,
diff --git a/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp b/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp
index 6469e483c..03c97bcf4 100644
--- a/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp
+++ b/lib/mesa/src/gallium/state_trackers/clover/core/event.hpp
@@ -69,6 +69,7 @@ namespace clover {
virtual cl_int status() const;
virtual command_queue *queue() const = 0;
virtual cl_command_type command() const = 0;
+ void wait_signalled() const;
virtual void wait() const;
virtual struct pipe_fence_handle *fence() const {
@@ -85,8 +86,9 @@ namespace clover {
private:
std::vector<intrusive_ref<event>> trigger_self();
std::vector<intrusive_ref<event>> abort_self(cl_int status);
+ unsigned wait_count() const;
- unsigned wait_count;
+ unsigned _wait_count;
cl_int _status;
action action_ok;
action action_fail;
diff --git a/lib/mesa/src/gallium/state_trackers/dri/Makefile.am b/lib/mesa/src/gallium/state_trackers/dri/Makefile.am
index 61a1cabeb..8b427f98a 100644
--- a/lib/mesa/src/gallium/state_trackers/dri/Makefile.am
+++ b/lib/mesa/src/gallium/state_trackers/dri/Makefile.am
@@ -28,7 +28,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
- -I$(top_builddir)/src/mesa/drivers/dri/common \
+ -I$(top_builddir)/src/util \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(GALLIUM_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_context.h b/lib/mesa/src/gallium/state_trackers/dri/dri_context.h
index 96f06442f..afa9c49ff 100644
--- a/lib/mesa/src/gallium/state_trackers/dri/dri_context.h
+++ b/lib/mesa/src/gallium/state_trackers/dri/dri_context.h
@@ -90,6 +90,7 @@ dri_create_context(gl_api api,
unsigned minor_version,
uint32_t flags,
bool notify_reset,
+ unsigned priority,
unsigned *error,
void *sharedContextPrivate);
diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h b/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h
index 1f9842ea5..7c45004ba 100644
--- a/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h
+++ b/lib/mesa/src/gallium/state_trackers/dri/dri_drawable.h
@@ -85,6 +85,8 @@ struct dri_drawable
void (*update_tex_buffer)(struct dri_drawable *drawable,
struct dri_context *ctx,
struct pipe_resource *res);
+ void (*flush_swapbuffers)(struct dri_context *ctx,
+ struct dri_drawable *drawable);
};
static inline struct dri_drawable *
diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c
new file mode 100644
index 000000000..06309d8f0
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <dlfcn.h>
+#include "util/u_memory.h"
+#include "pipe/p_screen.h"
+#include "state_tracker/st_texture.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_fbo.h"
+#include "main/texobj.h"
+
+#include "dri_helpers.h"
+
+/**
+ * Report whether all four OpenCL interop entry points cached on the screen
+ * are non-NULL.
+ *
+ * The only caller in this file holds screen->opencl_func_mutex around the
+ * call, which is what the "_locked" suffix refers to.
+ */
+static bool
+dri2_is_opencl_interop_loaded_locked(struct dri_screen *screen)
+{
+   if (!screen->opencl_dri_event_add_ref)
+      return false;
+   if (!screen->opencl_dri_event_release)
+      return false;
+   if (!screen->opencl_dri_event_wait)
+      return false;
+
+   return screen->opencl_dri_event_get_fence != NULL;
+}
+
+/**
+ * Resolve the OpenCL interop entry points with dlsym(RTLD_DEFAULT, ...) and
+ * cache them on the screen.
+ *
+ * The check and the lookups run with screen->opencl_func_mutex held. If all
+ * four pointers are already set, the lookups are skipped.
+ *
+ * \return true when all four entry points are available, false otherwise
+ *         (always false when RTLD_DEFAULT is not defined).
+ */
+static bool
+dri2_load_opencl_interop(struct dri_screen *screen)
+{
+#if defined(RTLD_DEFAULT)
+   bool loaded;
+
+   mtx_lock(&screen->opencl_func_mutex);
+
+   loaded = dri2_is_opencl_interop_loaded_locked(screen);
+   if (!loaded) {
+      screen->opencl_dri_event_add_ref =
+         dlsym(RTLD_DEFAULT, "opencl_dri_event_add_ref");
+      screen->opencl_dri_event_release =
+         dlsym(RTLD_DEFAULT, "opencl_dri_event_release");
+      screen->opencl_dri_event_wait =
+         dlsym(RTLD_DEFAULT, "opencl_dri_event_wait");
+      screen->opencl_dri_event_get_fence =
+         dlsym(RTLD_DEFAULT, "opencl_dri_event_get_fence");
+
+      /* Re-check: any of the lookups above may have returned NULL. */
+      loaded = dri2_is_opencl_interop_loaded_locked(screen);
+   }
+
+   mtx_unlock(&screen->opencl_func_mutex);
+   return loaded;
+#else
+   return false;
+#endif
+}
+
+/*
+ * Fence object handed out by the __DRI2fenceExtension entry points in this
+ * file. The create_fence paths fill in pipe_fence; the cl_event path fills
+ * in cl_event instead. dri2_destroy_fence asserts that one of the two is set.
+ */
+struct dri2_fence {
+ struct dri_screen *driscreen; /* screen the fence was created for */
+ struct pipe_fence_handle *pipe_fence; /* set by the create_fence paths */
+ void *cl_event; /* set by dri2_get_fence_from_cl_event */
+};
+
+/**
+ * Implements __DRI2fenceExtension::get_capabilities.
+ *
+ * \return __DRI_FENCE_CAP_NATIVE_FD when the pipe screen reports
+ *         PIPE_CAP_NATIVE_FENCE_FD, otherwise 0.
+ */
+static unsigned dri2_fence_get_caps(__DRIscreen *_screen)
+{
+   struct pipe_screen *pscreen = dri_screen(_screen)->base.screen;
+   unsigned caps = 0;
+
+   if (pscreen->get_param(pscreen, PIPE_CAP_NATIVE_FENCE_FD) != 0)
+      caps = caps | __DRI_FENCE_CAP_NATIVE_FD;
+
+   return caps;
+}
+
+/**
+ * Implements __DRI2fenceExtension::create_fence: flush the context and wrap
+ * the pipe fence produced by the flush.
+ *
+ * \return a new dri2_fence, or NULL if allocation fails or the flush yields
+ *         no fence.
+ */
+static void *
+dri2_create_fence(__DRIcontext *_ctx)
+{
+   struct pipe_context *pipe = dri_context(_ctx)->st->pipe;
+   struct dri2_fence *new_fence = CALLOC_STRUCT(dri2_fence);
+
+   if (new_fence == NULL)
+      return NULL;
+
+   pipe->flush(pipe, &new_fence->pipe_fence, 0);
+
+   if (new_fence->pipe_fence != NULL) {
+      new_fence->driscreen = dri_screen(_ctx->driScreenPriv);
+      return new_fence;
+   }
+
+   /* The flush did not hand back a fence: nothing to wrap. */
+   FREE(new_fence);
+   return NULL;
+}
+
+/**
+ * Implements __DRI2fenceExtension::create_fence_fd.
+ *
+ * \param _ctx  context whose pipe_context creates the fence
+ * \param fd    -1 to flush the context and export a driver-created fence;
+ *              any other value is imported via pipe_context::create_fence_fd
+ *
+ * \return a new dri2_fence, or NULL if allocation fails or no pipe fence
+ *         could be obtained.
+ */
+static void *
+dri2_create_fence_fd(__DRIcontext *_ctx, int fd)
+{
+   struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
+   struct dri2_fence *fence = CALLOC_STRUCT(dri2_fence);
+
+   /* Allocation can fail; bail out before &fence->pipe_fence is formed. */
+   if (!fence)
+      return NULL;
+
+   if (fd == -1) {
+      /* exporting driver created fence, flush: */
+      ctx->flush(ctx, &fence->pipe_fence,
+                 PIPE_FLUSH_DEFERRED | PIPE_FLUSH_FENCE_FD);
+   } else {
+      /* importing a foreign fence fd: */
+      ctx->create_fence_fd(ctx, &fence->pipe_fence, fd);
+   }
+   if (!fence->pipe_fence) {
+      FREE(fence);
+      return NULL;
+   }
+
+   fence->driscreen = dri_screen(_ctx->driScreenPriv);
+   return fence;
+}
+
+/**
+ * Implements __DRI2fenceExtension::get_fence_fd by forwarding the wrapped
+ * pipe fence to pipe_screen::fence_get_fd and returning its result.
+ */
+static int
+dri2_get_fence_fd(__DRIscreen *_screen, void *_fence)
+{
+   struct pipe_screen *pscreen = dri_screen(_screen)->base.screen;
+   struct dri2_fence *wrapped = (struct dri2_fence *)_fence;
+
+   return pscreen->fence_get_fd(pscreen, wrapped->pipe_fence);
+}
+
+/**
+ * Implements __DRI2fenceExtension::get_fence_from_cl_event.
+ *
+ * Loads the OpenCL interop entry points if needed, then calls
+ * opencl_dri_event_add_ref on the event.
+ *
+ * \return a new dri2_fence wrapping cl_event, or NULL if the interop symbols
+ *         are unavailable, allocation fails, or add_ref returns false.
+ */
+static void *
+dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
+{
+   struct dri_screen *driscreen = dri_screen(_screen);
+   struct dri2_fence *fence;
+
+   if (!dri2_load_opencl_interop(driscreen))
+      return NULL;
+
+   fence = CALLOC_STRUCT(dri2_fence);
+   if (!fence)
+      return NULL;
+
+   fence->cl_event = (void*)cl_event;
+
+   if (!driscreen->opencl_dri_event_add_ref(fence->cl_event)) {
+      /* Pair CALLOC_STRUCT with FREE, as the other fence paths here do. */
+      FREE(fence);
+      return NULL;
+   }
+
+   fence->driscreen = driscreen;
+   return fence;
+}
+
+/**
+ * Implements __DRI2fenceExtension::destroy_fence.
+ *
+ * Drops the pipe fence reference if there is one, otherwise releases the
+ * OpenCL event; a fence with neither trips an assert. The wrapper is then
+ * freed.
+ */
+static void
+dri2_destroy_fence(__DRIscreen *_screen, void *_fence)
+{
+   struct dri_screen *driscreen = dri_screen(_screen);
+   struct dri2_fence *doomed = (struct dri2_fence *)_fence;
+
+   if (doomed->pipe_fence) {
+      struct pipe_screen *pscreen = driscreen->base.screen;
+
+      pscreen->fence_reference(pscreen, &doomed->pipe_fence, NULL);
+   } else if (doomed->cl_event) {
+      driscreen->opencl_dri_event_release(doomed->cl_event);
+   } else {
+      assert(0);
+   }
+
+   FREE(doomed);
+}
+
+/**
+ * Implements __DRI2fenceExtension::client_wait_sync.
+ *
+ * A pipe-backed fence is waited on with pipe_screen::fence_finish. For a
+ * cl_event-backed fence, the event's pipe fence is used when
+ * opencl_dri_event_get_fence returns one, otherwise the wait is delegated to
+ * opencl_dri_event_wait. `_ctx` and `flags` are not used.
+ *
+ * \return the result of the wait call that was used.
+ */
+static GLboolean
+dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
+                      uint64_t timeout)
+{
+   struct dri2_fence *fence = (struct dri2_fence *)_fence;
+   struct dri_screen *driscreen = fence->driscreen;
+   struct pipe_screen *pscreen = driscreen->base.screen;
+   struct pipe_fence_handle *event_fence;
+
+   /* No need to flush. The context was flushed when the fence was created. */
+
+   if (fence->pipe_fence)
+      return pscreen->fence_finish(pscreen, NULL, fence->pipe_fence, timeout);
+
+   if (!fence->cl_event) {
+      /* A fence must be backed by either a pipe fence or a CL event. */
+      assert(0);
+      return false;
+   }
+
+   event_fence = driscreen->opencl_dri_event_get_fence(fence->cl_event);
+   if (event_fence)
+      return pscreen->fence_finish(pscreen, NULL, event_fence, timeout);
+
+   return driscreen->opencl_dri_event_wait(fence->cl_event, timeout);
+}
+
+/*
+ * Implements __DRI2fenceExtension::server_wait_sync: hands the fence's
+ * pipe_fence to pipe_context::fence_server_sync when the driver provides that
+ * hook, and does nothing otherwise. `flags` is not used.
+ *
+ * NOTE(review): dri2_get_fence_from_cl_event never sets pipe_fence, so for
+ * those fences the hook would receive whatever CALLOC_STRUCT left there
+ * (presumably NULL) -- confirm that drivers tolerate this.
+ */
+static void
+dri2_server_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags)
+{
+ struct pipe_context *ctx = dri_context(_ctx)->st->pipe;
+ struct dri2_fence *fence = (struct dri2_fence*)_fence;
+
+ if (ctx->fence_server_sync)
+ ctx->fence_server_sync(ctx, fence->pipe_fence);
+}
+
+/*
+ * __DRI2_FENCE extension table (declared as version 2) that exposes the
+ * static fence helpers defined above in this file.
+ */
+const __DRI2fenceExtension dri2FenceExtension = {
+ .base = { __DRI2_FENCE, 2 },
+
+ .create_fence = dri2_create_fence,
+ .get_fence_from_cl_event = dri2_get_fence_from_cl_event,
+ .destroy_fence = dri2_destroy_fence,
+ .client_wait_sync = dri2_client_wait_sync,
+ .server_wait_sync = dri2_server_wait_sync,
+ .get_capabilities = dri2_fence_get_caps,
+ .create_fence_fd = dri2_create_fence_fd,
+ .get_fence_fd = dri2_get_fence_fd,
+};
+
+/**
+ * Resolve an image handle through the loader's lookupEGLImage hook.
+ *
+ * \return whatever the hook returns, or NULL when the loader's image-lookup
+ *         extension has no lookupEGLImage hook.
+ */
+__DRIimage *
+dri2_lookup_egl_image(struct dri_screen *screen, void *handle)
+{
+   const __DRIimageLookupExtension *lookup_ext = screen->sPriv->dri2.image;
+
+   if (lookup_ext->lookupEGLImage == NULL)
+      return NULL;
+
+   return lookup_ext->lookupEGLImage(screen->sPriv, handle,
+                                     screen->sPriv->loaderPrivate);
+}
+
+/**
+ * Create a __DRIimage that references the resource backing a GL renderbuffer.
+ *
+ * \param context        DRI context used to look up the renderbuffer
+ * \param renderbuffer   GL renderbuffer name
+ * \param loaderPrivate  stored in the image's loader_private field
+ * \param error          receives a __DRI_IMAGE_ERROR_* code on every path
+ *
+ * \return the new image, or NULL (with *error describing why) on failure.
+ */
+__DRIimage *
+dri2_create_image_from_renderbuffer2(__DRIcontext *context,
+                                     int renderbuffer, void *loaderPrivate,
+                                     unsigned *error)
+{
+   struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx;
+   struct gl_renderbuffer *rb;
+   struct pipe_resource *tex;
+   __DRIimage *img;
+
+   /* Section 3.9 (EGLImage Specification and Management) of the EGL 1.5
+    * specification says:
+    *
+    *   "If target is EGL_GL_RENDERBUFFER and buffer is not the name of a
+    *    renderbuffer object, or if buffer is the name of a multisampled
+    *    renderbuffer object, the error EGL_BAD_PARAMETER is generated."
+    *
+    *   "If target is EGL_GL_TEXTURE_2D , EGL_GL_TEXTURE_CUBE_MAP_*,
+    *    EGL_GL_RENDERBUFFER or EGL_GL_TEXTURE_3D and buffer refers to the
+    *    default GL texture object (0) for the corresponding GL target, the
+    *    error EGL_BAD_PARAMETER is generated."
+    *   (rely on _mesa_lookup_renderbuffer returning NULL in this case)
+    */
+   rb = _mesa_lookup_renderbuffer(ctx, renderbuffer);
+   if (!rb || rb->NumSamples > 0) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      return NULL;
+   }
+
+   tex = st_get_renderbuffer_resource(rb);
+   if (!tex) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      return NULL;
+   }
+
+   img = CALLOC_STRUCT(__DRIimageRec);
+   if (!img) {
+      *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
+      return NULL;
+   }
+
+   img->dri_format = driGLFormatToImageFormat(rb->Format);
+   img->loader_private = loaderPrivate;
+
+   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      /* Pair CALLOC_STRUCT with FREE, matching dri2_destroy_image. */
+      FREE(img);
+      return NULL;
+   }
+
+   /* Only take the texture reference once nothing else can fail. */
+   pipe_resource_reference(&img->texture, tex);
+
+   *error = __DRI_IMAGE_ERROR_SUCCESS;
+   return img;
+}
+
+/**
+ * Variant of dri2_create_image_from_renderbuffer2() for callers that do not
+ * need the error code: the code is written to a local and discarded.
+ */
+__DRIimage *
+dri2_create_image_from_renderbuffer(__DRIcontext *context,
+                                    int renderbuffer, void *loaderPrivate)
+{
+   unsigned discarded_error;
+   __DRIimage *image;
+
+   image = dri2_create_image_from_renderbuffer2(context, renderbuffer,
+                                                loaderPrivate,
+                                                &discarded_error);
+   return image;
+}
+
+/*
+ * Destroy an image: drop its reference on img->texture, then free the
+ * __DRIimage itself.
+ */
+void
+dri2_destroy_image(__DRIimage *img)
+{
+ pipe_resource_reference(&img->texture, NULL);
+ FREE(img);
+}
+
+
+/**
+ * Create a __DRIimage that references one level of a GL texture.
+ *
+ * \param context        DRI context used to look up the texture
+ * \param target         GL texture target; must match the texture object's
+ * \param texture        GL texture name
+ * \param depth          stored as the image layer; for GL_TEXTURE_CUBE_MAP it
+ *                       also selects the face
+ * \param level          mipmap level to reference
+ * \param error          receives a __DRI_IMAGE_ERROR_* code on every path
+ * \param loaderPrivate  stored in the image's loader_private field
+ *
+ * \return the new image, or NULL (with *error describing why) on failure.
+ */
+__DRIimage *
+dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture,
+                         int depth, int level, unsigned *error,
+                         void *loaderPrivate)
+{
+   __DRIimage *img;
+   struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx;
+   struct gl_texture_object *obj;
+   struct pipe_resource *tex;
+   GLuint face = 0;
+
+   obj = _mesa_lookup_texture(ctx, texture);
+   if (!obj || obj->Target != target) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      return NULL;
+   }
+
+   tex = st_get_texobj_resource(obj);
+   if (!tex) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      return NULL;
+   }
+
+   if (target == GL_TEXTURE_CUBE_MAP)
+      face = depth;
+
+   /* Refresh the completeness flags before they are read below. */
+   _mesa_test_texobj_completeness(ctx, obj);
+   if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      return NULL;
+   }
+
+   if (level < obj->BaseLevel || level > obj->_MaxLevel) {
+      *error = __DRI_IMAGE_ERROR_BAD_MATCH;
+      return NULL;
+   }
+
+   if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < depth) {
+      *error = __DRI_IMAGE_ERROR_BAD_MATCH;
+      return NULL;
+   }
+
+   img = CALLOC_STRUCT(__DRIimageRec);
+   if (!img) {
+      *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
+      return NULL;
+   }
+
+   img->level = level;
+   img->layer = depth;
+   img->dri_format = driGLFormatToImageFormat(obj->Image[face][level]->TexFormat);
+
+   img->loader_private = loaderPrivate;
+
+   if (img->dri_format == __DRI_IMAGE_FORMAT_NONE) {
+      *error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
+      /* Pair CALLOC_STRUCT with FREE, matching dri2_destroy_image. */
+      FREE(img);
+      return NULL;
+   }
+
+   /* Only take the texture reference once nothing else can fail. */
+   pipe_resource_reference(&img->texture, tex);
+
+   *error = __DRI_IMAGE_ERROR_SUCCESS;
+   return img;
+}
+
+/* vim: set sw=3 ts=8 sts=3 expandtab: */
diff --git a/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h
new file mode 100644
index 000000000..76f024fd6
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/dri/dri_helpers.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DRI_HELPERS_H
+#define DRI_HELPERS_H
+
+#include "dri_context.h"
+#include "dri_screen.h"
+
+/* Fence extension table; defined in dri_helpers.c. */
+extern const __DRI2fenceExtension dri2FenceExtension;
+
+/* Look up an image handle through the loader's lookupEGLImage hook. */
+__DRIimage *
+dri2_lookup_egl_image(struct dri_screen *screen, void *handle);
+
+/* Same as dri2_create_image_from_renderbuffer2, without an error code. */
+__DRIimage *
+dri2_create_image_from_renderbuffer(__DRIcontext *context,
+ int renderbuffer, void *loaderPrivate);
+
+/*
+ * Create an image from a GL renderbuffer name. Returns NULL on failure;
+ * *error receives a __DRI_IMAGE_ERROR_* code.
+ */
+__DRIimage *
+dri2_create_image_from_renderbuffer2(__DRIcontext *context,
+ int renderbuffer, void *loaderPrivate,
+ unsigned *error);
+
+/* Drop the image's texture reference and free the image. */
+void
+dri2_destroy_image(__DRIimage *img);
+
+/*
+ * Create an image from one level of a GL texture. Returns NULL on failure;
+ * *error receives a __DRI_IMAGE_ERROR_* code.
+ */
+__DRIimage *
+dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture,
+ int depth, int level, unsigned *error,
+ void *loaderPrivate);
+#endif
+
+/* vim: set sw=3 ts=8 sts=3 expandtab: */
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am
new file mode 100644
index 000000000..fcd8b2c31
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.am
@@ -0,0 +1,35 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Pull in the source list and the shared gallium flag definitions
+# (presumably where C_SOURCES and GALLIUM_CFLAGS used below come from).
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+# Compiler flags applied to every C source in this directory.
+AM_CFLAGS = \
+ $(GALLIUM_CFLAGS) \
+ $(VISIBILITY_CFLAGS) \
+ $(VL_CFLAGS) \
+ $(XCB_DRI3_CFLAGS) \
+ $(OMX_BELLAGIO_CFLAGS)
+
+# Convenience library only (noinst): built but not installed.
+noinst_LTLIBRARIES = libomxtracker.la
+
+libomxtracker_la_SOURCES = $(C_SOURCES)
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in
new file mode 100644
index 000000000..b4c346766
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.in
@@ -0,0 +1,899 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_LIBDRM_TRUE@am__append_1 = \
+@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
+
+@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
+
+subdir = src/gallium/state_trackers/omx_bellagio
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+ $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
+ $(top_srcdir)/m4/ax_gcc_builtin.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+ $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_flex.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libomxtracker_la_LIBADD =
+am__objects_1 = entrypoint.lo vid_dec.lo vid_dec_mpeg12.lo \
+ vid_dec_h264.lo vid_dec_h265.lo vid_enc.lo
+am_libomxtracker_la_OBJECTS = $(am__objects_1)
+libomxtracker_la_OBJECTS = $(am_libomxtracker_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libomxtracker_la_SOURCES)
+DIST_SOURCES = $(libomxtracker_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
+ $(top_srcdir)/bin/depcomp \
+ $(top_srcdir)/src/gallium/Automake.inc
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
+AMDGPU_LIBS = @AMDGPU_LIBS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+ANDROID_CFLAGS = @ANDROID_CFLAGS@
+ANDROID_LIBS = @ANDROID_LIBS@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BSYMBOLIC = @BSYMBOLIC@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
+DEFINES = @DEFINES@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGREP = @EGREP@
+ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
+ETNAVIV_LIBS = @ETNAVIV_LIBS@
+EXEEXT = @EXEEXT@
+EXPAT_CFLAGS = @EXPAT_CFLAGS@
+EXPAT_LIBS = @EXPAT_LIBS@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GC_SECTIONS = @GC_SECTIONS@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLVND_CFLAGS = @GLVND_CFLAGS@
+GLVND_LIBS = @GLVND_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+I915_CFLAGS = @I915_CFLAGS@
+I915_LIBS = @I915_LIBS@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBATOMIC_LIBS = @LIBATOMIC_LIBS@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBELF_CFLAGS = @LIBELF_CFLAGS@
+LIBELF_LIBS = @LIBELF_LIBS@
+LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIB_DIR = @LIB_DIR@
+LIB_EXT = @LIB_EXT@
+LIPO = @LIPO@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBS = @LLVM_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
+MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
+NINE_MAJOR = @NINE_MAJOR@
+NINE_MINOR = @NINE_MINOR@
+NINE_TINY = @NINE_TINY@
+NINE_VERSION = @NINE_VERSION@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
+NVVIEUX_LIBS = @NVVIEUX_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
+OPENCL_LIBNAME = @OPENCL_LIBNAME@
+OPENCL_VERSION = @OPENCL_VERSION@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
+PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PWR8_CFLAGS = @PWR8_CFLAGS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+RM = @RM@
+SED = @SED@
+SELINUX_CFLAGS = @SELINUX_CFLAGS@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
+SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
+SSE41_CFLAGS = @SSE41_CFLAGS@
+STRIP = @STRIP@
+SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
+SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
+SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
+SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+VALGRIND_LIBS = @VALGRIND_LIBS@
+VA_CFLAGS = @VA_CFLAGS@
+VA_LIBS = @VA_LIBS@
+VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
+VA_MAJOR = @VA_MAJOR@
+VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+VL_CFLAGS = @VL_CFLAGS@
+VL_LIBS = @VL_LIBS@
+VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
+WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
+XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acv_mako_found = @acv_mako_found@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+ifGNUmake = @ifGNUmake@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+C_SOURCES := \
+ entrypoint.c \
+ entrypoint.h \
+ vid_dec.c \
+ vid_dec.h \
+ vid_dec_mpeg12.c \
+ vid_dec_h264.c \
+ vid_dec_h265.c \
+ vid_enc.c \
+ vid_enc.h
+
+GALLIUM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES)
+
+
+# src/gallium/auxiliary must appear before src/gallium/drivers
+# because there are stupidly two rbug_context.h files in
+# different directories, and which one is included by the
+# preprocessor is determined by the ordering of the -I flags.
+GALLIUM_DRIVER_CFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_DRIVER_CXXFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CXXFLAGS)
+
+GALLIUM_TARGET_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
+ $(DEFINES) \
+ $(PTHREAD_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
+ $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_WINSYS_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_2) $(am__append_3)
+AM_CFLAGS = \
+ $(GALLIUM_CFLAGS) \
+ $(VISIBILITY_CFLAGS) \
+ $(VL_CFLAGS) \
+ $(XCB_DRI3_CFLAGS) \
+ $(OMX_BELLAGIO_CFLAGS)
+
+noinst_LTLIBRARIES = libomxtracker.la
+libomxtracker_la_SOURCES = $(C_SOURCES)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/state_trackers/omx_bellagio/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/gallium/state_trackers/omx_bellagio/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libomxtracker.la: $(libomxtracker_la_OBJECTS) $(libomxtracker_la_DEPENDENCIES) $(EXTRA_libomxtracker_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(libomxtracker_la_OBJECTS) $(libomxtracker_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entrypoint.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_h264.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_h265.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_dec_mpeg12.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vid_enc.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources
new file mode 100644
index 000000000..ab60ce803
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/Makefile.sources
@@ -0,0 +1,10 @@
+C_SOURCES := \
+ entrypoint.c \
+ entrypoint.h \
+ vid_dec.c \
+ vid_dec.h \
+ vid_dec_mpeg12.c \
+ vid_dec_h264.c \
+ vid_dec_h265.c \
+ vid_enc.c \
+ vid_enc.h
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c
new file mode 100644
index 000000000..251cc7d65
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.c
@@ -0,0 +1,152 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <stdbool.h>
+
+#if defined(HAVE_X11_PLATFORM)
+#include <X11/Xlib.h>
+#else
+#define XOpenDisplay(x) NULL
+#define XCloseDisplay(x)
+#define Display void
+#endif
+
+#include "os/os_thread.h"
+#include "util/u_memory.h"
+#include "loader/loader.h"
+
+#include "entrypoint.h"
+#include "vid_dec.h"
+#include "vid_enc.h"
+
+static mtx_t omx_lock = _MTX_INITIALIZER_NP;
+static Display *omx_display = NULL;
+static struct vl_screen *omx_screen = NULL;
+static unsigned omx_usecount = 0;
+static const char *omx_render_node = NULL;
+static int drm_fd;
+
+/*
+ * Library setup hook: fill in the loader descriptors for the components
+ * implemented here (slot 0: video decoder, slot 1: video encoder).
+ *
+ * Called with a NULL array it touches nothing and just returns 2
+ * (presumably the component count the loader asks for -- confirm against
+ * the Bellagio loader).  Returns 2 as well after both descriptors were
+ * filled in, or OMX_ErrorInsufficientResources if either one failed.
+ */
+int omx_component_library_Setup(stLoaderComponentType **stComponents)
+{
+   const int component_count = 2;
+
+   if (!stComponents)
+      return component_count;
+
+   /* slot 0 - video decoder */
+   if (vid_dec_LoaderComponent(stComponents[0]) != OMX_ErrorNone)
+      return OMX_ErrorInsufficientResources;
+
+   /* slot 1 - video encoder */
+   if (vid_enc_LoaderComponent(stComponents[1]) != OMX_ErrorNone)
+      return OMX_ErrorInsufficientResources;
+
+   return component_count;
+}
+
+/*
+ * Return the shared vl_screen, creating it on first use, and count one more
+ * user of it (omx_usecount is incremented on every successful call).
+ *
+ * Returns NULL if no screen could be created; omx_usecount is left unchanged
+ * in that case.  The whole function runs with omx_lock held.
+ */
+struct vl_screen *omx_get_screen(void)
+{
+ static bool first_time = true;
+ mtx_lock(&omx_lock);
+
+ if (!omx_screen) {
+ /* look up OMX_RENDER_NODE only once; the result is kept in omx_render_node */
+ if (first_time) {
+ omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
+ first_time = false;
+ }
+ if (omx_render_node) {
+ /* a render node was requested: open it and create a DRM screen on it */
+ drm_fd = loader_open_device(omx_render_node);
+ if (drm_fd < 0)
+ goto error;
+
+ omx_screen = vl_drm_screen_create(drm_fd);
+ if (!omx_screen) {
+ close(drm_fd);
+ goto error;
+ }
+ } else {
+ /*
+ * No render node: go through X.  Without HAVE_X11_PLATFORM the
+ * XOpenDisplay() macro above evaluates to NULL, so this branch
+ * always fails in that configuration.
+ */
+ omx_display = XOpenDisplay(NULL);
+ if (!omx_display)
+ goto error;
+
+ /* try DRI3 first and fall back to DRI2 */
+ omx_screen = vl_dri3_screen_create(omx_display, 0);
+ if (!omx_screen)
+ omx_screen = vl_dri2_screen_create(omx_display, 0);
+ if (!omx_screen) {
+ XCloseDisplay(omx_display);
+ goto error;
+ }
+ }
+ }
+
+ ++omx_usecount;
+
+ mtx_unlock(&omx_lock);
+ return omx_screen;
+
+error:
+ /* nothing was created (or it was already torn down above); just drop the lock */
+ mtx_unlock(&omx_lock);
+ return NULL;
+}
+
+/*
+ * Drop one user of the shared screen.  When the count reaches zero the
+ * screen is destroyed and the backing handle is released: the DRM fd if a
+ * render node was in use, the X display otherwise.  Runs under omx_lock.
+ */
+void omx_put_screen(void)
+{
+   mtx_lock(&omx_lock);
+
+   omx_usecount -= 1;
+   if (omx_usecount == 0) {
+      omx_screen->destroy(omx_screen);
+      omx_screen = NULL;
+
+      if (!omx_render_node) {
+         XCloseDisplay(omx_display);
+      } else {
+         close(drm_fd);
+      }
+   }
+
+   mtx_unlock(&omx_lock);
+}
+
+/*
+ * Destructor wrapper: mark the component invalid, post its message
+ * semaphore, wait for the message handler thread to finish and only then
+ * hand over to omx_base_component_Destructor().
+ */
+OMX_ERRORTYPE omx_workaround_Destructor(OMX_COMPONENTTYPE *comp)
+{
+   omx_base_component_PrivateType *base = comp->pComponentPrivate;
+
+   /* flag the state first, then post the semaphore */
+   base->state = OMX_StateInvalid;
+   tsem_up(base->messageSem);
+
+   /* block until the handler thread has exited */
+   pthread_join(base->messageHandlerThread, NULL);
+
+   return omx_base_component_Destructor(comp);
+}
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h
new file mode 100644
index 000000000..7625d7a5b
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/entrypoint.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef OMX_ENTRYPOINT_H
+#define OMX_ENTRYPOINT_H
+
+#include <bellagio/st_static_component_loader.h>
+
+#include "vl/vl_winsys.h"
+
+PUBLIC extern int omx_component_library_Setup(stLoaderComponentType **stComponents);
+
+struct vl_screen *omx_get_screen(void);
+void omx_put_screen(void);
+
+OMX_ERRORTYPE omx_workaround_Destructor(OMX_COMPONENTTYPE *comp);
+
+#endif
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c
new file mode 100644
index 000000000..f9fe19f63
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.c
@@ -0,0 +1,644 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+
+#include <assert.h>
+
+#include <OMX_Video.h>
+
+/* bellagio defines a DEBUG macro that we don't want */
+#ifndef DEBUG
+#include <bellagio/omxcore.h>
+#undef DEBUG
+#else
+#include <bellagio/omxcore.h>
+#endif
+
+#include "pipe/p_screen.h"
+#include "pipe/p_video_codec.h"
+#include "util/u_memory.h"
+#include "util/u_surface.h"
+#include "vl/vl_video_buffer.h"
+#include "vl/vl_vlc.h"
+
+#include "entrypoint.h"
+#include "vid_dec.h"
+
+static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name);
+static OMX_ERRORTYPE vid_dec_Destructor(OMX_COMPONENTTYPE *comp);
+static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param);
+static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param);
+static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE *comp, internalRequestMessageType *msg);
+static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf);
+static OMX_ERRORTYPE vid_dec_FreeDecBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf);
+static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output);
+
+/*
+ * Fill in the loader descriptor for the video decoder component.
+ *
+ * Sets the component version to 0.0.0 step 1, allocates and fills the base
+ * name plus one specific name/role pair per supported codec (MPEG2, AVC,
+ * HEVC) and installs vid_dec_Constructor as the constructor.
+ *
+ * Returns OMX_ErrorNone on success.  On allocation failure everything
+ * allocated here is freed, the name/role pointers in comp are reset to NULL
+ * and name_specific_length to 0, so comp never holds freed pointers, and
+ * OMX_ErrorInsufficientResources is returned.
+ */
+OMX_ERRORTYPE vid_dec_LoaderComponent(stLoaderComponentType *comp)
+{
+   const char *names[] = {
+      OMX_VID_DEC_MPEG2_NAME,
+      OMX_VID_DEC_AVC_NAME,
+      OMX_VID_DEC_HEVC_NAME
+   };
+   const char *roles[] = {
+      OMX_VID_DEC_MPEG2_ROLE,
+      OMX_VID_DEC_AVC_ROLE,
+      OMX_VID_DEC_HEVC_ROLE
+   };
+   const unsigned count = sizeof(names) / sizeof(names[0]);
+   unsigned i;
+
+   comp->componentVersion.s.nVersionMajor = 0;
+   comp->componentVersion.s.nVersionMinor = 0;
+   comp->componentVersion.s.nRevision = 0;
+   comp->componentVersion.s.nStep = 1;
+   comp->name_specific_length = count;
+
+   /*
+    * Start from a known state: the error path below must only ever free
+    * what this function allocated, never a stale value found in comp.
+    */
+   comp->name = NULL;
+   comp->name_specific = NULL;
+   comp->role_specific = NULL;
+
+   comp->name = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+   if (comp->name == NULL)
+      goto error;
+
+   comp->name_specific = CALLOC(count, sizeof(char *));
+   if (comp->name_specific == NULL)
+      goto error;
+
+   comp->role_specific = CALLOC(count, sizeof(char *));
+   if (comp->role_specific == NULL)
+      goto error;
+
+   /* the pointer arrays are zeroed, so a partial fill is safe to unwind */
+   for (i = 0; i < count; ++i) {
+      comp->name_specific[i] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+      if (comp->name_specific[i] == NULL)
+         goto error;
+   }
+
+   for (i = 0; i < count; ++i) {
+      comp->role_specific[i] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+      if (comp->role_specific[i] == NULL)
+         goto error;
+   }
+
+   strcpy(comp->name, OMX_VID_DEC_BASE_NAME);
+   for (i = 0; i < count; ++i) {
+      strcpy(comp->name_specific[i], names[i]);
+      strcpy(comp->role_specific[i], roles[i]);
+   }
+
+   comp->constructor = vid_dec_Constructor;
+
+   return OMX_ErrorNone;
+
+error:
+   if (comp->role_specific) {
+      for (i = 0; i < count; ++i)
+         FREE(comp->role_specific[i]);
+   }
+   if (comp->name_specific) {
+      for (i = 0; i < count; ++i)
+         FREE(comp->name_specific[i]);
+   }
+
+   FREE(comp->role_specific);
+   FREE(comp->name_specific);
+   FREE(comp->name);
+
+   /* leave no dangling pointers behind for whoever tears comp down later */
+   comp->role_specific = NULL;
+   comp->name_specific = NULL;
+   comp->name = NULL;
+   comp->name_specific_length = 0;
+
+   return OMX_ErrorInsufficientResources;
+}
+
+/*
+ * Component constructor installed by vid_dec_LoaderComponent().
+ *
+ * Allocates the private data, runs the base filter constructor, picks the
+ * video profile from the component name, hooks up the component callbacks,
+ * acquires the shared screen, creates a pipe context with compositor state
+ * and sets up the two video ports (input and output) with their defaults.
+ *
+ * Returns OMX_ErrorNone on success, the base constructor's error code if
+ * that fails, or OMX_ErrorInsufficientResources for any other failure.
+ *
+ * NOTE(review): the early returns below do not release what was set up
+ * before them (priv, the screen reference, the pipe context, the ports
+ * array).  Presumably the caller runs the destructor after a failed
+ * construction -- confirm, otherwise these paths leak.
+ */
+static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name)
+{
+ vid_dec_PrivateType *priv;
+ omx_base_video_PortType *port;
+ struct pipe_screen *screen;
+ OMX_ERRORTYPE r;
+ int i;
+
+ assert(!comp->pComponentPrivate);
+
+ priv = comp->pComponentPrivate = CALLOC(1, sizeof(vid_dec_PrivateType));
+ if (!priv)
+ return OMX_ErrorInsufficientResources;
+
+ r = omx_base_filter_Constructor(comp, name);
+ if (r)
+ return r;
+
+ /* derive the profile from the specific name; unknown names stay UNKNOWN */
+ priv->profile = PIPE_VIDEO_PROFILE_UNKNOWN;
+
+ if (!strcmp(name, OMX_VID_DEC_MPEG2_NAME))
+ priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+
+ if (!strcmp(name, OMX_VID_DEC_AVC_NAME))
+ priv->profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
+
+ if (!strcmp(name, OMX_VID_DEC_HEVC_NAME))
+ priv->profile = PIPE_VIDEO_PROFILE_HEVC_MAIN;
+
+ /* route buffer handling, messages and teardown to this file's handlers */
+ priv->BufferMgmtCallback = vid_dec_FrameDecoded;
+ priv->messageHandler = vid_dec_MessageHandler;
+ priv->destructor = vid_dec_Destructor;
+
+ comp->SetParameter = vid_dec_SetParameter;
+ comp->GetParameter = vid_dec_GetParameter;
+
+ /* takes a reference on the shared screen; dropped via omx_put_screen() */
+ priv->screen = omx_get_screen();
+ if (!priv->screen)
+ return OMX_ErrorInsufficientResources;
+
+ screen = priv->screen->pscreen;
+ priv->pipe = screen->context_create(screen, NULL, 0);
+ if (!priv->pipe)
+ return OMX_ErrorInsufficientResources;
+
+ /* priv->pipe is reset to NULL on the failures below so that the destructor
+ * skips the compositor/context teardown */
+ if (!vl_compositor_init(&priv->compositor, priv->pipe)) {
+ priv->pipe->destroy(priv->pipe);
+ priv->pipe = NULL;
+ return OMX_ErrorInsufficientResources;
+ }
+
+ if (!vl_compositor_init_state(&priv->cstate, priv->pipe)) {
+ vl_compositor_cleanup(&priv->compositor);
+ priv->pipe->destroy(priv->pipe);
+ priv->pipe = NULL;
+ return OMX_ErrorInsufficientResources;
+ }
+
+ /* two video ports, numbered from 0 */
+ priv->sPortTypesParam[OMX_PortDomainVideo].nStartPortNumber = 0;
+ priv->sPortTypesParam[OMX_PortDomainVideo].nPorts = 2;
+ priv->ports = CALLOC(2, sizeof(omx_base_PortType *));
+ if (!priv->ports)
+ return OMX_ErrorInsufficientResources;
+
+ for (i = 0; i < 2; ++i) {
+ priv->ports[i] = CALLOC(1, sizeof(omx_base_video_PortType));
+ if (!priv->ports[i])
+ return OMX_ErrorInsufficientResources;
+
+ /* last argument is true only for port 0 (presumably "is input" -- confirm) */
+ base_video_port_Constructor(comp, &priv->ports[i], i, i == 0);
+ }
+
+ /* input port defaults: MPEG2, 176x144, 8 buffers, custom send/free hooks */
+ port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+ strcpy(port->sPortParam.format.video.cMIMEType,"video/MPEG2");
+ port->sPortParam.nBufferCountMin = 8;
+ port->sPortParam.nBufferCountActual = 8;
+ port->sPortParam.nBufferSize = DEFAULT_OUT_BUFFER_SIZE;
+ port->sPortParam.format.video.nFrameWidth = 176;
+ port->sPortParam.format.video.nFrameHeight = 144;
+ port->sPortParam.format.video.eCompressionFormat = OMX_VIDEO_CodingMPEG2;
+ port->sVideoParam.eCompressionFormat = OMX_VIDEO_CodingMPEG2;
+ port->Port_SendBufferFunction = vid_dec_DecodeBuffer;
+ port->Port_FreeBuffer = vid_dec_FreeDecBuffer;
+
+ /* output port defaults: YUV420 semi-planar, 176x144, 8 buffers (min 4) */
+ port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
+ port->sPortParam.nBufferCountActual = 8;
+ port->sPortParam.nBufferCountMin = 4;
+ port->sPortParam.format.video.nFrameWidth = 176;
+ port->sPortParam.format.video.nFrameHeight = 144;
+ port->sPortParam.format.video.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar;
+ port->sVideoParam.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar;
+
+ return OMX_ErrorNone;
+}
+
+/*
+ * Component destructor: destroys every allocated port, tears down the
+ * compositor and pipe context if one was created, drops the screen
+ * reference and finishes with omx_workaround_Destructor().
+ */
+static OMX_ERRORTYPE vid_dec_Destructor(OMX_COMPONENTTYPE *comp)
+{
+   vid_dec_PrivateType *dec = comp->pComponentPrivate;
+   int idx;
+
+   if (dec->ports != NULL) {
+      for (idx = 0; idx < dec->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++idx) {
+         /* slots may be empty if construction stopped half way */
+         if (!dec->ports[idx])
+            continue;
+
+         dec->ports[idx]->PortDestructor(dec->ports[idx]);
+      }
+
+      FREE(dec->ports);
+      dec->ports = NULL;
+   }
+
+   if (dec->pipe != NULL) {
+      vl_compositor_cleanup_state(&dec->cstate);
+      vl_compositor_cleanup(&dec->compositor);
+      dec->pipe->destroy(dec->pipe);
+   }
+
+   if (dec->screen != NULL)
+      omx_put_screen();
+
+   return omx_workaround_Destructor(comp);
+}
+
+/*
+ * SetParameter handler of the decoder component.
+ *
+ * - OMX_IndexParamPortDefinition: forwarded to the base component; when the
+ *   input port is being defined, the input buffer size and the output port
+ *   geometry/buffer size are derived from the new frame size and an
+ *   OMX_EventPortSettingsChanged event is raised for the output port.
+ * - OMX_IndexParamStandardComponentRole: selects the decode profile from
+ *   the role string; unknown roles yield OMX_ErrorBadParameter.
+ * - OMX_IndexParamVideoPortFormat: stores the format on port 0 or 1; any
+ *   other index yields OMX_ErrorBadPortIndex.
+ * - everything else is forwarded to omx_base_component_SetParameter().
+ *
+ * A NULL param yields OMX_ErrorBadParameter; header check failures are
+ * returned as reported by checkHeader().
+ */
+static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param)
+{
+   OMX_COMPONENTTYPE *comp = handle;
+   vid_dec_PrivateType *priv = comp->pComponentPrivate;
+   OMX_ERRORTYPE r;
+
+   if (!param)
+      return OMX_ErrorBadParameter;
+
+   switch(idx) {
+   case OMX_IndexParamPortDefinition: {
+      OMX_PARAM_PORTDEFINITIONTYPE *def = param;
+
+      r = omx_base_component_SetParameter(handle, idx, param);
+      if (r)
+         return r;
+
+      if (def->nPortIndex == OMX_BASE_FILTER_INPUTPORT_INDEX) {
+         omx_base_video_PortType *port;
+         unsigned framesize = def->format.video.nFrameWidth * def->format.video.nFrameHeight;
+
+         /*
+          * Input (bitstream) buffer: 512 bytes per 16x16 macroblock, i.e.
+          * two bytes per pixel.  Written as a plain doubling because
+          * "framesize * 512 / (16*16)" wraps a 32-bit intermediate for
+          * frames above ~8.38M pixels (e.g. 4096x2160) and then yields a
+          * far too small size; below that limit the result is identical.
+          */
+         port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+         port->sPortParam.nBufferSize = framesize * 2;
+
+         /* output port mirrors the input geometry; buffer is framesize * 1.5 */
+         port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
+         port->sPortParam.format.video.nFrameWidth = def->format.video.nFrameWidth;
+         port->sPortParam.format.video.nFrameHeight = def->format.video.nFrameHeight;
+         port->sPortParam.format.video.nStride = def->format.video.nFrameWidth;
+         port->sPortParam.format.video.nSliceHeight = def->format.video.nFrameHeight;
+         port->sPortParam.nBufferSize = framesize*3/2;
+
+         priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged,
+                                       OMX_BASE_FILTER_OUTPUTPORT_INDEX, 0, NULL);
+      }
+      break;
+   }
+   case OMX_IndexParamStandardComponentRole: {
+      OMX_PARAM_COMPONENTROLETYPE *role = param;
+
+      r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
+      if (r)
+         return r;
+
+      if (!strcmp((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE)) {
+         priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+      } else if (!strcmp((char *)role->cRole, OMX_VID_DEC_AVC_ROLE)) {
+         priv->profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
+      } else if (!strcmp((char *)role->cRole, OMX_VID_DEC_HEVC_ROLE)) {
+         priv->profile = PIPE_VIDEO_PROFILE_HEVC_MAIN;
+      } else {
+         return OMX_ErrorBadParameter;
+      }
+
+      break;
+   }
+   case OMX_IndexParamVideoPortFormat: {
+      OMX_VIDEO_PARAM_PORTFORMATTYPE *format = param;
+      omx_base_video_PortType *port;
+
+      r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+      if (r)
+         return r;
+
+      /* only ports 0 and 1 exist */
+      if (format->nPortIndex > 1)
+         return OMX_ErrorBadPortIndex;
+
+      port = (omx_base_video_PortType *)priv->ports[format->nPortIndex];
+      memcpy(&port->sVideoParam, format, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+      break;
+   }
+   default:
+      return omx_base_component_SetParameter(handle, idx, param);
+   }
+   return OMX_ErrorNone;
+}
+
+/*
+ * GetParameter handler of the decoder component.
+ *
+ * Answers the component role (from the current profile), the video port
+ * range and the per-port video format itself; every other index goes to
+ * omx_base_component_GetParameter().  A NULL param yields
+ * OMX_ErrorBadParameter, a port index above 1 OMX_ErrorBadPortIndex, and
+ * header check failures are passed through from checkHeader().
+ */
+static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param)
+{
+   OMX_COMPONENTTYPE *component = handle;
+   vid_dec_PrivateType *dec = component->pComponentPrivate;
+   OMX_ERRORTYPE err;
+
+   if (param == NULL)
+      return OMX_ErrorBadParameter;
+
+   switch (idx) {
+   case OMX_IndexParamStandardComponentRole: {
+      OMX_PARAM_COMPONENTROLETYPE *out = param;
+      const char *role_name = NULL;
+
+      err = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
+      if (err)
+         return err;
+
+      switch (dec->profile) {
+      case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+         role_name = OMX_VID_DEC_MPEG2_ROLE;
+         break;
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+         role_name = OMX_VID_DEC_AVC_ROLE;
+         break;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+         role_name = OMX_VID_DEC_HEVC_ROLE;
+         break;
+      default:
+         /* any other profile leaves cRole untouched */
+         break;
+      }
+
+      if (role_name)
+         strcpy((char *)out->cRole, role_name);
+
+      return OMX_ErrorNone;
+   }
+
+   case OMX_IndexParamVideoInit:
+      err = checkHeader(param, sizeof(OMX_PORT_PARAM_TYPE));
+      if (err)
+         return err;
+
+      memcpy(param, &dec->sPortTypesParam[OMX_PortDomainVideo], sizeof(OMX_PORT_PARAM_TYPE));
+      return OMX_ErrorNone;
+
+   case OMX_IndexParamVideoPortFormat: {
+      OMX_VIDEO_PARAM_PORTFORMATTYPE *out = param;
+      omx_base_video_PortType *video_port;
+
+      err = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+      if (err)
+         return err;
+
+      if (out->nPortIndex > 1)
+         return OMX_ErrorBadPortIndex;
+
+      video_port = (omx_base_video_PortType *)dec->ports[out->nPortIndex];
+      memcpy(out, &video_port->sVideoParam, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+      return OMX_ErrorNone;
+   }
+
+   default:
+      break;
+   }
+
+   return omx_base_component_GetParameter(handle, idx, param);
+}
+
+/*
+ * Message hook: on a Loaded -> Idle state change the codec specific init
+ * for the current profile is run, on Idle -> Loaded the shadow buffer and
+ * the codec are destroyed.  The message is then always handed on to
+ * omx_base_component_MessageHandler().
+ */
+static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE* comp, internalRequestMessageType *msg)
+{
+   vid_dec_PrivateType *dec = comp->pComponentPrivate;
+
+   if (msg->messageType != OMX_CommandStateSet)
+      return omx_base_component_MessageHandler(comp, msg);
+
+   if (msg->messageParam == OMX_StateIdle && dec->state == OMX_StateLoaded) {
+      /* Loaded -> Idle: set up the decoder matching the profile */
+      switch (dec->profile) {
+      case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+         vid_dec_mpeg12_Init(dec);
+         break;
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+         vid_dec_h264_Init(dec);
+         break;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+         vid_dec_h265_Init(dec);
+         break;
+      default:
+         break;
+      }
+   } else if (msg->messageParam == OMX_StateLoaded && dec->state == OMX_StateIdle) {
+      /* Idle -> Loaded: release the shadow buffer first, then the codec */
+      if (dec->shadow != NULL) {
+         dec->shadow->destroy(dec->shadow);
+         dec->shadow = NULL;
+      }
+      if (dec->codec != NULL) {
+         dec->codec->destroy(dec->codec);
+         dec->codec = NULL;
+      }
+   }
+
+   return omx_base_component_MessageHandler(comp, msg);
+}
+
+void vid_dec_NeedTarget(vid_dec_PrivateType *priv)
+{
+   struct vl_screen *vscreen = priv->screen;
+   struct pipe_video_buffer desc = {};
+   struct pipe_screen *screen;
+
+   assert(vscreen);
+   screen = vscreen->pscreen;
+   assert(screen);
+
+   /* A decode target already exists, nothing to allocate. */
+   if (priv->target)
+      return;
+
+   /* Describe a 4:2:0 buffer with the codec's dimensions; format and
+    * interlacing come from the screen's PREFERED_FORMAT and
+    * PREFERS_INTERLACED caps for the bitstream entrypoint. */
+   memset(&desc, 0, sizeof(desc));
+
+   desc.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+   desc.width = priv->codec->width;
+   desc.height = priv->codec->height;
+   desc.buffer_format = screen->get_video_param(screen,
+                                                PIPE_VIDEO_PROFILE_UNKNOWN,
+                                                PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+                                                PIPE_VIDEO_CAP_PREFERED_FORMAT);
+   desc.interlaced = screen->get_video_param(screen,
+                                             PIPE_VIDEO_PROFILE_UNKNOWN,
+                                             PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+                                             PIPE_VIDEO_CAP_PREFERS_INTERLACED);
+
+   priv->target = priv->pipe->create_video_buffer(priv->pipe, &desc);
+}
+
+static void vid_dec_FreeInputPortPrivate(OMX_BUFFERHEADERTYPE *buf)
+{
+   struct pipe_video_buffer *attached = buf->pInputPortPrivate;
+
+   /* Destroy the video buffer hanging off the header (if any) and clear the slot. */
+   if (attached) {
+      attached->destroy(attached);
+      buf->pInputPortPrivate = NULL;
+   }
+}
+
+static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf)
+{
+ OMX_COMPONENTTYPE* comp = port->standCompContainer;
+ vid_dec_PrivateType *priv = comp->pComponentPrivate;
+ unsigned i = priv->num_in_buffers++;
+ OMX_ERRORTYPE r;
+ /* Queue the incoming buffer in the slot given by the previous buffer count. */
+ priv->in_buffers[i] = buf;
+ priv->sizes[i] = buf->nFilledLen;
+ priv->inputs[i] = buf->pBuffer;
+ priv->timestamps[i] = buf->nTimeStamp;
+ /* Drain the queue; one buffer stays pending unless the new buffer is flagged EOS. */
+ while (priv->num_in_buffers > (!!(buf->nFlags & OMX_BUFFERFLAG_EOS) ? 0 : 1)) {
+ bool eos = !!(priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS);
+ unsigned min_bits_left = eos ? 32 : MAX2(buf->nFilledLen * 8, 32);
+ struct vl_vlc vlc;
+
+ vl_vlc_init(&vlc, priv->num_in_buffers, priv->inputs, priv->sizes);
+ /* With a slice in progress, remember how many bytes the reader starts with. */
+ if (priv->slice)
+ priv->bytes_left = vl_vlc_bits_left(&vlc) / 8;
+
+ while (vl_vlc_bits_left(&vlc) > min_bits_left) {
+ priv->Decode(priv, &vlc, min_bits_left);
+ vl_vlc_fillbits(&vlc);
+ }
+ /* Submit the bytes consumed since bytes_left was sampled as one chunk of the pending slice. */
+ if (priv->slice) {
+ unsigned bytes = priv->bytes_left - vl_vlc_bits_left(&vlc) / 8;
+
+ priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base,
+ 1, &priv->slice, &bytes);
+ /* NOTE(review): num_in_buffers is still non-zero here (decremented further down), so this looks always true - confirm. */
+ if (priv->num_in_buffers)
+ priv->slice = priv->inputs[1];
+ else
+ priv->slice = NULL;
+ }
+
+ if (eos && priv->frame_started)
+ priv->EndFrame(priv);
+ /* Pass the oldest buffer on: via base_port_SendBufferFunction when a frame finished or on EOS, else returned with nFilledLen 0. */
+ if (priv->frame_finished) {
+ priv->frame_finished = false;
+ priv->in_buffers[0]->nFilledLen = priv->in_buffers[0]->nAllocLen;
+ r = base_port_SendBufferFunction(port, priv->in_buffers[0]);
+ } else if (eos) {
+ vid_dec_FreeInputPortPrivate(priv->in_buffers[0]);
+ priv->in_buffers[0]->nFilledLen = priv->in_buffers[0]->nAllocLen;
+ r = base_port_SendBufferFunction(port, priv->in_buffers[0]);
+ } else {
+ priv->in_buffers[0]->nFilledLen = 0;
+ r = port->ReturnBufferFunction(port, priv->in_buffers[0]);
+ }
+ /* Move the remaining buffer to slot 0, advanced by delta bytes (presumably the part already consumed - confirm). */
+ if (--priv->num_in_buffers) {
+ unsigned delta = MIN2((min_bits_left - vl_vlc_bits_left(&vlc)) / 8, priv->sizes[1]);
+
+ priv->in_buffers[0] = priv->in_buffers[1];
+ priv->sizes[0] = priv->sizes[1] - delta;
+ priv->inputs[0] = priv->inputs[1] + delta;
+ priv->timestamps[0] = priv->timestamps[1];
+ }
+ /* The queue bookkeeping above is done before an error from the port call is propagated. */
+ if (r)
+ return r;
+ }
+
+ return OMX_ErrorNone;
+}
+
+static OMX_ERRORTYPE vid_dec_FreeDecBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf)
+{
+ vid_dec_FreeInputPortPrivate(buf); /* destroy any attached video buffer before delegating to the base port */
+ return base_port_FreeBuffer(port, idx, buf);
+}
+
+static void vid_dec_FillOutput(vid_dec_PrivateType *priv, struct pipe_video_buffer *buf,
+ OMX_BUFFERHEADERTYPE* output)
+{
+ omx_base_PortType *port = priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
+ OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video;
+ /* Copies the first two sampler-view planes of buf into output->pBuffer; returns early if a transfer map fails. */
+ struct pipe_sampler_view **views;
+ unsigned i, j;
+ unsigned width, height;
+
+ views = buf->get_sampler_view_planes(buf);
+
+ for (i = 0; i < 2 /* NV12 */; i++) {
+ if (!views[i]) continue;
+ width = def->nFrameWidth;
+ height = def->nFrameHeight;
+ vl_video_buffer_adjust_size(&width, &height, i, buf->chroma_format, buf->interlaced);
+ for (j = 0; j < views[i]->texture->array_size; ++j) {
+ struct pipe_box box = {0, 0, j, width, height, 1};
+ struct pipe_transfer *transfer;
+ uint8_t *map, *dst;
+ map = priv->pipe->transfer_map(priv->pipe, views[i]->texture, 0,
+ PIPE_TRANSFER_READ, &box, &transfer);
+ if (!map)
+ return;
+ /* Plane i starts nFrameWidth * nFrameHeight bytes in; layer j starts one stride further and its rows are array_size strides apart. */
+ dst = ((uint8_t*)output->pBuffer + output->nOffset) + j * def->nStride +
+ i * def->nFrameWidth * def->nFrameHeight;
+ util_copy_rect(dst,
+ views[i]->texture->format,
+ def->nStride * views[i]->texture->array_size, 0, 0,
+ box.width, box.height, map, transfer->stride, 0, 0);
+
+ pipe_transfer_unmap(priv->pipe, transfer);
+ }
+ }
+}
+
+static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input,
+                                 OMX_BUFFERHEADERTYPE* output)
+{
+   vid_dec_PrivateType *priv = comp->pComponentPrivate;
+   bool eos = !!(input->nFlags & OMX_BUFFERFLAG_EOS);
+   OMX_TICKS timestamp = OMX_VID_DEC_TIMESTAMP_INVALID;
+   /* Delivers a decoded picture to output; timestamp starts invalid because Flush() may return NULL without writing it. */
+   if (!input->pInputPortPrivate) {
+      input->pInputPortPrivate = priv->Flush(priv, &timestamp);
+      if (timestamp != OMX_VID_DEC_TIMESTAMP_INVALID)
+         input->nTimeStamp = timestamp;
+   }
+   /* With a picture at hand: swap video buffers with the output header, or copy the pixels out. */
+   if (input->pInputPortPrivate) {
+      if (output->pInputPortPrivate && !priv->disable_tunnel) {
+         struct pipe_video_buffer *tmp, *vbuf, *new_vbuf;
+         /* tmp keeps the buffer currently attached to the output so it can be handed back to the input. */
+         tmp = output->pOutputPortPrivate;
+         vbuf = input->pInputPortPrivate;
+         if (vbuf->interlaced) {
+            /* re-allocate the progressive buffer */
+            omx_base_video_PortType *port;
+            struct pipe_video_buffer templat = {};
+            struct u_rect src_rect, dst_rect;
+
+            port = (omx_base_video_PortType *)
+               priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+            memset(&templat, 0, sizeof(templat));
+            templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+            templat.width = port->sPortParam.format.video.nFrameWidth;
+            templat.height = port->sPortParam.format.video.nFrameHeight;
+            templat.buffer_format = PIPE_FORMAT_NV12;
+            templat.interlaced = false;
+            new_vbuf = priv->pipe->create_video_buffer(priv->pipe, &templat);
+
+            /* convert the interlaced to the progressive */
+            src_rect.x0 = dst_rect.x0 = 0;
+            src_rect.x1 = dst_rect.x1 = templat.width;
+            src_rect.y0 = dst_rect.y0 = 0;
+            src_rect.y1 = dst_rect.y1 = templat.height;
+
+            vl_compositor_yuv_deint_full(&priv->cstate, &priv->compositor,
+                                         input->pInputPortPrivate, new_vbuf,
+                                         &src_rect, &dst_rect, VL_COMPOSITOR_WEAVE);
+
+            /* set the progressive buffer for next round */
+            vbuf->destroy(vbuf);
+            input->pInputPortPrivate = new_vbuf;
+         }
+         output->pOutputPortPrivate = input->pInputPortPrivate;
+         input->pInputPortPrivate = tmp;
+      } else {
+         vid_dec_FillOutput(priv, input->pInputPortPrivate, output);
+      }
+      output->nFilledLen = output->nAllocLen;
+      output->nTimeStamp = input->nTimeStamp;
+   }
+   /* On EOS the buffer still attached to the input is destroyed; otherwise the input is marked consumed. */
+   if (eos && input->pInputPortPrivate)
+      vid_dec_FreeInputPortPrivate(input);
+   else
+      input->nFilledLen = 0;
+}
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h
new file mode 100644
index 000000000..7a10e750d
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec.h
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef OMX_VID_DEC_H
+#define OMX_VID_DEC_H
+
+#include <string.h>
+
+#include <OMX_Types.h>
+#include <OMX_Component.h>
+#include <OMX_Core.h>
+
+#include <bellagio/st_static_component_loader.h>
+#include <bellagio/omx_base_filter.h>
+#include <bellagio/omx_base_video_port.h>
+
+#include "pipe/p_video_state.h"
+#include "os/os_thread.h"
+#include "util/list.h"
+
+#include "vl/vl_compositor.h"
+
+#define OMX_VID_DEC_BASE_NAME "OMX.mesa.video_decoder"
+
+#define OMX_VID_DEC_MPEG2_NAME "OMX.mesa.video_decoder.mpeg2"
+#define OMX_VID_DEC_MPEG2_ROLE "video_decoder.mpeg2"
+
+#define OMX_VID_DEC_AVC_NAME "OMX.mesa.video_decoder.avc"
+#define OMX_VID_DEC_AVC_ROLE "video_decoder.avc"
+
+#define OMX_VID_DEC_HEVC_NAME "OMX.mesa.video_decoder.hevc"
+#define OMX_VID_DEC_HEVC_ROLE "video_decoder.hevc"
+
+#define OMX_VID_DEC_TIMESTAMP_INVALID ((OMX_TICKS) -1)
+
+struct vl_vlc;
+
+DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType) /* decoder state; the field list extends the base filter's fields */
+#define vid_dec_PrivateType_FIELDS omx_base_filter_PrivateType_FIELDS \
+ enum pipe_video_profile profile; \
+ struct vl_screen *screen; \
+ struct pipe_context *pipe; \
+ struct pipe_video_codec *codec; \
+ void (*Decode)(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); /* codec-specific hooks */ \
+ void (*EndFrame)(vid_dec_PrivateType *priv); \
+ struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); \
+ struct pipe_video_buffer *target, *shadow; /* target is allocated on demand by vid_dec_NeedTarget() */ \
+ union { \
+ struct { \
+ uint8_t intra_matrix[64]; \
+ uint8_t non_intra_matrix[64]; \
+ } mpeg12; \
+ struct { \
+ unsigned nal_ref_idc; \
+ bool IdrPicFlag; \
+ unsigned idr_pic_id; \
+ unsigned pic_order_cnt_lsb; \
+ unsigned pic_order_cnt_msb; \
+ unsigned delta_pic_order_cnt_bottom; \
+ unsigned delta_pic_order_cnt[2]; \
+ unsigned prevFrameNumOffset; \
+ struct pipe_h264_sps sps[32]; /* indexed by seq_parameter_set_id */ \
+ struct pipe_h264_pps pps[256]; /* indexed by pic_parameter_set_id */ \
+ struct list_head dpb_list; \
+ unsigned dpb_num; /* number of entries on dpb_list */ \
+ } h264; \
+ struct { \
+ unsigned temporal_id; \
+ unsigned level_idc; \
+ unsigned pic_width_in_luma_samples; \
+ unsigned pic_height_in_luma_samples; \
+ bool IdrPicFlag; \
+ int slice_prev_poc; \
+ void *ref_pic_set_list; \
+ void *rps; \
+ struct pipe_h265_sps sps[16]; \
+ struct pipe_h265_pps pps[64]; \
+ struct list_head dpb_list; \
+ unsigned dpb_num; \
+ } h265; \
+ } codec_data; \
+ union { \
+ struct pipe_picture_desc base; \
+ struct pipe_mpeg12_picture_desc mpeg12; \
+ struct pipe_h264_picture_desc h264; \
+ struct pipe_h265_picture_desc h265; \
+ } picture; \
+ unsigned num_in_buffers; /* number of queued entries in the four arrays below */ \
+ OMX_BUFFERHEADERTYPE *in_buffers[2]; \
+ const void *inputs[2]; \
+ unsigned sizes[2]; \
+ OMX_TICKS timestamps[2]; \
+ OMX_TICKS timestamp; \
+ bool first_buf_in_frame; \
+ bool frame_finished; \
+ bool frame_started; \
+ unsigned bytes_left; \
+ const void *slice; /* pending slice data handed to decode_bitstream(), or NULL */ \
+ bool disable_tunnel; /* when set, output is filled by copying instead of swapping buffers */ \
+ struct vl_compositor compositor; \
+ struct vl_compositor_state cstate;
+ENDCLASS(vid_dec_PrivateType)
+
+OMX_ERRORTYPE vid_dec_LoaderComponent(stLoaderComponentType *comp);
+
+/* used by MPEG12 and H264 implementation */
+void vid_dec_NeedTarget(vid_dec_PrivateType *priv);
+
+/* vid_dec_mpeg12.c */
+void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv);
+
+/* vid_dec_h264.c */
+void vid_dec_h264_Init(vid_dec_PrivateType *priv);
+
+/* vid_dec_h265.c */
+void vid_dec_h265_Init(vid_dec_PrivateType *priv);
+
+#endif
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c
new file mode 100644
index 000000000..7ea71c104
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h264.c
@@ -0,0 +1,1032 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include "pipe/p_video_codec.h"
+#include "util/u_memory.h"
+#include "util/u_video.h"
+#include "vl/vl_rbsp.h"
+#include "vl/vl_zscan.h"
+
+#include "entrypoint.h"
+#include "vid_dec.h"
+
+#define DPB_MAX_SIZE 5
+
+struct dpb_list { /* one decoded picture waiting in the H.264 output queue */
+ struct list_head list; /* link in codec_data.h264.dpb_list */
+ struct pipe_video_buffer *buffer; /* the decode target of the finished picture */
+ OMX_TICKS timestamp; /* timestamp recorded for the picture */
+ int poc; /* smaller of the two field order counts; Flush() outputs the lowest first */
+};
+
+static const uint8_t Default_4x4_Intra[16] = { /* default list passed to scaling_list() for ScalingList4x4[0..2] */
+ 6, 13, 20, 28, 13, 20, 28, 32,
+ 20, 28, 32, 37, 28, 32, 37, 42
+};
+
+static const uint8_t Default_4x4_Inter[16] = { /* default list passed to scaling_list() for ScalingList4x4[3..5] */
+ 10, 14, 20, 24, 14, 20, 24, 27,
+ 20, 24, 27, 30, 24, 27, 30, 34
+};
+
+static const uint8_t Default_8x8_Intra[64] = { /* default list for the even-indexed ScalingList8x8 entries */
+ 6, 10, 13, 16, 18, 23, 25, 27,
+ 10, 11, 16, 18, 23, 25, 27, 29,
+ 13, 16, 18, 23, 25, 27, 29, 31,
+ 16, 18, 23, 25, 27, 29, 31, 33,
+ 18, 23, 25, 27, 29, 31, 33, 36,
+ 23, 25, 27, 29, 31, 33, 36, 38,
+ 25, 27, 29, 31, 33, 36, 38, 40,
+ 27, 29, 31, 33, 36, 38, 40, 42
+};
+
+static const uint8_t Default_8x8_Inter[64] = { /* default list for the odd-indexed ScalingList8x8 entries */
+ 9, 13, 15, 17, 19, 21, 22, 24,
+ 13, 13, 17, 19, 21, 22, 24, 25,
+ 15, 17, 19, 21, 22, 24, 25, 27,
+ 17, 19, 21, 22, 24, 25, 27, 28,
+ 19, 21, 22, 24, 25, 27, 28, 30,
+ 21, 22, 24, 25, 27, 28, 30, 32,
+ 22, 24, 25, 27, 28, 30, 32, 33,
+ 24, 25, 27, 28, 30, 32, 33, 35
+};
+
+static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
+static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv);
+static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
+
+void vid_dec_h264_Init(vid_dec_PrivateType *priv)
+{
+   /* Route the generic decoder hooks to the H.264 implementation. */
+   priv->Flush = vid_dec_h264_Flush;
+   priv->EndFrame = vid_dec_h264_EndFrame;
+   priv->Decode = vid_dec_h264_Decode;
+   priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
+   /* Empty DPB list, both field order counts reset to INT_MAX, next buffer starts a frame. */
+   LIST_INITHEAD(&priv->codec_data.h264.dpb_list);
+   priv->first_buf_in_frame = true;
+   priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] = INT_MAX;
+}
+
+static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
+{
+   //TODO: sane buffer handling
+
+   if (priv->frame_started)
+      return;
+
+   /* Take the reference frame count from the active SPS before the codec is created,
+    * so max_references below is not whatever value an earlier picture left behind. */
+   priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames;
+
+   if (!priv->codec) {
+      struct pipe_video_codec templat = {};
+      omx_base_video_PortType *port;
+      /* The codec is created lazily, sized from the input port definition and the active SPS. */
+      port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+      templat.profile = priv->profile;
+      templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+      templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+      templat.max_references = priv->picture.h264.num_ref_frames;
+      templat.expect_chunked_decode = true;
+      templat.width = port->sPortParam.format.video.nFrameWidth;
+      templat.height = port->sPortParam.format.video.nFrameHeight;
+      templat.level = priv->picture.h264.pps->sps->level_idc;
+      priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
+   }
+
+   vid_dec_NeedTarget(priv);
+
+   if (priv->first_buf_in_frame)
+      priv->timestamp = priv->timestamps[0];
+   priv->first_buf_in_frame = false;
+   /* Start the picture with an empty slice count. */
+   priv->picture.h264.slice_count = 0;
+   priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base);
+   priv->frame_started = true;
+}
+
+static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv,
+                                                    OMX_TICKS *timestamp)
+{
+   struct dpb_list *it, *best = NULL;
+   struct pipe_video_buffer *picture;
+
+   /* Pick the entry with the lowest POC, but stop at a later entry whose POC is zero. */
+   LIST_FOR_EACH_ENTRY(it, &priv->codec_data.h264.dpb_list, list) {
+      if (!best) {
+         best = it;
+         continue;
+      }
+      if (it->poc == 0)
+         break;
+      if (it->poc < best->poc)
+         best = it;
+   }
+
+   /* Empty list: nothing to output, and *timestamp is left untouched. */
+   if (!best)
+      return NULL;
+   picture = best->buffer;
+   if (timestamp)
+      *timestamp = best->timestamp;
+   --priv->codec_data.h264.dpb_num;
+   LIST_DEL(&best->list);
+   FREE(best);
+   return picture;
+}
+
+static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
+{
+ struct dpb_list *entry;
+ struct pipe_video_buffer *tmp;
+ bool top_field_first;
+ OMX_TICKS timestamp;
+ /* Ends the current picture, queues its target on the DPB list and flushes one picture once the list exceeds DPB_MAX_SIZE. */
+ if (!priv->frame_started)
+ return;
+
+ priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base);
+ priv->frame_started = false;
+
+ // TODO: implement frame number handling
+ priv->picture.h264.frame_num_list[0] = priv->picture.h264.frame_num;
+ priv->picture.h264.field_order_cnt_list[0][0] = priv->picture.h264.frame_num;
+ priv->picture.h264.field_order_cnt_list[0][1] = priv->picture.h264.frame_num;
+
+ top_field_first = priv->picture.h264.field_order_cnt[0] < priv->picture.h264.field_order_cnt[1];
+ /* Field pictures only continue when bottom_field_flag equals top_field_first (presumably: wait for the second field - confirm). */
+ if (priv->picture.h264.field_pic_flag && priv->picture.h264.bottom_field_flag != top_field_first)
+ return;
+
+ /* add the decoded picture to the dpb list */
+ entry = CALLOC_STRUCT(dpb_list);
+ if (!entry)
+ return;
+
+ priv->first_buf_in_frame = true;
+ entry->buffer = priv->target;
+ entry->timestamp = priv->timestamp;
+ entry->poc = MIN2(priv->picture.h264.field_order_cnt[0], priv->picture.h264.field_order_cnt[1]);
+ LIST_ADDTAIL(&entry->list, &priv->codec_data.h264.dpb_list);
+ ++priv->codec_data.h264.dpb_num;
+ priv->target = NULL;
+ priv->picture.h264.field_order_cnt[0] = priv->picture.h264.field_order_cnt[1] = INT_MAX;
+ /* The list now owns the target; nothing is output until more than DPB_MAX_SIZE pictures are queued. */
+ if (priv->codec_data.h264.dpb_num <= DPB_MAX_SIZE)
+ return;
+ /* Swap: the flushed picture is attached to in_buffers[0]; the buffer previously attached there becomes the next target. */
+ tmp = priv->in_buffers[0]->pInputPortPrivate;
+ priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv, &timestamp);
+ priv->in_buffers[0]->nTimeStamp = timestamp;
+ priv->target = tmp;
+ priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL;
+}
+
+static void vui_parameters(struct vl_rbsp *rbsp)
+{
+ // TODO: VUI syntax is not parsed; seq_parameter_set() calls this as its last step
+}
+
+static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned sizeOfScalingList,
+ const uint8_t *defaultList, const uint8_t *fallbackList)
+{
+ unsigned lastScale = 8, nextScale = 8;
+ const int *list;
+ unsigned i;
+ /* Parses one scaling list; if the list is absent, fallbackList (when given) is copied and nothing else is read. */
+ /* (pic|seq)_scaling_list_present_flag[i] */
+ if (!vl_rbsp_u(rbsp, 1)) {
+ if (fallbackList)
+ memcpy(scalingList, fallbackList, sizeOfScalingList);
+ return;
+ }
+ /* list[] maps the coded position i to the index written in scalingList (presumably zig-zag scan tables - confirm). */
+ list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal;
+ for (i = 0; i < sizeOfScalingList; ++i ) {
+ /* Once nextScale reaches 0 no further deltas are read and lastScale is repeated. */
+ if (nextScale != 0) {
+ signed delta_scale = vl_rbsp_se(rbsp);
+ nextScale = (lastScale + delta_scale + 256) % 256;
+ if (i == 0 && nextScale == 0) {
+ memcpy(scalingList, defaultList, sizeOfScalingList);
+ return;
+ }
+ }
+ scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale;
+ lastScale = scalingList[list[i]];
+ }
+}
+
+static struct pipe_h264_sps *seq_parameter_set_id(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+{
+   /* Read the ue(v) id and map it to its SPS table slot; NULL for an invalid seq_parameter_set_id. */
+   const unsigned sps_id = vl_rbsp_ue(rbsp);
+   const bool in_range = sps_id < ARRAY_SIZE(priv->codec_data.h264.sps);
+
+   return in_range ? &priv->codec_data.h264.sps[sps_id] : NULL;
+}
+
+static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+{
+ struct pipe_h264_sps *sps;
+ unsigned profile_idc, level_idc;
+ unsigned i;
+ /* Parses an SPS into the table slot named by its id; several elements are read only to advance the bitstream. */
+ /* Sequence parameter set */
+ profile_idc = vl_rbsp_u(rbsp, 8);
+
+ /* constraint_set0_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* constraint_set1_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* constraint_set2_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* constraint_set3_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* constraint_set4_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* constraint_set5_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* reserved_zero_2bits */
+ vl_rbsp_u(rbsp, 2);
+
+ /* level_idc */
+ level_idc = vl_rbsp_u(rbsp, 8);
+
+ sps = seq_parameter_set_id(priv, rbsp);
+ if (!sps)
+ return;
+ /* Reset the slot; every scaling list entry starts at 16 and is only overwritten if the stream carries lists. */
+ memset(sps, 0, sizeof(*sps));
+ memset(sps->ScalingList4x4, 16, sizeof(sps->ScalingList4x4));
+ memset(sps->ScalingList8x8, 16, sizeof(sps->ScalingList8x8));
+
+ sps->level_idc = level_idc;
+ /* Only the profiles listed here carry the chroma format, bit depth and scaling matrix elements. */
+ if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 ||
+ profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 ||
+ profile_idc == 128 || profile_idc == 138) {
+
+ sps->chroma_format_idc = vl_rbsp_ue(rbsp);
+
+ if (sps->chroma_format_idc == 3)
+ sps->separate_colour_plane_flag = vl_rbsp_u(rbsp, 1);
+
+ sps->bit_depth_luma_minus8 = vl_rbsp_ue(rbsp);
+
+ sps->bit_depth_chroma_minus8 = vl_rbsp_ue(rbsp);
+
+ /* qpprime_y_zero_transform_bypass_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ sps->seq_scaling_matrix_present_flag = vl_rbsp_u(rbsp, 1);
+ if (sps->seq_scaling_matrix_present_flag) {
+ /* An absent list falls back to the default table for the first list of a group, else to the previously parsed list. */
+ scaling_list(rbsp, sps->ScalingList4x4[0], 16, Default_4x4_Intra, Default_4x4_Intra);
+ scaling_list(rbsp, sps->ScalingList4x4[1], 16, Default_4x4_Intra, sps->ScalingList4x4[0]);
+ scaling_list(rbsp, sps->ScalingList4x4[2], 16, Default_4x4_Intra, sps->ScalingList4x4[1]);
+ scaling_list(rbsp, sps->ScalingList4x4[3], 16, Default_4x4_Inter, Default_4x4_Inter);
+ scaling_list(rbsp, sps->ScalingList4x4[4], 16, Default_4x4_Inter, sps->ScalingList4x4[3]);
+ scaling_list(rbsp, sps->ScalingList4x4[5], 16, Default_4x4_Inter, sps->ScalingList4x4[4]);
+
+ scaling_list(rbsp, sps->ScalingList8x8[0], 64, Default_8x8_Intra, Default_8x8_Intra);
+ scaling_list(rbsp, sps->ScalingList8x8[1], 64, Default_8x8_Inter, Default_8x8_Inter);
+ if (sps->chroma_format_idc == 3) {
+ scaling_list(rbsp, sps->ScalingList8x8[2], 64, Default_8x8_Intra, sps->ScalingList8x8[0]);
+ scaling_list(rbsp, sps->ScalingList8x8[3], 64, Default_8x8_Inter, sps->ScalingList8x8[1]);
+ scaling_list(rbsp, sps->ScalingList8x8[4], 64, Default_8x8_Intra, sps->ScalingList8x8[2]);
+ scaling_list(rbsp, sps->ScalingList8x8[5], 64, Default_8x8_Inter, sps->ScalingList8x8[3]);
+ }
+ }
+ } else if (profile_idc == 183)
+ sps->chroma_format_idc = 0;
+ else
+ sps->chroma_format_idc = 1;
+
+ sps->log2_max_frame_num_minus4 = vl_rbsp_ue(rbsp);
+
+ sps->pic_order_cnt_type = vl_rbsp_ue(rbsp);
+
+ if (sps->pic_order_cnt_type == 0)
+ sps->log2_max_pic_order_cnt_lsb_minus4 = vl_rbsp_ue(rbsp);
+ else if (sps->pic_order_cnt_type == 1) {
+ sps->delta_pic_order_always_zero_flag = vl_rbsp_u(rbsp, 1);
+
+ sps->offset_for_non_ref_pic = vl_rbsp_se(rbsp);
+
+ sps->offset_for_top_to_bottom_field = vl_rbsp_se(rbsp);
+
+ sps->num_ref_frames_in_pic_order_cnt_cycle = vl_rbsp_ue(rbsp);
+
+ for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; ++i)
+ sps->offset_for_ref_frame[i] = vl_rbsp_se(rbsp);
+ }
+
+ sps->max_num_ref_frames = vl_rbsp_ue(rbsp);
+
+ /* gaps_in_frame_num_value_allowed_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* pic_width_in_mbs_minus1 */
+ vl_rbsp_ue(rbsp);
+
+ /* pic_height_in_map_units_minus1 */
+ vl_rbsp_ue(rbsp);
+
+ sps->frame_mbs_only_flag = vl_rbsp_u(rbsp, 1);
+ if (!sps->frame_mbs_only_flag)
+ sps->mb_adaptive_frame_field_flag = vl_rbsp_u(rbsp, 1);
+
+ sps->direct_8x8_inference_flag = vl_rbsp_u(rbsp, 1);
+
+ /* frame_cropping_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ /* frame_crop_left_offset */
+ vl_rbsp_ue(rbsp);
+
+ /* frame_crop_right_offset */
+ vl_rbsp_ue(rbsp);
+
+ /* frame_crop_top_offset */
+ vl_rbsp_ue(rbsp);
+
+ /* frame_crop_bottom_offset */
+ vl_rbsp_ue(rbsp);
+ }
+
+ /* vui_parameters_present_flag */
+ if (vl_rbsp_u(rbsp, 1))
+ vui_parameters(rbsp);
+}
+
+static struct pipe_h264_pps *pic_parameter_set_id(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+{
+   /* Read the ue(v) id and map it to its PPS table slot; NULL for an invalid pic_parameter_set_id. */
+   const unsigned pps_id = vl_rbsp_ue(rbsp);
+   const bool in_range = pps_id < ARRAY_SIZE(priv->codec_data.h264.pps);
+
+   return in_range ? &priv->codec_data.h264.pps[pps_id] : NULL;
+}
+
+static void picture_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+{
+   struct pipe_h264_sps *sps;
+   struct pipe_h264_pps *pps;
+   unsigned i;
+   /* Parses a PPS into the table slot named by its id; slice group details are read only to advance the bitstream. */
+   pps = pic_parameter_set_id(priv, rbsp);
+   if (!pps)
+      return;
+
+   memset(pps, 0, sizeof(*pps));
+   /* An out-of-range SPS id leaves the slot zeroed with pps->sps == NULL. */
+   sps = pps->sps = seq_parameter_set_id(priv, rbsp);
+   if (!sps)
+      return;
+
+   memcpy(pps->ScalingList4x4, sps->ScalingList4x4, sizeof(pps->ScalingList4x4));
+   memcpy(pps->ScalingList8x8, sps->ScalingList8x8, sizeof(pps->ScalingList8x8));
+
+   pps->entropy_coding_mode_flag = vl_rbsp_u(rbsp, 1);
+
+   pps->bottom_field_pic_order_in_frame_present_flag = vl_rbsp_u(rbsp, 1);
+
+   pps->num_slice_groups_minus1 = vl_rbsp_ue(rbsp);
+   if (pps->num_slice_groups_minus1 > 0) {
+      pps->slice_group_map_type = vl_rbsp_ue(rbsp);
+
+      if (pps->slice_group_map_type == 0) {
+
+         for (i = 0; i <= pps->num_slice_groups_minus1; ++i)
+            /* run_length_minus1[i] */
+            vl_rbsp_ue(rbsp);
+
+      } else if (pps->slice_group_map_type == 2) {
+
+         for (i = 0; i <= pps->num_slice_groups_minus1; ++i) {
+            /* top_left[i] */
+            vl_rbsp_ue(rbsp);
+
+            /* bottom_right[i] */
+            vl_rbsp_ue(rbsp);
+         }
+
+      } else if (pps->slice_group_map_type >= 3 && pps->slice_group_map_type <= 5) {
+
+         /* slice_group_change_direction_flag */
+         vl_rbsp_u(rbsp, 1);
+
+         pps->slice_group_change_rate_minus1 = vl_rbsp_ue(rbsp);
+
+      } else if (pps->slice_group_map_type == 6) {
+         /* slice_group_id[i] is u(v) with Ceil(Log2(num_slice_groups_minus1 + 1)) bits; a truncated log2() would floor */
+         unsigned pic_size_in_map_units_minus1, bits = 0;
+         const unsigned num_groups = pps->num_slice_groups_minus1 + 1u;
+         pic_size_in_map_units_minus1 = vl_rbsp_ue(rbsp);
+         while (bits < 31 && (1u << bits) < num_groups)
+            ++bits;
+         for (i = 0; i <= pic_size_in_map_units_minus1; ++i)
+            vl_rbsp_u(rbsp, bits);
+      }
+   }
+
+   pps->num_ref_idx_l0_default_active_minus1 = vl_rbsp_ue(rbsp);
+
+   pps->num_ref_idx_l1_default_active_minus1 = vl_rbsp_ue(rbsp);
+
+   pps->weighted_pred_flag = vl_rbsp_u(rbsp, 1);
+
+   pps->weighted_bipred_idc = vl_rbsp_u(rbsp, 2);
+
+   pps->pic_init_qp_minus26 = vl_rbsp_se(rbsp);
+
+   /* pic_init_qs_minus26 */
+   vl_rbsp_se(rbsp);
+
+   pps->chroma_qp_index_offset = vl_rbsp_se(rbsp);
+
+   pps->deblocking_filter_control_present_flag = vl_rbsp_u(rbsp, 1);
+
+   pps->constrained_intra_pred_flag = vl_rbsp_u(rbsp, 1);
+
+   pps->redundant_pic_cnt_present_flag = vl_rbsp_u(rbsp, 1);
+
+   if (vl_rbsp_more_data(rbsp)) {
+      pps->transform_8x8_mode_flag = vl_rbsp_u(rbsp, 1);
+
+      /* pic_scaling_matrix_present_flag */
+      if (vl_rbsp_u(rbsp, 1)) {
+         /* A NULL fallback keeps the list copied from the SPS above when the SPS carried its own matrix. */
+         scaling_list(rbsp, pps->ScalingList4x4[0], 16, Default_4x4_Intra,
+                      sps->seq_scaling_matrix_present_flag ? NULL : Default_4x4_Intra);
+         scaling_list(rbsp, pps->ScalingList4x4[1], 16, Default_4x4_Intra, pps->ScalingList4x4[0]);
+         scaling_list(rbsp, pps->ScalingList4x4[2], 16, Default_4x4_Intra, pps->ScalingList4x4[1]);
+         scaling_list(rbsp, pps->ScalingList4x4[3], 16, Default_4x4_Inter,
+                      sps->seq_scaling_matrix_present_flag ? NULL : Default_4x4_Inter);
+         scaling_list(rbsp, pps->ScalingList4x4[4], 16, Default_4x4_Inter, pps->ScalingList4x4[3]);
+         scaling_list(rbsp, pps->ScalingList4x4[5], 16, Default_4x4_Inter, pps->ScalingList4x4[4]);
+
+         if (pps->transform_8x8_mode_flag) {
+            scaling_list(rbsp, pps->ScalingList8x8[0], 64, Default_8x8_Intra,
+                         sps->seq_scaling_matrix_present_flag ? NULL : Default_8x8_Intra);
+            scaling_list(rbsp, pps->ScalingList8x8[1], 64, Default_8x8_Inter,
+                         sps->seq_scaling_matrix_present_flag ? NULL : Default_8x8_Inter);
+            if (sps->chroma_format_idc == 3) {
+               scaling_list(rbsp, pps->ScalingList8x8[2], 64, Default_8x8_Intra, pps->ScalingList8x8[0]);
+               scaling_list(rbsp, pps->ScalingList8x8[3], 64, Default_8x8_Inter, pps->ScalingList8x8[1]);
+               scaling_list(rbsp, pps->ScalingList8x8[4], 64, Default_8x8_Intra, pps->ScalingList8x8[2]);
+               scaling_list(rbsp, pps->ScalingList8x8[5], 64, Default_8x8_Inter, pps->ScalingList8x8[3]);
+            }
+         }
+      }
+
+      pps->second_chroma_qp_index_offset = vl_rbsp_se(rbsp);
+   }
+}
+
+static void ref_pic_list_mvc_modification(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+{
+ // TODO: the MVC variant is not implemented; reaching this stub trips the assert below
+ assert(0);
+}
+
+static void ref_pic_list_modification(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+ enum pipe_h264_slice_type slice_type)
+{
+ unsigned modification_of_pic_nums_idc;
+ /* Skips over the list modification syntax: every value is read and discarded, priv is not touched. */
+ if (slice_type != 2 && slice_type != 4) {
+ /* ref_pic_list_modification_flag_l0 */
+ if (vl_rbsp_u(rbsp, 1)) {
+ do {
+ modification_of_pic_nums_idc = vl_rbsp_ue(rbsp);
+ if (modification_of_pic_nums_idc == 0 ||
+ modification_of_pic_nums_idc == 1)
+ /* abs_diff_pic_num_minus1 */
+ vl_rbsp_ue(rbsp);
+ else if (modification_of_pic_nums_idc == 2)
+ /* long_term_pic_num */
+ vl_rbsp_ue(rbsp);
+ } while (modification_of_pic_nums_idc != 3);
+ }
+ }
+ /* The l1 part is only read for slice_type 1 (presumably B slices - confirm against enum pipe_h264_slice_type). */
+ if (slice_type == 1) {
+ /* ref_pic_list_modification_flag_l1 */
+ if (vl_rbsp_u(rbsp, 1)) {
+ do {
+ modification_of_pic_nums_idc = vl_rbsp_ue(rbsp);
+ if (modification_of_pic_nums_idc == 0 ||
+ modification_of_pic_nums_idc == 1)
+ /* abs_diff_pic_num_minus1 */
+ vl_rbsp_ue(rbsp);
+ else if (modification_of_pic_nums_idc == 2)
+ /* long_term_pic_num */
+ vl_rbsp_ue(rbsp);
+ } while (modification_of_pic_nums_idc != 3);
+ }
+ }
+}
+
+static void pred_weight_table(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+ struct pipe_h264_sps *sps, enum pipe_h264_slice_type slice_type)
+{
+ unsigned ChromaArrayType = sps->separate_colour_plane_flag ? 0 : sps->chroma_format_idc;
+ unsigned i, j;
+ /* Skips over the weight table: all weights and offsets are read and discarded; chroma entries exist only if ChromaArrayType != 0. */
+ /* luma_log2_weight_denom */
+ vl_rbsp_ue(rbsp);
+
+ if (ChromaArrayType != 0)
+ /* chroma_log2_weight_denom */
+ vl_rbsp_ue(rbsp);
+ /* One entry per active l0 reference, count taken from the current picture description. */
+ for (i = 0; i <= priv->picture.h264.num_ref_idx_l0_active_minus1; ++i) {
+ /* luma_weight_l0_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ /* luma_weight_l0[i] */
+ vl_rbsp_se(rbsp);
+ /* luma_offset_l0[i] */
+ vl_rbsp_se(rbsp);
+ }
+ if (ChromaArrayType != 0) {
+ /* chroma_weight_l0_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ for (j = 0; j < 2; ++j) {
+ /* chroma_weight_l0[i][j] */
+ vl_rbsp_se(rbsp);
+ /* chroma_offset_l0[i][j] */
+ vl_rbsp_se(rbsp);
+ }
+ }
+ }
+ }
+ /* The l1 table is only read for slice_type 1. */
+ if (slice_type == 1) {
+ for (i = 0; i <= priv->picture.h264.num_ref_idx_l1_active_minus1; ++i) {
+ /* luma_weight_l1_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ /* luma_weight_l1[i] */
+ vl_rbsp_se(rbsp);
+ /* luma_offset_l1[i] */
+ vl_rbsp_se(rbsp);
+ }
+ if (ChromaArrayType != 0) {
+ /* chroma_weight_l1_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ for (j = 0; j < 2; ++j) {
+ /* chroma_weight_l1[i][j] */
+ vl_rbsp_se(rbsp);
+ /* chroma_offset_l1[i][j] */
+ vl_rbsp_se(rbsp);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void dec_ref_pic_marking(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+ bool IdrPicFlag)
+{
+ unsigned memory_management_control_operation;
+ /* Skips over the reference marking syntax: every value is read and discarded, priv is not touched. */
+ if (IdrPicFlag) {
+ /* no_output_of_prior_pics_flag */
+ vl_rbsp_u(rbsp, 1);
+ /* long_term_reference_flag */
+ vl_rbsp_u(rbsp, 1);
+ } else {
+ /* adaptive_ref_pic_marking_mode_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ do {
+ memory_management_control_operation = vl_rbsp_ue(rbsp);
+ /* Which operands follow depends on the operation code; operation 0 ends the loop. */
+ if (memory_management_control_operation == 1 ||
+ memory_management_control_operation == 3)
+ /* difference_of_pic_nums_minus1 */
+ vl_rbsp_ue(rbsp);
+
+ if (memory_management_control_operation == 2)
+ /* long_term_pic_num */
+ vl_rbsp_ue(rbsp);
+
+ if (memory_management_control_operation == 3 ||
+ memory_management_control_operation == 6)
+ /* long_term_frame_idx */
+ vl_rbsp_ue(rbsp);
+
+ if (memory_management_control_operation == 4)
+ /* max_long_term_frame_idx_plus1 */
+ vl_rbsp_ue(rbsp);
+ } while (memory_management_control_operation != 0);
+ }
+ }
+}
+ /*
+  * Parse the header of an H.264 coded slice.
+  *
+  * Values that are kept go to priv->picture.h264 and priv->codec_data.h264;
+  * everything else is read only to advance rbsp. Before a new value is stored,
+  * it is compared with the one remembered from the previous slice and
+  * vid_dec_h264_EndFrame() is called when they differ (IdrPicFlag, pps,
+  * frame_num, bottom_field_flag, idr_pic_id, POC lsb and POC deltas).
+  * priv->picture.h264.field_order_cnt[] is derived for pic_order_cnt_type
+  * 0, 1 and 2.
+  *
+  * priv          - decoder state, read and updated
+  * rbsp          - bit reader; the caller in this file initialises it after
+  *                 the NAL unit header has been consumed
+  * nal_ref_idc   - 0 selects the non-reference paths of the POC derivation
+  *                 and skips dec_ref_pic_marking()
+  * nal_unit_type - 5 sets IdrPicFlag; 20 and 21 select
+  *                 ref_pic_list_mvc_modification()
+  *
+  * Returns early, with the remainder unparsed, when pic_parameter_set_id()
+  * yields NULL or the selected PPS has no SPS attached.
+  */
+ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+ unsigned nal_ref_idc, unsigned nal_unit_type)
+ {
+ enum pipe_h264_slice_type slice_type;
+ struct pipe_h264_pps *pps;
+ struct pipe_h264_sps *sps;
+ unsigned frame_num, prevFrameNum;
+ bool IdrPicFlag = nal_unit_type == 5;
+ /* A change between IDR and non-IDR ends the frame in progress. */
+ if (IdrPicFlag != priv->codec_data.h264.IdrPicFlag)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.IdrPicFlag = IdrPicFlag;
+
+ /* first_mb_in_slice: read and discarded */
+ vl_rbsp_ue(rbsp);
+ /* The coded slice_type is folded into the range 0..4. */
+ slice_type = vl_rbsp_ue(rbsp) % 5;
+
+ pps = pic_parameter_set_id(priv, rbsp);
+ if (!pps)
+ return;
+
+ sps = pps->sps;
+ if (!sps)
+ return;
+
+ if (pps != priv->picture.h264.pps)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->picture.h264.pps = pps;
+
+ if (sps->separate_colour_plane_flag == 1 )
+ /* colour_plane_id */
+ vl_rbsp_u(rbsp, 2);
+
+ frame_num = vl_rbsp_u(rbsp, sps->log2_max_frame_num_minus4 + 4);
+
+ if (frame_num != priv->picture.h264.frame_num)
+ vid_dec_h264_EndFrame(priv);
+ /* Keep the previous frame_num: the POC type 1/2 paths use it to detect wrap. */
+ prevFrameNum = priv->picture.h264.frame_num;
+ priv->picture.h264.frame_num = frame_num;
+ /* Both field flags default to 0 and are only re-read for non frame-only streams. */
+ priv->picture.h264.field_pic_flag = 0;
+ priv->picture.h264.bottom_field_flag = 0;
+
+ if (!sps->frame_mbs_only_flag) {
+ unsigned field_pic_flag = vl_rbsp_u(rbsp, 1);
+ /* NOTE(review): the stored field_pic_flag was reset to 0 just above, so
+  * "!field_pic_flag && field_pic_flag != stored" can never be true here. */
+ if (!field_pic_flag && field_pic_flag != priv->picture.h264.field_pic_flag)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->picture.h264.field_pic_flag = field_pic_flag;
+
+ if (priv->picture.h264.field_pic_flag) {
+ unsigned bottom_field_flag = vl_rbsp_u(rbsp, 1);
+ /* NOTE(review): the stored bottom_field_flag was reset to 0 above, so this
+  * fires whenever the parsed flag is 1. */
+ if (bottom_field_flag != priv->picture.h264.bottom_field_flag)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->picture.h264.bottom_field_flag = bottom_field_flag;
+ }
+ }
+
+ if (IdrPicFlag) {
+ unsigned idr_pic_id = vl_rbsp_ue(rbsp);
+
+ if (idr_pic_id != priv->codec_data.h264.idr_pic_id)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.idr_pic_id = idr_pic_id;
+ }
+ /* Picture order count derivation, selected by sps->pic_order_cnt_type.
+  * Type 0: the lsb is coded in the slice, the msb is tracked across slices. */
+ if (sps->pic_order_cnt_type == 0) {
+ unsigned log2_max_pic_order_cnt_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
+ unsigned max_pic_order_cnt_lsb = 1 << log2_max_pic_order_cnt_lsb;
+ int pic_order_cnt_lsb = vl_rbsp_u(rbsp, log2_max_pic_order_cnt_lsb);
+ int pic_order_cnt_msb;
+
+ if (pic_order_cnt_lsb != priv->codec_data.h264.pic_order_cnt_lsb)
+ vid_dec_h264_EndFrame(priv);
+ /* An IDR picture restarts the remembered msb/lsb from zero. */
+ if (IdrPicFlag) {
+ priv->codec_data.h264.pic_order_cnt_msb = 0;
+ priv->codec_data.h264.pic_order_cnt_lsb = 0;
+ }
+ /* lsb dropped by at least half its range: step the msb up by one range.
+  * lsb rose by more than half its range: step the msb down. Otherwise keep it. */
+ if ((pic_order_cnt_lsb < priv->codec_data.h264.pic_order_cnt_lsb) &&
+ (priv->codec_data.h264.pic_order_cnt_lsb - pic_order_cnt_lsb) >= (max_pic_order_cnt_lsb / 2))
+ pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb + max_pic_order_cnt_lsb;
+
+ else if ((pic_order_cnt_lsb > priv->codec_data.h264.pic_order_cnt_lsb) &&
+ (pic_order_cnt_lsb - priv->codec_data.h264.pic_order_cnt_lsb) > (max_pic_order_cnt_lsb / 2))
+ pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb - max_pic_order_cnt_lsb;
+
+ else
+ pic_order_cnt_msb = priv->codec_data.h264.pic_order_cnt_msb;
+
+ priv->codec_data.h264.pic_order_cnt_msb = pic_order_cnt_msb;
+ priv->codec_data.h264.pic_order_cnt_lsb = pic_order_cnt_lsb;
+
+ if (pps->bottom_field_pic_order_in_frame_present_flag && !priv->picture.h264.field_pic_flag) {
+ unsigned delta_pic_order_cnt_bottom = vl_rbsp_se(rbsp);
+
+ if (delta_pic_order_cnt_bottom != priv->codec_data.h264.delta_pic_order_cnt_bottom)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
+ }
+ /* Frame: both entries are written. Field: only the entry of the current parity. */
+ if (!priv->picture.h264.field_pic_flag) {
+ priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+ priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
+ priv->codec_data.h264.delta_pic_order_cnt_bottom;
+ } else if (!priv->picture.h264.bottom_field_flag)
+ priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+ else
+ priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
+
+ } else if (sps->pic_order_cnt_type == 1) {
+ unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
+ unsigned FrameNumOffset, absFrameNum, expectedPicOrderCnt;
+ /* Type 1: POC is computed from frame_num and sps->offset_for_ref_frame[],
+  * plus optional per-slice deltas. */
+ if (!sps->delta_pic_order_always_zero_flag) {
+ unsigned delta_pic_order_cnt[2];
+
+ delta_pic_order_cnt[0] = vl_rbsp_se(rbsp);
+
+ if (delta_pic_order_cnt[0] != priv->codec_data.h264.delta_pic_order_cnt[0])
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.delta_pic_order_cnt[0] = delta_pic_order_cnt[0];
+
+ if (pps->bottom_field_pic_order_in_frame_present_flag && !priv->picture.h264.field_pic_flag) {
+ delta_pic_order_cnt[1] = vl_rbsp_se(rbsp);
+
+ if (delta_pic_order_cnt[1] != priv->codec_data.h264.delta_pic_order_cnt[1])
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.delta_pic_order_cnt[1] = delta_pic_order_cnt[1];
+ }
+ }
+ /* FrameNumOffset restarts at 0 on IDR and grows by MaxFrameNum each time
+  * frame_num is smaller than the previous one. */
+ if (IdrPicFlag)
+ FrameNumOffset = 0;
+ else if (prevFrameNum > frame_num)
+ FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset + MaxFrameNum;
+ else
+ FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset;
+
+ priv->codec_data.h264.prevFrameNumOffset = FrameNumOffset;
+ /* absFrameNum stays 0 when the SPS cycle is empty, so the divisions by
+  * num_ref_frames_in_pic_order_cnt_cycle below are never reached with 0. */
+ if (sps->num_ref_frames_in_pic_order_cnt_cycle != 0)
+ absFrameNum = FrameNumOffset + frame_num;
+ else
+ absFrameNum = 0;
+
+ if (nal_ref_idc == 0 && absFrameNum > 0)
+ absFrameNum = absFrameNum - 1;
+
+ if (absFrameNum > 0) {
+ unsigned picOrderCntCycleCnt = (absFrameNum - 1) / sps->num_ref_frames_in_pic_order_cnt_cycle;
+ unsigned frameNumInPicOrderCntCycle = (absFrameNum - 1) % sps->num_ref_frames_in_pic_order_cnt_cycle;
+ signed ExpectedDeltaPerPicOrderCntCycle = 0;
+ unsigned i;
+ /* Sum of one whole cycle of offsets. */
+ for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; ++i)
+ ExpectedDeltaPerPicOrderCntCycle += sps->offset_for_ref_frame[i];
+ /* Completed cycles plus the offsets of the partial cycle up to this frame. */
+ expectedPicOrderCnt = picOrderCntCycleCnt * ExpectedDeltaPerPicOrderCntCycle;
+ for (i = 0; i <= frameNumInPicOrderCntCycle; ++i)
+ expectedPicOrderCnt += sps->offset_for_ref_frame[i];
+
+ } else
+ expectedPicOrderCnt = 0;
+
+ if (nal_ref_idc == 0)
+ expectedPicOrderCnt += sps->offset_for_non_ref_pic;
+
+ if (!priv->picture.h264.field_pic_flag) {
+ priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
+ priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] +
+ sps->offset_for_top_to_bottom_field + priv->codec_data.h264.delta_pic_order_cnt[1];
+
+ } else if (!priv->picture.h264.bottom_field_flag)
+ priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
+ else
+ priv->picture.h264.field_order_cnt[1] = expectedPicOrderCnt + sps->offset_for_top_to_bottom_field +
+ priv->codec_data.h264.delta_pic_order_cnt[0];
+
+ } else if (sps->pic_order_cnt_type == 2) {
+ unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
+ unsigned FrameNumOffset, tempPicOrderCnt;
+ /* Type 2: POC is twice the extended frame number, minus one for
+  * non-reference pictures; nothing extra is read from the slice. */
+ if (IdrPicFlag)
+ FrameNumOffset = 0;
+ else if (prevFrameNum > frame_num)
+ FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset + MaxFrameNum;
+ else
+ FrameNumOffset = priv->codec_data.h264.prevFrameNumOffset;
+
+ priv->codec_data.h264.prevFrameNumOffset = FrameNumOffset;
+
+ if (IdrPicFlag)
+ tempPicOrderCnt = 0;
+ else if (nal_ref_idc == 0)
+ tempPicOrderCnt = 2 * (FrameNumOffset + frame_num) - 1;
+ else
+ tempPicOrderCnt = 2 * (FrameNumOffset + frame_num);
+
+ if (!priv->picture.h264.field_pic_flag) {
+ priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
+ priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt;
+
+ } else if (!priv->picture.h264.bottom_field_flag)
+ priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
+ else
+ priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt;
+ }
+ /* From here on most syntax elements are read only to keep the bit
+  * position in step; just the active reference counts are stored. */
+ if (pps->redundant_pic_cnt_present_flag)
+ /* redundant_pic_cnt */
+ vl_rbsp_ue(rbsp);
+
+ if (slice_type == PIPE_H264_SLICE_TYPE_B)
+ /* direct_spatial_mv_pred_flag */
+ vl_rbsp_u(rbsp, 1);
+ /* Start from the PPS defaults; the slice may override them below. */
+ priv->picture.h264.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
+ priv->picture.h264.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
+
+ if (slice_type == PIPE_H264_SLICE_TYPE_P ||
+ slice_type == PIPE_H264_SLICE_TYPE_SP ||
+ slice_type == PIPE_H264_SLICE_TYPE_B) {
+
+ /* num_ref_idx_active_override_flag */
+ if (vl_rbsp_u(rbsp, 1)) {
+ priv->picture.h264.num_ref_idx_l0_active_minus1 = vl_rbsp_ue(rbsp);
+
+ if (slice_type == PIPE_H264_SLICE_TYPE_B)
+ priv->picture.h264.num_ref_idx_l1_active_minus1 = vl_rbsp_ue(rbsp);
+ }
+ }
+ /* NAL unit types 20 and 21 use the MVC variant of the list modification syntax. */
+ if (nal_unit_type == 20 || nal_unit_type == 21)
+ ref_pic_list_mvc_modification(priv, rbsp);
+ else
+ ref_pic_list_modification(priv, rbsp, slice_type);
+
+ if ((pps->weighted_pred_flag && (slice_type == PIPE_H264_SLICE_TYPE_P || slice_type == PIPE_H264_SLICE_TYPE_SP)) ||
+ (pps->weighted_bipred_idc == 1 && slice_type == PIPE_H264_SLICE_TYPE_B))
+ pred_weight_table(priv, rbsp, sps, slice_type);
+
+ if (nal_ref_idc != 0)
+ dec_ref_pic_marking(priv, rbsp, IdrPicFlag);
+
+ if (pps->entropy_coding_mode_flag && slice_type != PIPE_H264_SLICE_TYPE_I && slice_type != PIPE_H264_SLICE_TYPE_SI)
+ /* cabac_init_idc */
+ vl_rbsp_ue(rbsp);
+
+ /* slice_qp_delta */
+ vl_rbsp_se(rbsp);
+
+ if (slice_type == PIPE_H264_SLICE_TYPE_SP || slice_type == PIPE_H264_SLICE_TYPE_SI) {
+ if (slice_type == PIPE_H264_SLICE_TYPE_SP)
+ /* sp_for_switch_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /*slice_qs_delta */
+ vl_rbsp_se(rbsp);
+ }
+
+ if (pps->deblocking_filter_control_present_flag) {
+ unsigned disable_deblocking_filter_idc = vl_rbsp_ue(rbsp);
+
+ if (disable_deblocking_filter_idc != 1) {
+ /* slice_alpha_c0_offset_div2 */
+ vl_rbsp_se(rbsp);
+
+ /* slice_beta_offset_div2 */
+ vl_rbsp_se(rbsp);
+ }
+ }
+
+ if (pps->num_slice_groups_minus1 > 0 && pps->slice_group_map_type >= 3 && pps->slice_group_map_type <= 5)
+ /* slice_group_change_cycle, read here as a fixed 2-bit field.
+  * NOTE(review): H.264 7.4.3 gives this element a variable length of
+  * Ceil(Log2(PicSizeInMapUnits / SliceGroupChangeRate + 1)) bits - confirm. */
+ vl_rbsp_u(rbsp, 2);
+ }
+ /*
+  * Process the next NAL unit found in the input bit stream.
+  *
+  * The function looks for a 0x000001 start code. When one is found, a slice
+  * recorded by an earlier call (priv->slice) is submitted to the codec, the
+  * NAL unit header is parsed, and the unit is dispatched by type:
+  *   7    -> seq_parameter_set()
+  *   8    -> picture_parameter_set()
+  *   1, 5 -> slice_header(), vid_dec_h264_BeginFrame() and submission of the
+  *           bytes already buffered in the reader
+  * Any other type only ends the frame in progress.
+  *
+  * priv          - decoder state
+  * vlc           - bit reader over the input buffer
+  * min_bits_left - subtracted from the remaining bit count to limit the
+  *                 start code search
+  */
+ static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left)
+ {
+ unsigned nal_ref_idc, nal_unit_type;
+ /* presumably advances to the next 0x00 byte within the given number of
+  * bits and reports whether one was found - confirm against vl_vlc.h */
+ if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00))
+ return;
+ /* Not a start code: drop one byte and let the caller try again. */
+ if (vl_vlc_peekbits(vlc, 24) != 0x000001) {
+ vl_vlc_eatbits(vlc, 8);
+ return;
+ }
+ /* A new start code ends the slice recorded earlier; its length is the
+  * distance from where it was recorded to the current position. */
+ if (priv->slice) {
+ unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8);
+ ++priv->picture.h264.slice_count;
+ priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base,
+ 1, &priv->slice, &bytes);
+ priv->slice = NULL;
+ }
+
+ vl_vlc_eatbits(vlc, 24);
+
+ /* forbidden_zero_bit */
+ vl_vlc_eatbits(vlc, 1);
+
+ nal_ref_idc = vl_vlc_get_uimsbf(vlc, 2);
+ /* End the frame when nal_ref_idc switches between zero and non-zero. */
+ if (nal_ref_idc != priv->codec_data.h264.nal_ref_idc &&
+ (nal_ref_idc * priv->codec_data.h264.nal_ref_idc) == 0)
+ vid_dec_h264_EndFrame(priv);
+
+ priv->codec_data.h264.nal_ref_idc = nal_ref_idc;
+
+ nal_unit_type = vl_vlc_get_uimsbf(vlc, 5);
+ /* Every NAL unit that is not a coded slice (type 1 or 5) ends the frame. */
+ if (nal_unit_type != 1 && nal_unit_type != 5)
+ vid_dec_h264_EndFrame(priv);
+
+ if (nal_unit_type == 7) {
+ struct vl_rbsp rbsp;
+ vl_rbsp_init(&rbsp, vlc, ~0);
+ seq_parameter_set(priv, &rbsp);
+
+ } else if (nal_unit_type == 8) {
+ struct vl_rbsp rbsp;
+ vl_rbsp_init(&rbsp, vlc, ~0);
+ picture_parameter_set(priv, &rbsp);
+
+ } else if (nal_unit_type == 1 || nal_unit_type == 5) {
+ /* Coded slice of a non-IDR or IDR picture */
+ unsigned bits = vl_vlc_valid_bits(vlc);
+ unsigned bytes = bits / 8 + 4;
+ struct vl_rbsp rbsp;
+ uint8_t buf[8];
+ const void *ptr = buf;
+ unsigned i;
+ /* Rebuild the start code and NAL header byte that were consumed above,
+  * followed by the bytes already held in the reader.
+  * NOTE(review): buf has 8 entries, so this relies on
+  * vl_vlc_valid_bits() being below 40 here - confirm. */
+ buf[0] = 0x0;
+ buf[1] = 0x0;
+ buf[2] = 0x1;
+ buf[3] = (nal_ref_idc << 5) | nal_unit_type;
+ for (i = 4; i < bytes; ++i)
+ buf[i] = vl_vlc_peekbits(vlc, bits) >> ((bytes - i - 1) * 8);
+ /* Record where the not yet buffered slice data starts; it is submitted
+  * once the next start code is found. */
+ priv->bytes_left = (vl_vlc_bits_left(vlc) - bits) / 8;
+ priv->slice = vlc->data;
+ /* the third argument is a bit limit for the header reader - presumably;
+  * confirm against vl_rbsp_init() */
+ vl_rbsp_init(&rbsp, vlc, 128);
+ slice_header(priv, &rbsp, nal_ref_idc, nal_unit_type);
+
+ vid_dec_h264_BeginFrame(priv);
+
+ ++priv->picture.h264.slice_count;
+ priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base,
+ 1, &ptr, &bytes);
+ }
+
+ /* resync to byte boundary */
+ vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8);
+ }
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c
new file mode 100644
index 000000000..3242dbe11
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_h265.c
@@ -0,0 +1,1013 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_video_codec.h"
+#include "util/u_memory.h"
+#include "util/u_video.h"
+#include "vl/vl_rbsp.h"
+
+#include "entrypoint.h"
+#include "vid_dec.h"
+
+ #define DPB_MAX_SIZE 32 /* presumably the cap on entries kept in the h265 dpb_list - confirm at its use */
+ #define MAX_NUM_REF_PICS 16 /* capacity of ref_pic_set::delta_poc[] / used[] and of the per-picture ref[] slots reset in EndFrame */
+ /*
+  * H.265 NAL unit type codes used by this file.
+  * Values up to RASL_R and the range BLA_W_LP..CRA are treated as slice
+  * pictures by is_slice_picture(); BLA_W_LP..CRA are the random access
+  * point pictures tested by is_rap_picture().
+  */
+ enum {
+ NAL_UNIT_TYPE_TRAIL_N = 0,
+ NAL_UNIT_TYPE_TRAIL_R = 1,
+ NAL_UNIT_TYPE_TSA_N = 2,
+ NAL_UNIT_TYPE_TSA_R = 3,
+ NAL_UNIT_TYPE_STSA_N = 4,
+ NAL_UNIT_TYPE_STSA_R = 5,
+ NAL_UNIT_TYPE_RADL_N = 6,
+ NAL_UNIT_TYPE_RADL_R = 7,
+ NAL_UNIT_TYPE_RASL_N = 8,
+ NAL_UNIT_TYPE_RASL_R = 9, /* last value of the first slice range */
+ NAL_UNIT_TYPE_BLA_W_LP = 16, /* first random access point type; BLA = broken link access */
+ NAL_UNIT_TYPE_BLA_W_RADL = 17,
+ NAL_UNIT_TYPE_BLA_N_LP = 18,
+ NAL_UNIT_TYPE_IDR_W_RADL = 19,
+ NAL_UNIT_TYPE_IDR_N_LP = 20,
+ NAL_UNIT_TYPE_CRA = 21, /* last random access point type */
+ NAL_UNIT_TYPE_SPS = 33,
+ NAL_UNIT_TYPE_PPS = 34,
+ };
+ /*
+  * Default 64-entry scaling list copied by scaling_list_data() when a list
+  * falls back to its default: matrix_id 0..2 for size_id 1 and 2, and
+  * matrix_id 0 for size_id 3.
+  */
+ static const uint8_t Default_8x8_Intra[64] = {
+ 16, 16, 16, 16, 17, 18, 21, 24,
+ 16, 16, 16, 16, 17, 19, 22, 25,
+ 16, 16, 17, 18, 20, 22, 25, 29,
+ 16, 16, 18, 21, 24, 27, 31, 36,
+ 17, 17, 20, 24, 30, 35, 41, 47,
+ 18, 19, 22, 27, 35, 44, 54, 65,
+ 21, 22, 25, 31, 41, 54, 70, 88,
+ 24, 25, 29, 36, 47, 65, 88, 115
+ };
+ /*
+  * Default 64-entry scaling list copied by scaling_list_data() for the
+  * matrices not served by Default_8x8_Intra: matrix_id 3..5 for size_id 1
+  * and 2, and matrix_id 1 for size_id 3.
+  */
+ static const uint8_t Default_8x8_Inter[64] = {
+ 16, 16, 16, 16, 17, 18, 20, 24,
+ 16, 16, 16, 17, 18, 20, 24, 25,
+ 16, 16, 17, 18, 20, 24, 25, 28,
+ 16, 17, 18, 20, 24, 25, 28, 33,
+ 17, 18, 20, 24, 25, 28, 33, 41,
+ 18, 20, 24, 25, 28, 33, 41, 54,
+ 20, 24, 25, 28, 33, 41, 54, 71,
+ 24, 25, 28, 33, 41, 54, 71, 91
+ };
+ /* One entry of priv->codec_data.h265.dpb_list; entries are heap allocated
+  * and released with FREE() by vid_dec_h265_Flush(). */
+ struct dpb_list {
+ struct list_head list; /* link in dpb_list */
+ struct pipe_video_buffer *buffer; /* picture handed out by Flush and used as reference in EndFrame */
+ OMX_TICKS timestamp; /* returned through Flush's timestamp argument */
+ unsigned poc; /* picture order count; Flush picks the lowest one */
+ };
+ /* Short-term reference picture set as filled in by st_ref_pic_set(). */
+ struct ref_pic_set {
+ unsigned num_pics; /* valid entries in delta_poc[] / used[] */
+ unsigned num_neg_pics; /* entries with a negative delta_poc; they come first in the arrays */
+ unsigned num_pos_pics; /* entries with a non-negative delta_poc */
+ unsigned num_delta_poc; /* num_pics of the set this one was predicted from; only written for predicted sets */
+ int delta_poc[MAX_NUM_REF_PICS]; /* POC difference to the current picture */
+ bool used[MAX_NUM_REF_PICS]; /* entry is used by the current picture */
+ };
+
+ /* True for the two IDR NAL unit types (IDR_W_RADL and IDR_N_LP). */
+ static bool is_idr_picture(unsigned nal_unit_type)
+ {
+    switch (nal_unit_type) {
+    case NAL_UNIT_TYPE_IDR_W_RADL:
+    case NAL_UNIT_TYPE_IDR_N_LP:
+       return true;
+    default:
+       return false;
+    }
+ }
+
+ /* True for the three broken link access (BLA) NAL unit types. */
+ static bool is_bla_picture(unsigned nal_unit_type)
+ {
+    switch (nal_unit_type) {
+    case NAL_UNIT_TYPE_BLA_W_LP:
+    case NAL_UNIT_TYPE_BLA_W_RADL:
+    case NAL_UNIT_TYPE_BLA_N_LP:
+       return true;
+    default:
+       return false;
+    }
+ }
+
+ /* Random access point picture: any type in the range BLA_W_LP..CRA. */
+ static bool is_rap_picture(unsigned nal_unit_type)
+ {
+    if (nal_unit_type < NAL_UNIT_TYPE_BLA_W_LP)
+       return false;
+
+    return nal_unit_type <= NAL_UNIT_TYPE_CRA;
+ }
+
+ /* True when the NAL unit carries a slice: types up to RASL_R, or any
+  * random access point type. */
+ static bool is_slice_picture(unsigned nal_unit_type)
+ {
+    if (nal_unit_type <= NAL_UNIT_TYPE_RASL_R)
+       return true;
+
+    return is_rap_picture(nal_unit_type);
+ }
+
+ /*
+  * Store i as the picture order count of the current picture.
+  *
+  * When temporal_id is 0 and the NAL unit is TRAIL_R, TSA_R, STSA_R or a
+  * random access point picture, i is also remembered as slice_prev_poc.
+  */
+ static void set_poc(vid_dec_PrivateType *priv,
+                     unsigned nal_unit_type, int i)
+ {
+    bool updates_prev_poc;
+
+    priv->picture.h265.CurrPicOrderCntVal = i;
+
+    if (priv->codec_data.h265.temporal_id != 0)
+       return;
+
+    switch (nal_unit_type) {
+    case NAL_UNIT_TYPE_TRAIL_R:
+    case NAL_UNIT_TYPE_TSA_R:
+    case NAL_UNIT_TYPE_STSA_R:
+       updates_prev_poc = true;
+       break;
+    default:
+       updates_prev_poc = is_rap_picture(nal_unit_type);
+       break;
+    }
+
+    if (updates_prev_poc)
+       priv->codec_data.h265.slice_prev_poc = i;
+ }
+
+ /* Picture order count of the current picture, as last stored by set_poc(). */
+ static unsigned get_poc(vid_dec_PrivateType *priv)
+ {
+    unsigned curr_poc = priv->picture.h265.CurrPicOrderCntVal;
+
+    return curr_poc;
+ }
+
+ /*
+  * Skip one profile/tier record (88 bits in total).
+  *
+  * All fields are read with the same widths and in the same order as they
+  * are coded; none of the values is kept.
+  */
+ static void profile_tier(struct vl_rbsp *rbsp)
+ {
+    static const unsigned reserved_chunks[] = { 16, 16, 12 };
+    unsigned n;
+
+    vl_rbsp_u(rbsp, 2); /* general_profile_space */
+    vl_rbsp_u(rbsp, 1); /* general_tier_flag */
+    vl_rbsp_u(rbsp, 5); /* general_profile_idc */
+
+    /* general_profile_compatibility_flag[0..31] */
+    for (n = 32; n > 0; --n)
+       vl_rbsp_u(rbsp, 1);
+
+    vl_rbsp_u(rbsp, 1); /* general_progressive_source_flag */
+    vl_rbsp_u(rbsp, 1); /* general_interlaced_source_flag */
+    vl_rbsp_u(rbsp, 1); /* general_non_packed_constraint_flag */
+    vl_rbsp_u(rbsp, 1); /* general_frame_only_constraint_flag */
+
+    /* general_reserved_zero_44bits, read in three pieces */
+    for (n = 0; n < sizeof(reserved_chunks) / sizeof(reserved_chunks[0]); ++n)
+       vl_rbsp_u(rbsp, reserved_chunks[n]);
+ }
+
+ /*
+  * Parse the profile_tier_level() syntax and return general_level_idc.
+  *
+  * rbsp                 - bit reader positioned at the start of the structure
+  * max_sublayers_minus1 - number of sub-layers, minus one, that carry
+  *                        presence flags; the caller in this file passes a
+  *                        3-bit bitstream field (0..7)
+  *
+  * The general profile/tier record and all sub-layer records are skipped;
+  * only the general level is returned.
+  */
+ static unsigned profile_tier_level(struct vl_rbsp *rbsp,
+                                    int max_sublayers_minus1)
+ {
+    /*
+     * The count comes straight from the (untrusted) bitstream as a 3-bit
+     * value, so size the flag arrays for its full range and clamp anything
+     * larger instead of writing past them.
+     */
+    enum { MAX_SUB_LAYERS_MINUS1 = 7 };
+    bool sub_layer_profile_present_flag[MAX_SUB_LAYERS_MINUS1];
+    bool sub_layer_level_present_flag[MAX_SUB_LAYERS_MINUS1];
+    unsigned level_idc;
+    int i;
+
+    if (max_sublayers_minus1 > MAX_SUB_LAYERS_MINUS1)
+       max_sublayers_minus1 = MAX_SUB_LAYERS_MINUS1;
+
+    profile_tier(rbsp);
+
+    /* general_level_idc */
+    level_idc = vl_rbsp_u(rbsp, 8);
+
+    for (i = 0; i < max_sublayers_minus1; ++i) {
+       sub_layer_profile_present_flag[i] = vl_rbsp_u(rbsp, 1);
+       sub_layer_level_present_flag[i] = vl_rbsp_u(rbsp, 1);
+    }
+
+    /* The flag pairs are padded to eight entries with reserved bits. */
+    if (max_sublayers_minus1 > 0)
+       for (i = max_sublayers_minus1; i < 8; ++i)
+          /* reserved_zero_2bits */
+          vl_rbsp_u(rbsp, 2);
+
+    for (i = 0; i < max_sublayers_minus1; ++i) {
+       if (sub_layer_profile_present_flag[i])
+          profile_tier(rbsp);
+
+       if (sub_layer_level_present_flag[i])
+          /* sub_layer_level_idc */
+          vl_rbsp_u(rbsp, 8);
+    }
+
+    return level_idc;
+ }
+
+ /*
+  * Parse the scaling_list_data() syntax into sps.
+  *
+  * For each size (4x4, 8x8, 16x16, 32x32) and each matrix of that size the
+  * list is either
+  *   - copied from an earlier matrix of the same size,
+  *   - set to its default (flat 16 for 4x4, Default_8x8_Intra/Inter
+  *     otherwise, DC coefficient 16), or
+  *   - decoded from delta-coded coefficients.
+  * Results go to sps->ScalingList4x4/8x8/16x16/32x32 and, for the two
+  * largest sizes, sps->ScalingListDCCoeff16x16/32x32.
+  *
+  * priv - unused
+  * rbsp - bit reader positioned at the start of the structure
+  * sps  - sequence parameter set receiving the lists
+  */
+ static void scaling_list_data(vid_dec_PrivateType *priv,
+                               struct vl_rbsp *rbsp, struct pipe_h265_sps *sps)
+ {
+    unsigned size_id, matrix_id;
+    unsigned scaling_list_len[4] = { 16, 64, 64, 64 };
+    /* 4x4 lists are built in a 64-wide scratch array so that all four sizes
+     * can be addressed through the same [6][64] pointer type. */
+    uint8_t scaling_list4x4[6][64] = { };
+    int i;
+
+    uint8_t (*scaling_list_data[4])[6][64] = {
+       (uint8_t (*)[6][64])scaling_list4x4,
+       (uint8_t (*)[6][64])sps->ScalingList8x8,
+       (uint8_t (*)[6][64])sps->ScalingList16x16,
+       (uint8_t (*)[6][64])sps->ScalingList32x32
+    };
+    uint8_t (*scaling_list_dc_coeff[2])[6] = {
+       (uint8_t (*)[6])sps->ScalingListDCCoeff16x16,
+       (uint8_t (*)[6])sps->ScalingListDCCoeff32x32
+    };
+
+    for (size_id = 0; size_id < 4; ++size_id) {
+
+       /* 32x32 has two matrices, every other size has six */
+       for (matrix_id = 0; matrix_id < ((size_id == 3) ? 2 : 6); ++matrix_id) {
+          bool scaling_list_pred_mode_flag = vl_rbsp_u(rbsp, 1);
+
+          if (!scaling_list_pred_mode_flag) {
+             /* scaling_list_pred_matrix_id_delta */
+             unsigned delta = vl_rbsp_ue(rbsp);
+             unsigned matrix_id_with_delta = matrix_id;
+
+             /*
+              * The delta must not point before matrix 0. A larger value
+              * would wrap the unsigned subtraction and read far outside
+              * the list arrays, so treat it like delta 0 (use defaults).
+              */
+             if (delta <= matrix_id)
+                matrix_id_with_delta = matrix_id - delta;
+
+             if (matrix_id != matrix_id_with_delta) {
+                memcpy((*scaling_list_data[size_id])[matrix_id],
+                       (*scaling_list_data[size_id])[matrix_id_with_delta],
+                       scaling_list_len[size_id]);
+                if (size_id > 1)
+                   (*scaling_list_dc_coeff[size_id - 2])[matrix_id] =
+                      (*scaling_list_dc_coeff[size_id - 2])[matrix_id_with_delta];
+             } else {
+                const uint8_t *d;
+
+                if (size_id == 0)
+                   memset((*scaling_list_data[0])[matrix_id], 16, 16);
+                else {
+                   if (size_id < 3)
+                      d = (matrix_id < 3) ? Default_8x8_Intra : Default_8x8_Inter;
+                   else
+                      d = (matrix_id < 1) ? Default_8x8_Intra : Default_8x8_Inter;
+                   memcpy((*scaling_list_data[size_id])[matrix_id], d,
+                          scaling_list_len[size_id]);
+                }
+                if (size_id > 1)
+                   (*scaling_list_dc_coeff[size_id - 2])[matrix_id] = 16;
+             }
+          } else {
+             int next_coef = 8;
+             int coef_num = MIN2(64, (1 << (4 + (size_id << 1))));
+
+             if (size_id > 1) {
+                /* scaling_list_dc_coef_minus8 */
+                next_coef = vl_rbsp_se(rbsp) + 8;
+                (*scaling_list_dc_coeff[size_id - 2])[matrix_id] = next_coef;
+             }
+
+             for (i = 0; i < coef_num; ++i) {
+                /* scaling_list_delta_coef, accumulated modulo 256 */
+                next_coef = (next_coef + vl_rbsp_se(rbsp) + 256) % 256;
+                (*scaling_list_data[size_id])[matrix_id][i] = next_coef;
+             }
+          }
+       }
+    }
+
+    /* Move the 16 used coefficients of each scratch 4x4 list into the SPS. */
+    for (i = 0; i < 6; ++i)
+       memcpy(sps->ScalingList4x4[i], scaling_list4x4[i], 16);
+
+    return;
+ }
+
+ /*
+  * Parse one st_ref_pic_set() structure into rps.
+  *
+  * priv - decoder state; its ref_pic_set_list holds the sets parsed so far
+  * rbsp - bit reader positioned at the start of the structure
+  * rps  - destination set
+  * sps  - active SPS; num_short_term_ref_pic_sets decides whether
+  *        delta_idx_minus1 is coded
+  * idx  - index of the set being parsed; set 0 is never predicted
+  *
+  * A set is either predicted from an earlier set (inter RPS prediction) or
+  * coded explicitly as negative and positive POC deltas. In the predicted
+  * case only entries flagged as used by the current picture are kept.
+  *
+  * All counts come from the bitstream, so they are limited to
+  * MAX_NUM_REF_PICS, the capacity of rps->delta_poc[] and rps->used[].
+  */
+ static void st_ref_pic_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+                            struct ref_pic_set *rps, struct pipe_h265_sps *sps,
+                            unsigned idx)
+ {
+    bool inter_rps_pred_flag;
+    unsigned delta_idx_minus1;
+    int delta_poc;
+    int i;
+
+    inter_rps_pred_flag = (idx != 0) ? (vl_rbsp_u(rbsp, 1)) : false;
+
+    if (inter_rps_pred_flag) {
+       struct ref_pic_set *ref_rps;
+       unsigned ref_num_pics;
+       unsigned sign, abs_minus1;
+       int delta_rps;
+       bool used;
+       int j;
+
+       if (idx == sps->num_short_term_ref_pic_sets)
+          delta_idx_minus1 = vl_rbsp_ue(rbsp);
+       else
+          delta_idx_minus1 = 0;
+
+       /* idx is non-zero here; keep the reference inside [0, idx - 1] so
+        * the pointer below never lands before the start of the list. */
+       if (delta_idx_minus1 > idx - 1)
+          delta_idx_minus1 = idx - 1;
+
+       ref_rps = (struct ref_pic_set *)
+          priv->codec_data.h265.ref_pic_set_list + idx - (delta_idx_minus1 + 1);
+
+       /* Never walk past the end of ref_rps->delta_poc[]. */
+       ref_num_pics = ref_rps->num_pics;
+       if (ref_num_pics > MAX_NUM_REF_PICS)
+          ref_num_pics = MAX_NUM_REF_PICS;
+
+       /* delta_rps_sign */
+       sign = vl_rbsp_u(rbsp, 1);
+       /* abs_delta_rps_minus1 */
+       abs_minus1 = vl_rbsp_ue(rbsp);
+       delta_rps = (1 - 2 * sign) * (abs_minus1 + 1);
+
+       rps->num_neg_pics = 0;
+       rps->num_pos_pics = 0;
+       rps->num_pics = 0;
+
+       /* One flag per reference entry plus one for the reference picture
+        * itself (i == ref_num_pics, delta 0). */
+       for (i = 0; i <= (int)ref_num_pics; ++i) {
+          /* used_by_curr_pic_flag */
+          if (!vl_rbsp_u(rbsp, 1)) {
+             /* use_delta_flag */
+             vl_rbsp_u(rbsp, 1);
+             continue;
+          }
+
+          /* Up to ref_num_pics + 1 entries can be flagged; drop whatever
+           * no longer fits into the fixed-size arrays. */
+          if (rps->num_pics >= MAX_NUM_REF_PICS)
+             continue;
+
+          delta_poc = delta_rps +
+             ((i < (int)ref_num_pics) ? ref_rps->delta_poc[i] : 0);
+          rps->delta_poc[rps->num_pics] = delta_poc;
+          rps->used[rps->num_pics] = true;
+          if (delta_poc < 0)
+             rps->num_neg_pics++;
+          else
+             rps->num_pos_pics++;
+          rps->num_pics++;
+       }
+
+       rps->num_delta_poc = ref_num_pics;
+
+       /* sort delta poc into ascending order */
+       for (i = 1; i < rps->num_pics; ++i) {
+          delta_poc = rps->delta_poc[i];
+          used = rps->used[i];
+          for (j = i - 1; j >= 0; j--) {
+             if (delta_poc < rps->delta_poc[j]) {
+                rps->delta_poc[j + 1] = rps->delta_poc[j];
+                rps->used[j + 1] = rps->used[j];
+                rps->delta_poc[j] = delta_poc;
+                rps->used[j] = used;
+             }
+          }
+       }
+
+       /* reverse the negative part so it starts with the delta closest
+        * to zero */
+       for (i = 0 , j = rps->num_neg_pics - 1;
+            i < rps->num_neg_pics >> 1; i++, j--) {
+          delta_poc = rps->delta_poc[i];
+          used = rps->used[i];
+          rps->delta_poc[i] = rps->delta_poc[j];
+          rps->used[i] = rps->used[j];
+          rps->delta_poc[j] = delta_poc;
+          rps->used[j] = used;
+       }
+    } else {
+       /* num_negative_pics */
+       unsigned num_neg = vl_rbsp_ue(rbsp);
+       /* num_positive_pics */
+       unsigned num_pos = vl_rbsp_ue(rbsp);
+
+       /* Limit both counts so that their sum fits the arrays. */
+       if (num_neg > MAX_NUM_REF_PICS)
+          num_neg = MAX_NUM_REF_PICS;
+       if (num_pos > MAX_NUM_REF_PICS - num_neg)
+          num_pos = MAX_NUM_REF_PICS - num_neg;
+
+       rps->num_neg_pics = num_neg;
+       rps->num_pos_pics = num_pos;
+       rps->num_pics = num_neg + num_pos;
+
+       delta_poc = 0;
+       for (i = 0; i < (int)rps->num_neg_pics; ++i) {
+          /* delta_poc_s0_minus1 */
+          delta_poc -= (vl_rbsp_ue(rbsp) + 1);
+          rps->delta_poc[i] = delta_poc;
+          /* used_by_curr_pic_s0_flag */
+          rps->used[i] = vl_rbsp_u(rbsp, 1);
+       }
+
+       delta_poc = 0;
+       for (i = rps->num_neg_pics; i < (int)rps->num_pics; ++i) {
+          /* delta_poc_s1_minus1 */
+          delta_poc += (vl_rbsp_ue(rbsp) + 1);
+          rps->delta_poc[i] = delta_poc;
+          /* used_by_curr_pic_s1_flag */
+          rps->used[i] = vl_rbsp_u(rbsp, 1);
+       }
+    }
+ }
+
+ /*
+  * Read an SPS id from rbsp and return the matching slot of
+  * priv->codec_data.h265.sps, or NULL when the id is beyond that array.
+  */
+ static struct pipe_h265_sps *seq_parameter_set_id(vid_dec_PrivateType *priv,
+                                                   struct vl_rbsp *rbsp)
+ {
+    unsigned sps_id = vl_rbsp_ue(rbsp);
+
+    return (sps_id < ARRAY_SIZE(priv->codec_data.h265.sps)) ?
+       &priv->codec_data.h265.sps[sps_id] : NULL;
+ }
+
+ /*
+  * Parse an H.265 sequence parameter set.
+  *
+  * The general level goes to priv->codec_data.h265.level_idc and the coded
+  * picture size to priv->codec_data.h265.pic_{width,height}_in_luma_samples.
+  * The SPS slot selected by the coded id is cleared and filled, and the
+  * short-term reference picture sets are stored in
+  * priv->codec_data.h265.ref_pic_set_list. Parsing stops after
+  * strong_intra_smoothing_enabled_flag.
+  *
+  * The function returns without touching any SPS slot when
+  * sps_max_sub_layers_minus1 holds the reserved value 7 or the SPS id is
+  * rejected by seq_parameter_set_id().
+  */
+ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp)
+ {
+    struct pipe_h265_sps *sps;
+    int sps_max_sub_layers_minus1;
+    unsigned i;
+
+    /* sps_video_parameter_set_id */
+    vl_rbsp_u(rbsp, 4);
+
+    /* sps_max_sub_layers_minus1 */
+    sps_max_sub_layers_minus1 = vl_rbsp_u(rbsp, 3);
+
+    /*
+     * Only 0..6 are valid. The value comes from the bitstream, so a bad
+     * stream must be rejected at run time rather than by an assert that
+     * aborts debug builds and disappears from release builds.
+     */
+    if (sps_max_sub_layers_minus1 > 6)
+       return;
+
+    /* sps_temporal_id_nesting_flag */
+    vl_rbsp_u(rbsp, 1);
+
+    priv->codec_data.h265.level_idc =
+       profile_tier_level(rbsp, sps_max_sub_layers_minus1);
+
+    sps = seq_parameter_set_id(priv, rbsp);
+    if (!sps)
+       return;
+
+    memset(sps, 0, sizeof(*sps));
+
+    sps->chroma_format_idc = vl_rbsp_ue(rbsp);
+
+    if (sps->chroma_format_idc == 3)
+       sps->separate_colour_plane_flag = vl_rbsp_u(rbsp, 1);
+
+    priv->codec_data.h265.pic_width_in_luma_samples =
+       sps->pic_width_in_luma_samples = vl_rbsp_ue(rbsp);
+
+    priv->codec_data.h265.pic_height_in_luma_samples =
+       sps->pic_height_in_luma_samples = vl_rbsp_ue(rbsp);
+
+    /* conformance_window_flag */
+    if (vl_rbsp_u(rbsp, 1)) {
+       /* conf_win_left_offset */
+       vl_rbsp_ue(rbsp);
+       /* conf_win_right_offset */
+       vl_rbsp_ue(rbsp);
+       /* conf_win_top_offset */
+       vl_rbsp_ue(rbsp);
+       /* conf_win_bottom_offset */
+       vl_rbsp_ue(rbsp);
+    }
+
+    sps->bit_depth_luma_minus8 = vl_rbsp_ue(rbsp);
+    sps->bit_depth_chroma_minus8 = vl_rbsp_ue(rbsp);
+    sps->log2_max_pic_order_cnt_lsb_minus4 = vl_rbsp_ue(rbsp);
+
+    /* sps_sub_layer_ordering_info_present_flag: when clear, only the entry
+     * of the highest sub-layer is coded. The last value read is kept. */
+    i = vl_rbsp_u(rbsp, 1) ? 0 : sps_max_sub_layers_minus1;
+    for (; i <= sps_max_sub_layers_minus1; ++i) {
+       sps->sps_max_dec_pic_buffering_minus1 = vl_rbsp_ue(rbsp);
+       /* sps_max_num_reorder_pics */
+       vl_rbsp_ue(rbsp);
+       /* sps_max_latency_increase_plus */
+       vl_rbsp_ue(rbsp);
+    }
+
+    sps->log2_min_luma_coding_block_size_minus3 = vl_rbsp_ue(rbsp);
+    sps->log2_diff_max_min_luma_coding_block_size = vl_rbsp_ue(rbsp);
+    sps->log2_min_transform_block_size_minus2 = vl_rbsp_ue(rbsp);
+    sps->log2_diff_max_min_transform_block_size = vl_rbsp_ue(rbsp);
+    sps->max_transform_hierarchy_depth_inter = vl_rbsp_ue(rbsp);
+    sps->max_transform_hierarchy_depth_intra = vl_rbsp_ue(rbsp);
+
+    sps->scaling_list_enabled_flag = vl_rbsp_u(rbsp, 1);
+    if (sps->scaling_list_enabled_flag)
+       /* sps_scaling_list_data_present_flag */
+       if (vl_rbsp_u(rbsp, 1))
+          scaling_list_data(priv, rbsp, sps);
+
+    sps->amp_enabled_flag = vl_rbsp_u(rbsp, 1);
+    sps->sample_adaptive_offset_enabled_flag = vl_rbsp_u(rbsp, 1);
+    sps->pcm_enabled_flag = vl_rbsp_u(rbsp, 1);
+    if (sps->pcm_enabled_flag) {
+       sps->pcm_sample_bit_depth_luma_minus1 = vl_rbsp_u(rbsp, 4);
+       sps->pcm_sample_bit_depth_chroma_minus1 = vl_rbsp_u(rbsp, 4);
+       sps->log2_min_pcm_luma_coding_block_size_minus3 = vl_rbsp_ue(rbsp);
+       sps->log2_diff_max_min_pcm_luma_coding_block_size = vl_rbsp_ue(rbsp);
+       sps->pcm_loop_filter_disabled_flag = vl_rbsp_u(rbsp, 1);
+    }
+
+    sps->num_short_term_ref_pic_sets = vl_rbsp_ue(rbsp);
+
+    /* NOTE(review): the count is not checked against the capacity of
+     * ref_pic_set_list, which is allocated outside this function - confirm
+     * the allocation covers the largest count a stream may carry. */
+    for (i = 0; i < sps->num_short_term_ref_pic_sets; ++i) {
+       struct ref_pic_set *rps;
+
+       rps = (struct ref_pic_set *)
+          priv->codec_data.h265.ref_pic_set_list + i;
+       st_ref_pic_set(priv, rbsp, rps, sps, i);
+    }
+
+    sps->long_term_ref_pics_present_flag = vl_rbsp_u(rbsp, 1);
+    if (sps->long_term_ref_pics_present_flag) {
+       sps->num_long_term_ref_pics_sps = vl_rbsp_ue(rbsp);
+       for (i = 0; i < sps->num_long_term_ref_pics_sps; ++i) {
+          /* lt_ref_pic_poc_lsb_sps */
+          vl_rbsp_u(rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
+          /* used_by_curr_pic_lt_sps_flag */
+          vl_rbsp_u(rbsp, 1);
+       }
+    }
+
+    sps->sps_temporal_mvp_enabled_flag = vl_rbsp_u(rbsp, 1);
+    sps->strong_intra_smoothing_enabled_flag = vl_rbsp_u(rbsp, 1);
+ }
+
+ /*
+  * Read a PPS id from rbsp and return the matching slot of
+  * priv->codec_data.h265.pps, or NULL when the id is beyond that array.
+  */
+ static struct pipe_h265_pps *pic_parameter_set_id(vid_dec_PrivateType *priv,
+                                                   struct vl_rbsp *rbsp)
+ {
+    unsigned pps_id = vl_rbsp_ue(rbsp);
+
+    return (pps_id < ARRAY_SIZE(priv->codec_data.h265.pps)) ?
+       &priv->codec_data.h265.pps[pps_id] : NULL;
+ }
+
+ /*
+  * Parse an H.265 picture parameter set.
+  *
+  * The PPS slot selected by the coded id is cleared, linked to the SPS it
+  * refers to (pps->sps) and filled field by field. A scaling list carried
+  * by the PPS is written into that SPS through scaling_list_data().
+  * Parsing stops after slice_segment_header_extension_present_flag.
+  *
+  * The function returns early when the PPS id is rejected (nothing is
+  * changed) or when the SPS id is rejected (the PPS slot is left cleared).
+  */
+ static void picture_parameter_set(vid_dec_PrivateType *priv,
+                                   struct vl_rbsp *rbsp)
+ {
+    struct pipe_h265_sps *sps;
+    struct pipe_h265_pps *pps;
+    int i;
+
+    pps = pic_parameter_set_id(priv, rbsp);
+    if (!pps)
+       return;
+
+    memset(pps, 0, sizeof(*pps));
+    sps = pps->sps = seq_parameter_set_id(priv, rbsp);
+    if (!sps)
+       return;
+
+    pps->dependent_slice_segments_enabled_flag = vl_rbsp_u(rbsp, 1);
+    pps->output_flag_present_flag = vl_rbsp_u(rbsp, 1);
+    pps->num_extra_slice_header_bits = vl_rbsp_u(rbsp, 3);
+    pps->sign_data_hiding_enabled_flag = vl_rbsp_u(rbsp, 1);
+    pps->cabac_init_present_flag = vl_rbsp_u(rbsp, 1);
+
+    pps->num_ref_idx_l0_default_active_minus1 = vl_rbsp_ue(rbsp);
+    pps->num_ref_idx_l1_default_active_minus1 = vl_rbsp_ue(rbsp);
+    pps->init_qp_minus26 = vl_rbsp_se(rbsp);
+    pps->constrained_intra_pred_flag = vl_rbsp_u(rbsp, 1);
+    pps->transform_skip_enabled_flag = vl_rbsp_u(rbsp, 1);
+
+    pps->cu_qp_delta_enabled_flag = vl_rbsp_u(rbsp, 1);
+    if (pps->cu_qp_delta_enabled_flag)
+       pps->diff_cu_qp_delta_depth = vl_rbsp_ue(rbsp);
+
+    pps->pps_cb_qp_offset = vl_rbsp_se(rbsp);
+    pps->pps_cr_qp_offset = vl_rbsp_se(rbsp);
+    pps->pps_slice_chroma_qp_offsets_present_flag = vl_rbsp_u(rbsp, 1);
+
+    pps->weighted_pred_flag = vl_rbsp_u(rbsp, 1);
+    pps->weighted_bipred_flag = vl_rbsp_u(rbsp, 1);
+
+    pps->transquant_bypass_enabled_flag = vl_rbsp_u(rbsp, 1);
+    pps->tiles_enabled_flag = vl_rbsp_u(rbsp, 1);
+    pps->entropy_coding_sync_enabled_flag = vl_rbsp_u(rbsp, 1);
+
+    if (pps->tiles_enabled_flag) {
+       pps->num_tile_columns_minus1 = vl_rbsp_ue(rbsp);
+       pps->num_tile_rows_minus1 = vl_rbsp_ue(rbsp);
+
+       pps->uniform_spacing_flag = vl_rbsp_u(rbsp, 1);
+       if (!pps->uniform_spacing_flag) {
+          /*
+           * Every coded size is consumed so the reader stays in step, but
+           * only those that fit the PPS arrays are stored: the counts come
+           * from the bitstream and are not range checked.
+           */
+          for (i = 0; i < pps->num_tile_columns_minus1; ++i) {
+             unsigned width_minus1 = vl_rbsp_ue(rbsp);
+
+             if (i < (int)ARRAY_SIZE(pps->column_width_minus1))
+                pps->column_width_minus1[i] = width_minus1;
+          }
+
+          for (i = 0; i < pps->num_tile_rows_minus1; ++i) {
+             unsigned height_minus1 = vl_rbsp_ue(rbsp);
+
+             if (i < (int)ARRAY_SIZE(pps->row_height_minus1))
+                pps->row_height_minus1[i] = height_minus1;
+          }
+       }
+
+       /*
+        * The flag is coded for every PPS that enables tiles (H.265
+        * 7.3.2.3). Reading it only when one tile count is zero left the
+        * reader one bit short for ordinary multi-column, multi-row layouts.
+        */
+       pps->loop_filter_across_tiles_enabled_flag = vl_rbsp_u(rbsp, 1);
+    }
+
+    pps->pps_loop_filter_across_slices_enabled_flag = vl_rbsp_u(rbsp, 1);
+
+    pps->deblocking_filter_control_present_flag = vl_rbsp_u(rbsp, 1);
+    if (pps->deblocking_filter_control_present_flag) {
+       pps->deblocking_filter_override_enabled_flag = vl_rbsp_u(rbsp, 1);
+       pps->pps_deblocking_filter_disabled_flag = vl_rbsp_u(rbsp, 1);
+       if (!pps->pps_deblocking_filter_disabled_flag) {
+          pps->pps_beta_offset_div2 = vl_rbsp_se(rbsp);
+          pps->pps_tc_offset_div2 = vl_rbsp_se(rbsp);
+       }
+    }
+
+    /* pps_scaling_list_data_present_flag; the lists live in the SPS */
+    if (vl_rbsp_u(rbsp, 1))
+       scaling_list_data(priv, rbsp, sps);
+
+    pps->lists_modification_present_flag = vl_rbsp_u(rbsp, 1);
+    pps->log2_parallel_merge_level_minus2 = vl_rbsp_ue(rbsp);
+    pps->slice_segment_header_extension_present_flag = vl_rbsp_u(rbsp, 1);
+ }
+
+ /*
+  * Start decoding a frame unless one is already in progress.
+  *
+  * On first use the video codec is created from the profile, level and
+  * coded picture size collected so far; the transcode tunnel is disabled
+  * when the coded size differs from the size configured on the input port.
+  * The function then requests a target buffer, latches the timestamp of
+  * the first buffer of the frame and calls the codec's begin_frame hook.
+  */
+ static void vid_dec_h265_BeginFrame(vid_dec_PrivateType *priv)
+ {
+    if (priv->frame_started)
+       return;
+
+    if (!priv->codec) {
+       omx_base_video_PortType *in_port = (omx_base_video_PortType *)
+          priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+       struct pipe_video_codec templat = {};
+
+       templat.width = priv->codec_data.h265.pic_width_in_luma_samples;
+       templat.height = priv->codec_data.h265.pic_height_in_luma_samples;
+       templat.level = priv->codec_data.h265.level_idc;
+       templat.profile = priv->profile;
+       templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+       templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+       templat.expect_chunked_decode = true;
+       priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
+
+       /* disable transcode tunnel if video size is different from coded size */
+       if (!(priv->codec_data.h265.pic_width_in_luma_samples ==
+             in_port->sPortParam.format.video.nFrameWidth &&
+             priv->codec_data.h265.pic_height_in_luma_samples ==
+             in_port->sPortParam.format.video.nFrameHeight))
+          priv->disable_tunnel = true;
+    }
+
+    vid_dec_NeedTarget(priv);
+
+    /* the frame takes the timestamp of its first input buffer */
+    if (priv->first_buf_in_frame) {
+       priv->timestamp = priv->timestamps[0];
+       priv->first_buf_in_frame = false;
+    }
+
+    priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base);
+    priv->frame_started = true;
+ }
+
+/*
+ * Remove one picture from the decoded picture buffer list and hand it back.
+ *
+ * The list is walked from the head; the entry with the lowest POC is picked,
+ * and the walk stops at the first entry (other than the very first one) whose
+ * POC is zero. Returns NULL when the list is empty. When `timestamp` is
+ * non-NULL it receives the timestamp stored with the returned picture.
+ */
+static struct pipe_video_buffer *vid_dec_h265_Flush(vid_dec_PrivateType *priv,
+                                                    OMX_TICKS *timestamp)
+{
+   struct dpb_list *cur, *lowest = NULL;
+   struct pipe_video_buffer *picture;
+
+   LIST_FOR_EACH_ENTRY(cur, &priv->codec_data.h265.dpb_list, list) {
+      if (!lowest) {
+         /* first entry is always a candidate */
+         lowest = cur;
+         continue;
+      }
+
+      if (cur->poc == 0)
+         break;
+
+      if (cur->poc < lowest->poc)
+         lowest = cur;
+   }
+
+   if (lowest == NULL)
+      return NULL;
+
+   if (timestamp)
+      *timestamp = lowest->timestamp;
+   picture = lowest->buffer;
+
+   LIST_DEL(&lowest->list);
+   --priv->codec_data.h265.dpb_num;
+   FREE(lowest);
+
+   return picture;
+}
+
+/*
+ * Finish the picture currently being decoded (no-op if none was started).
+ *
+ * Steps:
+ *  - rebuild the reference picture arrays in priv->picture.h265 from the
+ *    reference picture set selected by slice_header() and the DPB list,
+ *  - issue end_frame() to the codec,
+ *  - append the decoded target to the DPB list,
+ *  - once the DPB holds more than DPB_MAX_SIZE pictures, flush one picture
+ *    into in_buffers[0] for output.
+ */
+static void vid_dec_h265_EndFrame(vid_dec_PrivateType *priv)
+{
+   struct dpb_list *entry = NULL;
+   struct pipe_video_buffer *tmp;
+   struct ref_pic_set *rps;
+   int i;
+   OMX_TICKS timestamp;
+
+   if (!priv->frame_started)
+      return;
+
+   /* reset all reference information before rebuilding it */
+   priv->picture.h265.NumPocStCurrBefore = 0;
+   priv->picture.h265.NumPocStCurrAfter = 0;
+   memset(priv->picture.h265.RefPicSetStCurrBefore, 0, 8);
+   memset(priv->picture.h265.RefPicSetStCurrAfter, 0, 8);
+   for (i = 0; i < MAX_NUM_REF_PICS; ++i) {
+      priv->picture.h265.ref[i] = NULL;
+      priv->picture.h265.PicOrderCntVal[i] = 0;
+   }
+
+   rps = priv->codec_data.h265.rps;
+
+   if (rps) {
+      unsigned bf = 0, af = 0;
+
+      priv->picture.h265.NumDeltaPocsOfRefRpsIdx = rps->num_delta_poc;
+      for (i = 0; i < rps->num_pics; ++i) {
+         priv->picture.h265.PicOrderCntVal[i] =
+            rps->delta_poc[i] + get_poc(priv);
+
+         /* look up the buffer holding the referenced POC, if still present */
+         LIST_FOR_EACH_ENTRY(entry, &priv->codec_data.h265.dpb_list, list) {
+            if (entry->poc == priv->picture.h265.PicOrderCntVal[i]) {
+               priv->picture.h265.ref[i] = entry->buffer;
+               break;
+            }
+         }
+
+         if (rps->used[i]) {
+            /* the first num_neg_pics entries go to the "before" list,
+             * the remaining ones to the "after" list */
+            if (i < rps->num_neg_pics) {
+               priv->picture.h265.NumPocStCurrBefore++;
+               priv->picture.h265.RefPicSetStCurrBefore[bf++] = i;
+            } else {
+               priv->picture.h265.NumPocStCurrAfter++;
+               priv->picture.h265.RefPicSetStCurrAfter[af++] = i;
+            }
+         }
+      }
+   }
+
+   priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base);
+   priv->frame_started = false;
+
+   /* add the decoded picture to the dpb list */
+   entry = CALLOC_STRUCT(dpb_list);
+   if (!entry)
+      return;
+
+   priv->first_buf_in_frame = true;
+   entry->buffer = priv->target;
+   entry->timestamp = priv->timestamp;
+   entry->poc = get_poc(priv);
+
+   LIST_ADDTAIL(&entry->list, &priv->codec_data.h265.dpb_list);
+   ++priv->codec_data.h265.dpb_num;
+   priv->target = NULL;
+
+   if (priv->codec_data.h265.dpb_num <= DPB_MAX_SIZE)
+      return;
+
+   /* DPB is over its limit: swap one finished picture into the buffer's
+    * private slot and reuse the buffer that was there as the next target */
+   tmp = priv->in_buffers[0]->pInputPortPrivate;
+   priv->in_buffers[0]->pInputPortPrivate = vid_dec_h265_Flush(priv, &timestamp);
+   priv->in_buffers[0]->nTimeStamp = timestamp;
+   priv->target = tmp;
+   priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL;
+   if (priv->frame_finished &&
+       (priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS)) {
+      FREE(priv->codec_data.h265.ref_pic_set_list);
+      /* do not leave a dangling pointer behind: slice_header() indexes this
+       * list and a second EOS would otherwise free it twice */
+      priv->codec_data.h265.ref_pic_set_list = NULL;
+   }
+}
+
+/*
+ * Parse the beginning of an H.265 slice segment header from `rbsp`.
+ *
+ * Whenever a value that identifies the picture changes with respect to what
+ * is stored in priv->picture.h265 (IDR flag, PPS, RAP flag, CurrRpsIdx or the
+ * picture order count), vid_dec_h265_EndFrame() is called first so that the
+ * previous picture is finished.
+ *
+ * On return the picture order count has been updated through set_poc() and
+ * priv->codec_data.h265.rps points at the reference picture set to use.
+ * The function returns early, without touching the rps, when the PPS/SPS is
+ * unknown, for dependent slice segments and for IDR pictures.
+ */
+static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
+ unsigned nal_unit_type)
+{
+ struct pipe_h265_pps *pps;
+ struct pipe_h265_sps *sps;
+ bool first_slice_segment_in_pic_flag;
+ bool dependent_slice_segment_flag = false;
+ struct ref_pic_set *rps;
+ unsigned poc_lsb, poc_msb, slice_prev_poc;
+ unsigned max_poc_lsb, prev_poc_lsb, prev_poc_msb;
+ unsigned num_st_rps;
+ int i;
+
+ if (priv->picture.h265.IDRPicFlag != is_idr_picture(nal_unit_type))
+ vid_dec_h265_EndFrame(priv);
+
+ priv->picture.h265.IDRPicFlag = is_idr_picture(nal_unit_type);
+
+ first_slice_segment_in_pic_flag = vl_rbsp_u(rbsp, 1);
+
+ if (is_rap_picture(nal_unit_type))
+ /* no_output_of_prior_pics_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ pps = pic_parameter_set_id(priv, rbsp);
+ if (!pps)
+ return;
+
+ sps = pps->sps;
+ if (!sps)
+ return;
+
+ if (pps != priv->picture.h265.pps)
+ vid_dec_h265_EndFrame(priv);
+
+ priv->picture.h265.pps = pps;
+
+ if (priv->picture.h265.RAPPicFlag != is_rap_picture(nal_unit_type))
+ vid_dec_h265_EndFrame(priv);
+ priv->picture.h265.RAPPicFlag = is_rap_picture(nal_unit_type);
+
+ num_st_rps = sps->num_short_term_ref_pic_sets;
+
+ if (priv->picture.h265.CurrRpsIdx != num_st_rps)
+ vid_dec_h265_EndFrame(priv);
+ priv->picture.h265.CurrRpsIdx = num_st_rps;
+
+ if (!first_slice_segment_in_pic_flag) {
+ int size, num;
+ int bits_slice_segment_address = 0;
+
+ if (pps->dependent_slice_segments_enabled_flag)
+ dependent_slice_segment_flag = vl_rbsp_u(rbsp, 1);
+
+ /* largest luma coding block size, derived from the SPS log2 fields */
+ size = 1 << (sps->log2_min_luma_coding_block_size_minus3 + 3 +
+ sps->log2_diff_max_min_luma_coding_block_size);
+
+ /* number of such blocks needed to cover the picture (rounded up) */
+ num = ((sps->pic_width_in_luma_samples + size - 1) / size) *
+ ((sps->pic_height_in_luma_samples + size - 1) / size);
+
+ /* smallest bit count able to address `num` blocks */
+ while (num > (1 << bits_slice_segment_address))
+ bits_slice_segment_address++;
+
+ /* slice_segment_address */
+ vl_rbsp_u(rbsp, bits_slice_segment_address);
+ }
+
+ if (dependent_slice_segment_flag)
+ return;
+
+ for (i = 0; i < pps->num_extra_slice_header_bits; ++i)
+ /* slice_reserved_flag */
+ vl_rbsp_u(rbsp, 1);
+
+ /* slice_type */
+ vl_rbsp_ue(rbsp);
+
+ if (pps->output_flag_present_flag)
+ /* pic output flag */
+ vl_rbsp_u(rbsp, 1);
+
+ if (sps->separate_colour_plane_flag)
+ /* colour_plane_id */
+ vl_rbsp_u(rbsp, 2);
+
+ if (is_idr_picture(nal_unit_type)) {
+ set_poc(priv, nal_unit_type, 0);
+ return;
+ }
+
+ /* slice_pic_order_cnt_lsb */
+ poc_lsb =
+ vl_rbsp_u(rbsp, sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
+
+ slice_prev_poc = (int)priv->codec_data.h265.slice_prev_poc;
+ max_poc_lsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
+
+ /* split the previous POC into its lsb and msb parts */
+ prev_poc_lsb = slice_prev_poc & (max_poc_lsb - 1);
+ prev_poc_msb = slice_prev_poc - prev_poc_lsb;
+
+ /* detect lsb wrap-around in either direction and adjust the msb part */
+ if ((poc_lsb < prev_poc_lsb) &&
+ ((prev_poc_lsb - poc_lsb ) >= (max_poc_lsb / 2)))
+ poc_msb = prev_poc_msb + max_poc_lsb;
+
+ else if ((poc_lsb > prev_poc_lsb ) &&
+ ((poc_lsb - prev_poc_lsb) > (max_poc_lsb / 2)))
+ poc_msb = prev_poc_msb - max_poc_lsb;
+
+ else
+ poc_msb = prev_poc_msb;
+
+ if (is_bla_picture(nal_unit_type))
+ poc_msb = 0;
+
+ /* a different POC means this slice belongs to a new picture */
+ if (get_poc(priv) != poc_msb + poc_lsb)
+ vid_dec_h265_EndFrame(priv);
+
+ set_poc(priv, nal_unit_type, (poc_msb + poc_lsb));
+
+ /* short_term_ref_pic_set_sps_flag */
+ if (!vl_rbsp_u(rbsp, 1)) {
+ /* set is coded in the slice header; it is parsed into the list slot at
+ * index num_st_rps.
+ * NOTE(review): the list is allocated with MAX_NUM_REF_PICS entries in
+ * vid_dec_h265_Init(); confirm num_st_rps can never reach that size. */
+ rps = (struct ref_pic_set *)
+ priv->codec_data.h265.ref_pic_set_list + num_st_rps;
+ st_ref_pic_set(priv, rbsp, rps, sps, num_st_rps);
+
+ } else if (num_st_rps > 1) {
+ int num_bits = 0;
+ unsigned idx;
+
+ /* bits needed to index one of num_st_rps sets */
+ while ((1 << num_bits) < num_st_rps)
+ num_bits++;
+
+ if (num_bits > 0)
+ /* short_term_ref_pic_set_idx */
+ idx = vl_rbsp_u(rbsp, num_bits);
+ else
+ idx = 0;
+
+ rps = (struct ref_pic_set *)
+ priv->codec_data.h265.ref_pic_set_list + idx;
+ } else
+ rps = (struct ref_pic_set *)
+ priv->codec_data.h265.ref_pic_set_list;
+
+ /* BLA pictures get an empty reference picture set; note that this
+ * modifies the selected list entry in place */
+ if (is_bla_picture(nal_unit_type)) {
+ rps->num_neg_pics = 0;
+ rps->num_pos_pics = 0;
+ rps->num_pics = 0;
+ }
+
+ priv->codec_data.h265.rps = rps;
+
+ return;
+}
+
+/*
+ * Consume one NAL unit start from `vlc`, if one can be found.
+ *
+ * The stream is scanned for a 0x000001 start code. Data of a previously
+ * started slice (priv->slice) that precedes the start code is handed to the
+ * codec first. The two-byte NAL unit header is then parsed and the unit is
+ * dispatched: SPS and PPS are parsed into decoder state, slice NAL units have
+ * their header parsed and their first bytes submitted for decoding. Any
+ * non-slice NAL unit ends the frame in progress.
+ */
+static void vid_dec_h265_Decode(vid_dec_PrivateType *priv,
+ struct vl_vlc *vlc,
+ unsigned min_bits_left)
+{
+ unsigned nal_unit_type;
+ unsigned nuh_layer_id;
+ unsigned nuh_temporal_id_plus1;
+
+ if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00))
+ return;
+
+ /* not a start code: skip the zero byte and let the caller retry */
+ if (vl_vlc_peekbits(vlc, 24) != 0x000001) {
+ vl_vlc_eatbits(vlc, 8);
+ return;
+ }
+
+ /* submit the remainder of the previous slice, which ends right here */
+ if (priv->slice) {
+ unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8);
+
+ priv->codec->decode_bitstream(priv->codec, priv->target,
+ &priv->picture.base, 1,
+ &priv->slice, &bytes);
+ priv->slice = NULL;
+ }
+
+ /* skip the start code itself */
+ vl_vlc_eatbits(vlc, 24);
+
+ /* forbidden_zero_bit */
+ vl_vlc_eatbits(vlc, 1);
+
+ /* the remaining header fields need 6 + 6 + 3 = 15 bits */
+ if (vl_vlc_valid_bits(vlc) < 15)
+ vl_vlc_fillbits(vlc);
+
+ nal_unit_type = vl_vlc_get_uimsbf(vlc, 6);
+
+ /* nuh_layer_id */
+ nuh_layer_id = vl_vlc_get_uimsbf(vlc, 6);
+
+ /* nuh_temporal_id_plus1 */
+ nuh_temporal_id_plus1 = vl_vlc_get_uimsbf(vlc, 3);
+ priv->codec_data.h265.temporal_id = nuh_temporal_id_plus1 - 1;
+
+ if (!is_slice_picture(nal_unit_type))
+ vid_dec_h265_EndFrame(priv);
+
+ if (nal_unit_type == NAL_UNIT_TYPE_SPS) {
+ struct vl_rbsp rbsp;
+
+ vl_rbsp_init(&rbsp, vlc, ~0);
+ seq_parameter_set(priv, &rbsp);
+
+ } else if (nal_unit_type == NAL_UNIT_TYPE_PPS) {
+ struct vl_rbsp rbsp;
+
+ vl_rbsp_init(&rbsp, vlc, ~0);
+ picture_parameter_set(priv, &rbsp);
+
+ } else if (is_slice_picture(nal_unit_type)) {
+ unsigned bits = vl_vlc_valid_bits(vlc);
+ unsigned bytes = bits / 8 + 5;
+ struct vl_rbsp rbsp;
+ uint8_t buf[9];
+ const void *ptr = buf;
+ unsigned i;
+
+ /* rebuild start code and NAL unit header, which were already consumed
+ * from the vlc, in front of the bytes it currently has buffered */
+ buf[0] = 0x0;
+ buf[1] = 0x0;
+ buf[2] = 0x1;
+ buf[3] = nal_unit_type << 1 | nuh_layer_id >> 5;
+ buf[4] = nuh_layer_id << 3 | nuh_temporal_id_plus1;
+ for (i = 5; i < bytes; ++i)
+ buf[i] = vl_vlc_peekbits(vlc, bits) >> ((bytes - i - 1) * 8);
+
+ /* remember where the not yet buffered part of the slice starts; it is
+ * submitted when the next start code is found (see top of function) */
+ priv->bytes_left = (vl_vlc_bits_left(vlc) - bits) / 8;
+ priv->slice = vlc->data;
+
+ /* NOTE(review): the third argument presumably limits how many bits the
+ * rbsp reader may consume for the slice header - confirm in vl_rbsp.h */
+ vl_rbsp_init(&rbsp, vlc, 128);
+ slice_header(priv, &rbsp, nal_unit_type);
+
+ vid_dec_h265_BeginFrame(priv);
+
+ priv->codec->decode_bitstream(priv->codec, priv->target,
+ &priv->picture.base, 1,
+ &ptr, &bytes);
+ }
+
+ /* resync to byte boundary */
+ vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8);
+}
+
+/*
+ * Set up `priv` for H.265 decoding: install the codec specific callbacks,
+ * select the HEVC main profile, start with an empty DPB list and allocate a
+ * zeroed reference picture set list with MAX_NUM_REF_PICS entries.
+ */
+void vid_dec_h265_Init(vid_dec_PrivateType *priv)
+{
+   /* codec specific entry points */
+   priv->Decode = vid_dec_h265_Decode;
+   priv->EndFrame = vid_dec_h265_EndFrame;
+   priv->Flush = vid_dec_h265_Flush;
+
+   priv->picture.base.profile = PIPE_VIDEO_PROFILE_HEVC_MAIN;
+   priv->first_buf_in_frame = true;
+
+   LIST_INITHEAD(&priv->codec_data.h265.dpb_list);
+   priv->codec_data.h265.ref_pic_set_list =
+      CALLOC(MAX_NUM_REF_PICS, sizeof(struct ref_pic_set));
+}
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c
new file mode 100644
index 000000000..f64c2b83f
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_dec_mpeg12.c
@@ -0,0 +1,383 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include "pipe/p_video_codec.h"
+#include "vl/vl_vlc.h"
+#include "vl/vl_zscan.h"
+
+#include "vid_dec.h"
+
+/*
+ * Built-in intra quantiser matrix. vid_dec_mpeg12_Init() installs it as the
+ * initial picture.mpeg12.intra_matrix, and the decoder falls back to it when
+ * a header does not load its own matrix.
+ */
+static uint8_t default_intra_matrix[64] = {
+ 8, 16, 19, 22, 26, 27, 29, 34,
+ 16, 16, 22, 24, 27, 29, 34, 37,
+ 19, 22, 26, 27, 29, 34, 34, 38,
+ 22, 22, 26, 27, 29, 34, 37, 40,
+ 22, 26, 27, 29, 32, 35, 40, 48,
+ 26, 27, 29, 32, 35, 40, 48, 58,
+ 26, 27, 29, 34, 38, 46, 56, 69,
+ 27, 29, 35, 38, 46, 56, 69, 83
+};
+
+/*
+ * Built-in non-intra quantiser matrix (flat, all entries 16); used the same
+ * way as default_intra_matrix for picture.mpeg12.non_intra_matrix.
+ */
+static uint8_t default_non_intra_matrix[64] = {
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+};
+
+static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
+static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv);
+static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
+
+/*
+ * Set up `priv` for MPEG-1/2 decoding.
+ *
+ * Unlike the lazily created codecs, the MPEG codec is created right away,
+ * sized from the input port's frame dimensions. The picture description
+ * starts out with the built-in quantiser matrices, and the codec specific
+ * callbacks are installed.
+ */
+void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv)
+{
+   omx_base_video_PortType *in_port =
+      (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+   struct pipe_video_codec templat = {
+      .profile = priv->profile,
+      .entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+      .chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420,
+      .width = in_port->sPortParam.format.video.nFrameWidth,
+      .height = in_port->sPortParam.format.video.nFrameHeight,
+      .max_references = 2,
+      .expect_chunked_decode = true,
+   };
+
+   priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat);
+
+   /* picture defaults until the stream says otherwise */
+   priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+   priv->picture.mpeg12.intra_matrix = default_intra_matrix;
+   priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix;
+
+   /* codec specific entry points */
+   priv->Decode = vid_dec_mpeg12_Decode;
+   priv->EndFrame = vid_dec_mpeg12_EndFrame;
+   priv->Flush = vid_dec_mpeg12_Flush;
+}
+
+/*
+ * Start decoding a new MPEG picture.
+ *
+ * For non-B pictures the newest reference (ref[1]) becomes the older one
+ * (ref[0]). If the current target is still needed as ref[0] it is swapped
+ * with the shadow buffer so that it is not overwritten. A target is then
+ * requested through vid_dec_NeedTarget() and begin_frame() is issued.
+ */
+static void BeginFrame(vid_dec_PrivateType *priv)
+{
+   const bool is_b_picture =
+      priv->picture.mpeg12.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_B;
+
+   if (!is_b_picture) {
+      priv->picture.mpeg12.ref[0] = priv->picture.mpeg12.ref[1];
+      priv->picture.mpeg12.ref[1] = NULL;
+   }
+
+   if (priv->picture.mpeg12.ref[0] == priv->target) {
+      /* target is still referenced: decode into the shadow buffer instead */
+      struct pipe_video_buffer *in_use = priv->target;
+
+      priv->target = priv->shadow;
+      priv->shadow = in_use;
+   }
+
+   vid_dec_NeedTarget(priv);
+
+   priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base);
+   priv->frame_started = true;
+}
+
+/*
+ * Finish the current MPEG picture and pick the picture to output.
+ *
+ * B pictures are output immediately. Any other picture becomes the newest
+ * reference (ref[1]) and the older reference (ref[0]) is output instead; if
+ * there is no older reference yet, nothing is output and the target is
+ * cleared. The output picture is swapped into in_buffers[0]'s private slot
+ * and the buffer found there becomes the next target.
+ */
+static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv)
+{
+   struct pipe_video_buffer *output;
+
+   priv->codec->end_frame(priv->codec, priv->target, &priv->picture.base);
+   priv->frame_started = false;
+
+   if (priv->picture.mpeg12.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_B) {
+      output = priv->target;
+   } else {
+      priv->picture.mpeg12.ref[1] = priv->target;
+      output = priv->picture.mpeg12.ref[0];
+
+      if (output == NULL) {
+         /* first reference picture: nothing to display yet */
+         priv->target = NULL;
+         return;
+      }
+   }
+
+   priv->frame_finished = true;
+   priv->target = priv->in_buffers[0]->pInputPortPrivate;
+   priv->in_buffers[0]->pInputPortPrivate = output;
+}
+
+/*
+ * Hand out the newest reference picture (ref[1]) and forget it.
+ * Returns NULL when there is none. No timestamp is tracked here, so
+ * `*timestamp`, when requested, is set to OMX_VID_DEC_TIMESTAMP_INVALID.
+ */
+static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp)
+{
+   struct pipe_video_buffer *pending = priv->picture.mpeg12.ref[1];
+
+   if (timestamp != NULL)
+      *timestamp = OMX_VID_DEC_TIMESTAMP_INVALID;
+
+   priv->picture.mpeg12.ref[1] = NULL;
+   return pending;
+}
+
+/*
+ * Read the load_intra_quantiser_matrix / load_non_intra_quantiser_matrix
+ * flags and, for each flag that is set, the 64 matrix entries following it.
+ *
+ * A loaded matrix is stored in priv->codec_data.mpeg12 and selected in the
+ * picture description; otherwise the matching built-in default matrix is
+ * selected. Shared by the sequence header and the quant matrix extension so
+ * that both always update the same fields.
+ *
+ * NOTE(review): only the intra matrix is reordered through vl_zscan_normal;
+ * the non-intra matrix is stored in bitstream order. This is kept as found -
+ * confirm against the consumers of non_intra_matrix.
+ */
+static void mpeg12_read_quant_matrices(vid_dec_PrivateType *priv, struct vl_vlc *vlc)
+{
+   unsigned i;
+
+   /* load_intra_quantiser_matrix */
+   if (vl_vlc_get_uimsbf(vlc, 1)) {
+      /* intra_quantiser_matrix */
+      priv->picture.mpeg12.intra_matrix = priv->codec_data.mpeg12.intra_matrix;
+      for (i = 0; i < 64; ++i) {
+         priv->codec_data.mpeg12.intra_matrix[vl_zscan_normal[i]] = vl_vlc_get_uimsbf(vlc, 8);
+         vl_vlc_fillbits(vlc);
+      }
+   } else
+      priv->picture.mpeg12.intra_matrix = default_intra_matrix;
+
+   /* load_non_intra_quantiser_matrix */
+   if (vl_vlc_get_uimsbf(vlc, 1)) {
+      /* non_intra_quantiser_matrix */
+      priv->picture.mpeg12.non_intra_matrix = priv->codec_data.mpeg12.non_intra_matrix;
+      for (i = 0; i < 64; ++i) {
+         priv->codec_data.mpeg12.non_intra_matrix[i] = vl_vlc_get_uimsbf(vlc, 8);
+         vl_vlc_fillbits(vlc);
+      }
+   } else
+      priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix;
+}
+
+/*
+ * Consume one MPEG-1/2 start code from `vlc`, if one can be found.
+ *
+ * The stream is scanned for a 0x000001 start code. Data of a previously
+ * started slice (priv->slice) that precedes it is handed to the codec first.
+ * The start code value then selects what is parsed: sequence header (0xB3),
+ * picture header (0x00), extensions (0xB5) or a slice (0x01..0xAF). A picture
+ * start code or any code above 0xAF ends the frame in progress.
+ */
+static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left)
+{
+   uint8_t code;
+
+   if (!vl_vlc_search_byte(vlc, vl_vlc_bits_left(vlc) - min_bits_left, 0x00))
+      return;
+
+   /* not a start code: skip the zero byte and let the caller retry */
+   if (vl_vlc_peekbits(vlc, 24) != 0x000001) {
+      vl_vlc_eatbits(vlc, 8);
+      return;
+   }
+
+   /* submit the remainder of the previous slice, which ends right here */
+   if (priv->slice) {
+      unsigned bytes = priv->bytes_left - (vl_vlc_bits_left(vlc) / 8);
+      priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base,
+                                    1, &priv->slice, &bytes);
+      priv->slice = NULL;
+   }
+
+   vl_vlc_eatbits(vlc, 24);
+   code = vl_vlc_get_uimsbf(vlc, 8);
+
+   if (priv->frame_started && (code == 0x00 || code > 0xAF))
+      vid_dec_mpeg12_EndFrame(priv);
+
+   if (code == 0xB3) {
+      /* sequence header code */
+      vl_vlc_fillbits(vlc);
+
+      /* horizontal_size_value */
+      vl_vlc_get_uimsbf(vlc, 12);
+
+      /* vertical_size_value */
+      vl_vlc_get_uimsbf(vlc, 12);
+
+      /* aspect_ratio_information */
+      vl_vlc_get_uimsbf(vlc, 4);
+
+      /* frame_rate_code */
+      vl_vlc_get_uimsbf(vlc, 4);
+
+      vl_vlc_fillbits(vlc);
+
+      /* bit_rate_value */
+      vl_vlc_get_uimsbf(vlc, 18);
+
+      /* marker_bit */
+      vl_vlc_get_uimsbf(vlc, 1);
+
+      /* vbv_buffer_size_value */
+      vl_vlc_get_uimsbf(vlc, 10);
+
+      /* constrained_parameters_flag */
+      vl_vlc_get_uimsbf(vlc, 1);
+
+      vl_vlc_fillbits(vlc);
+
+      mpeg12_read_quant_matrices(priv, vlc);
+
+   } else if (code == 0x00) {
+      /* picture start code */
+      vl_vlc_fillbits(vlc);
+
+      /* temporal_reference */
+      vl_vlc_get_uimsbf(vlc, 10);
+
+      priv->picture.mpeg12.picture_coding_type = vl_vlc_get_uimsbf(vlc, 3);
+
+      /* vbv_delay */
+      vl_vlc_get_uimsbf(vlc, 16);
+
+      vl_vlc_fillbits(vlc);
+      if (priv->picture.mpeg12.picture_coding_type == 2 ||
+          priv->picture.mpeg12.picture_coding_type == 3) {
+         priv->picture.mpeg12.full_pel_forward_vector = vl_vlc_get_uimsbf(vlc, 1);
+         /* forward_f_code */
+         priv->picture.mpeg12.f_code[0][0] = vl_vlc_get_uimsbf(vlc, 3) - 1;
+         priv->picture.mpeg12.f_code[0][1] = priv->picture.mpeg12.f_code[0][0];
+      } else {
+         priv->picture.mpeg12.full_pel_forward_vector = 0;
+         priv->picture.mpeg12.f_code[0][1] = priv->picture.mpeg12.f_code[0][0] = 14;
+      }
+
+      if (priv->picture.mpeg12.picture_coding_type == 3) {
+         priv->picture.mpeg12.full_pel_backward_vector = vl_vlc_get_uimsbf(vlc, 1);
+         /* backward_f_code */
+         priv->picture.mpeg12.f_code[1][0] = vl_vlc_get_uimsbf(vlc, 3) - 1;
+         priv->picture.mpeg12.f_code[1][1] = priv->picture.mpeg12.f_code[1][0];
+      } else {
+         priv->picture.mpeg12.full_pel_backward_vector = 0;
+         /* reset the backward codes only; writing f_code[0] here would
+          * discard the forward_f_code just parsed for P pictures */
+         priv->picture.mpeg12.f_code[1][1] = priv->picture.mpeg12.f_code[1][0] = 14;
+      }
+
+      /* extra_bit_picture */
+      while (vl_vlc_get_uimsbf(vlc, 1)) {
+         /* extra_information_picture */
+         vl_vlc_get_uimsbf(vlc, 8);
+         vl_vlc_fillbits(vlc);
+      }
+
+   } else if (code == 0xB5) {
+      /* extension start code */
+      vl_vlc_fillbits(vlc);
+
+      /* extension_start_code_identifier */
+      switch (vl_vlc_get_uimsbf(vlc, 4)) {
+      case 0x3: /* quant matrix extension */
+
+         mpeg12_read_quant_matrices(priv, vlc);
+
+         break;
+
+      case 0x8: /* picture coding extension */
+
+         priv->picture.mpeg12.f_code[0][0] = vl_vlc_get_uimsbf(vlc, 4) - 1;
+         priv->picture.mpeg12.f_code[0][1] = vl_vlc_get_uimsbf(vlc, 4) - 1;
+         priv->picture.mpeg12.f_code[1][0] = vl_vlc_get_uimsbf(vlc, 4) - 1;
+         priv->picture.mpeg12.f_code[1][1] = vl_vlc_get_uimsbf(vlc, 4) - 1;
+         priv->picture.mpeg12.intra_dc_precision = vl_vlc_get_uimsbf(vlc, 2);
+         priv->picture.mpeg12.picture_structure = vl_vlc_get_uimsbf(vlc, 2);
+         priv->picture.mpeg12.top_field_first = vl_vlc_get_uimsbf(vlc, 1);
+         priv->picture.mpeg12.frame_pred_frame_dct = vl_vlc_get_uimsbf(vlc, 1);
+         priv->picture.mpeg12.concealment_motion_vectors = vl_vlc_get_uimsbf(vlc, 1);
+         priv->picture.mpeg12.q_scale_type = vl_vlc_get_uimsbf(vlc, 1);
+         priv->picture.mpeg12.intra_vlc_format = vl_vlc_get_uimsbf(vlc, 1);
+         priv->picture.mpeg12.alternate_scan = vl_vlc_get_uimsbf(vlc, 1);
+
+         /* repeat_first_field */
+         vl_vlc_get_uimsbf(vlc, 1);
+
+         /* chroma_420_type */
+         vl_vlc_get_uimsbf(vlc, 1);
+
+         vl_vlc_fillbits(vlc);
+
+         /* progressive_frame */
+         vl_vlc_get_uimsbf(vlc, 1);
+
+         /* composite_display_flag */
+         if (vl_vlc_get_uimsbf(vlc, 1)) {
+
+            /* v_axis */
+            vl_vlc_get_uimsbf(vlc, 1);
+
+            /* field_sequence */
+            vl_vlc_get_uimsbf(vlc, 3);
+
+            /* sub_carrier */
+            vl_vlc_get_uimsbf(vlc, 1);
+
+            /* burst_amplitude */
+            vl_vlc_get_uimsbf(vlc, 7);
+
+            /* sub_carrier_phase */
+            vl_vlc_get_uimsbf(vlc, 8);
+         }
+         break;
+      }
+
+   } else if (code <= 0xAF) {
+      /* slice start */
+      unsigned bytes = (vl_vlc_valid_bits(vlc) / 8) + 4;
+      uint8_t buf[12];
+      const void *ptr = buf;
+      unsigned i;
+
+      if (!priv->frame_started)
+         BeginFrame(priv);
+
+      /* rebuild the start code, which was already consumed from the vlc,
+       * in front of the bytes it currently has buffered */
+      buf[0] = 0x00;
+      buf[1] = 0x00;
+      buf[2] = 0x01;
+      buf[3] = code;
+      for (i = 4; i < bytes; ++i)
+         buf[i] = vl_vlc_get_uimsbf(vlc, 8);
+
+      priv->codec->decode_bitstream(priv->codec, priv->target, &priv->picture.base,
+                                    1, &ptr, &bytes);
+
+      /* the rest of the slice is submitted once the next start code shows up */
+      priv->bytes_left = vl_vlc_bits_left(vlc) / 8;
+      priv->slice = vlc->data;
+
+   } else if (code == 0xB2) {
+      /* user data start */
+
+   } else if (code == 0xB4) {
+      /* sequence error */
+   } else if (code == 0xB7) {
+      /* sequence end */
+   } else if (code == 0xB8) {
+      /* group start */
+   } else if (code >= 0xB9) {
+      /* system start */
+   } else {
+      /* reserved */
+   }
+
+   /* resync to byte boundary */
+   vl_vlc_eatbits(vlc, vl_vlc_valid_bits(vlc) % 8);
+}
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c
new file mode 100644
index 000000000..1a4fb62d4
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.c
@@ -0,0 +1,1278 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+
+#include <assert.h>
+
+#include <OMX_Video.h>
+
+/* bellagio defines a DEBUG macro that we don't want */
+#ifndef DEBUG
+#include <bellagio/omxcore.h>
+#undef DEBUG
+#else
+#include <bellagio/omxcore.h>
+#endif
+
+#include <bellagio/omx_base_video_port.h>
+
+#include "pipe/p_screen.h"
+#include "pipe/p_video_codec.h"
+#include "util/u_memory.h"
+#include "vl/vl_video_buffer.h"
+
+#include "entrypoint.h"
+#include "vid_enc.h"
+
+/*
+ * Bookkeeping for one encode task.
+ * NOTE(review): the field roles noted below are inferred from names and
+ * types only - confirm against the code that fills them in.
+ */
+struct encode_task {
+ struct list_head list; /* links the task into a task list */
+
+ struct pipe_video_buffer *buf; /* presumably the picture to encode */
+ unsigned pic_order_cnt;
+ struct pipe_resource *bitstream; /* presumably receives the encoded data */
+ void *feedback;
+};
+
+/* Private data attached to an input buffer: a list head for tasks plus a
+ * resource and the transfer used to map it. */
+struct input_buf_private {
+ struct list_head tasks;
+
+ struct pipe_resource *resource;
+ struct pipe_transfer *transfer;
+};
+
+/* Private data attached to an output buffer: the bitstream resource and the
+ * transfer used to map it. */
+struct output_buf_private {
+ struct pipe_resource *bitstream;
+ struct pipe_transfer *transfer;
+};
+
+static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name);
+static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp);
+static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param);
+static OMX_ERRORTYPE vid_enc_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param);
+static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config);
+static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config);
+static OMX_ERRORTYPE vid_enc_MessageHandler(OMX_COMPONENTTYPE *comp, internalRequestMessageType *msg);
+static OMX_ERRORTYPE vid_enc_AllocateInBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf,
+ OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size);
+static OMX_ERRORTYPE vid_enc_UseInBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE **buf, OMX_U32 idx,
+ OMX_PTR private, OMX_U32 size, OMX_U8 *mem);
+static OMX_ERRORTYPE vid_enc_FreeInBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf);
+static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf);
+static OMX_ERRORTYPE vid_enc_AllocateOutBuffer(omx_base_PortType *comp, OMX_INOUT OMX_BUFFERHEADERTYPE **buf,
+ OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size);
+static OMX_ERRORTYPE vid_enc_FreeOutBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf);
+static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output);
+
+static void enc_ReleaseTasks(struct list_head *head);
+
+/*
+ * Fill in the loader description of the video encoder component: version
+ * 0.0.0.1, one specific name/role pair (AVC) and the constructor hook.
+ *
+ * Returns OMX_ErrorNone on success. If any allocation fails,
+ * OMX_ErrorInsufficientResources is returned; everything allocated so far
+ * has then been released and the corresponding pointers in `comp` are NULL.
+ */
+OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp)
+{
+   comp->componentVersion.s.nVersionMajor = 0;
+   comp->componentVersion.s.nVersionMinor = 0;
+   comp->componentVersion.s.nRevision = 0;
+   comp->componentVersion.s.nStep = 1;
+   comp->name_specific_length = 1;
+   comp->constructor = vid_enc_Constructor;
+
+   comp->name = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+   if (!comp->name)
+      return OMX_ErrorInsufficientResources;
+
+   comp->name_specific = CALLOC(1, sizeof(char *));
+   if (!comp->name_specific)
+      goto error_name;
+
+   comp->role_specific = CALLOC(1, sizeof(char *));
+   if (!comp->role_specific)
+      goto error_name_specific;
+
+   comp->name_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+   if (comp->name_specific[0] == NULL)
+      goto error_role_specific;
+
+   comp->role_specific[0] = CALLOC(1, OMX_MAX_STRINGNAME_SIZE);
+   if (comp->role_specific[0] == NULL)
+      goto error_name_specific_entry;
+
+   strcpy(comp->name, OMX_VID_ENC_BASE_NAME);
+   strcpy(comp->name_specific[0], OMX_VID_ENC_AVC_NAME);
+   strcpy(comp->role_specific[0], OMX_VID_ENC_AVC_ROLE);
+
+   return OMX_ErrorNone;
+
+   /* unwind in reverse order of allocation; each label releases only what
+    * is known to have been allocated at the point of the jump */
+error_name_specific_entry:
+   FREE(comp->name_specific[0]);
+
+error_role_specific:
+   FREE(comp->role_specific);
+   comp->role_specific = NULL;
+
+error_name_specific:
+   FREE(comp->name_specific);
+   comp->name_specific = NULL;
+
+error_name:
+   FREE(comp->name);
+   comp->name = NULL;
+
+   return OMX_ErrorInsufficientResources;
+}
+
+/*
+ * Construct the video encoder component.
+ *
+ * Allocates the private data, runs the base filter constructor, installs the
+ * component callbacks, acquires the screen, verifies that H.264 (AVC high
+ * profile) encoding is supported, creates two pipe contexts (s_pipe with a
+ * compositor, and t_pipe), creates the input and output video ports and fills
+ * in the default encoder settings.
+ *
+ * Returns OMX_ErrorNone on success, OMX_ErrorBadParameter if encoding is not
+ * supported, OMX_ErrorInsufficientResources on allocation/creation failures,
+ * or the error of the base constructor.
+ *
+ * NOTE(review): most error returns leave already acquired resources in place;
+ * presumably the destructor is expected to release them - confirm with the
+ * caller's error handling.
+ */
+static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING name)
+{
+ vid_enc_PrivateType *priv;
+ omx_base_video_PortType *port;
+ struct pipe_screen *screen;
+ OMX_ERRORTYPE r;
+ int i;
+
+ assert(!comp->pComponentPrivate);
+
+ priv = comp->pComponentPrivate = CALLOC(1, sizeof(vid_enc_PrivateType));
+ if (!priv)
+ return OMX_ErrorInsufficientResources;
+
+ r = omx_base_filter_Constructor(comp, name);
+ if (r)
+ return r;
+
+ /* hook this component's implementation into the base filter */
+ priv->BufferMgmtCallback = vid_enc_BufferEncoded;
+ priv->messageHandler = vid_enc_MessageHandler;
+ priv->destructor = vid_enc_Destructor;
+
+ comp->SetParameter = vid_enc_SetParameter;
+ comp->GetParameter = vid_enc_GetParameter;
+ comp->GetConfig = vid_enc_GetConfig;
+ comp->SetConfig = vid_enc_SetConfig;
+
+ priv->screen = omx_get_screen();
+ if (!priv->screen)
+ return OMX_ErrorInsufficientResources;
+
+ screen = priv->screen->pscreen;
+ if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
+ PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
+ return OMX_ErrorBadParameter;
+
+ /* s_pipe is the context the compositor is set up on */
+ priv->s_pipe = screen->context_create(screen, NULL, 0);
+ if (!priv->s_pipe)
+ return OMX_ErrorInsufficientResources;
+
+ if (!vl_compositor_init(&priv->compositor, priv->s_pipe)) {
+ priv->s_pipe->destroy(priv->s_pipe);
+ priv->s_pipe = NULL;
+ return OMX_ErrorInsufficientResources;
+ }
+
+ if (!vl_compositor_init_state(&priv->cstate, priv->s_pipe)) {
+ vl_compositor_cleanup(&priv->compositor);
+ priv->s_pipe->destroy(priv->s_pipe);
+ priv->s_pipe = NULL;
+ return OMX_ErrorInsufficientResources;
+ }
+
+ priv->t_pipe = screen->context_create(screen, NULL, 0);
+ if (!priv->t_pipe)
+ return OMX_ErrorInsufficientResources;
+
+ priv->sPortTypesParam[OMX_PortDomainVideo].nStartPortNumber = 0;
+ priv->sPortTypesParam[OMX_PortDomainVideo].nPorts = 2;
+ priv->ports = CALLOC(2, sizeof(omx_base_PortType *));
+ if (!priv->ports)
+ return OMX_ErrorInsufficientResources;
+
+ /* port 0 is constructed as input (last argument true), port 1 as output */
+ for (i = 0; i < 2; ++i) {
+ priv->ports[i] = CALLOC(1, sizeof(omx_base_video_PortType));
+ if (!priv->ports[i])
+ return OMX_ErrorInsufficientResources;
+
+ base_video_port_Constructor(comp, &priv->ports[i], i, i == 0);
+ }
+
+ /* input port: raw semi-planar YUV 4:2:0, 176x144 by default */
+ port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+ port->sPortParam.format.video.nFrameWidth = 176;
+ port->sPortParam.format.video.nFrameHeight = 144;
+ port->sPortParam.format.video.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar;
+ port->sVideoParam.eColorFormat = OMX_COLOR_FormatYUV420SemiPlanar;
+ port->sPortParam.nBufferCountActual = 8;
+ port->sPortParam.nBufferCountMin = 4;
+
+ port->Port_SendBufferFunction = vid_enc_EncodeFrame;
+ port->Port_AllocateBuffer = vid_enc_AllocateInBuffer;
+ port->Port_UseBuffer = vid_enc_UseInBuffer;
+ port->Port_FreeBuffer = vid_enc_FreeInBuffer;
+
+ /* output port: H.264 elementary stream */
+ port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
+ strcpy(port->sPortParam.format.video.cMIMEType,"video/H264");
+ port->sPortParam.format.video.nFrameWidth = 176;
+ port->sPortParam.format.video.nFrameHeight = 144;
+ port->sPortParam.format.video.eCompressionFormat = OMX_VIDEO_CodingAVC;
+ port->sVideoParam.eCompressionFormat = OMX_VIDEO_CodingAVC;
+
+ port->Port_AllocateBuffer = vid_enc_AllocateOutBuffer;
+ port->Port_FreeBuffer = vid_enc_FreeOutBuffer;
+
+ /* default encoder settings */
+ priv->bitrate.eControlRate = OMX_Video_ControlRateDisable;
+ priv->bitrate.nTargetBitrate = 0;
+
+ priv->quant.nQpI = OMX_VID_ENC_QUANT_I_FRAMES_DEFAULT;
+ priv->quant.nQpP = OMX_VID_ENC_QUANT_P_FRAMES_DEFAULT;
+ priv->quant.nQpB = OMX_VID_ENC_QUANT_B_FRAMES_DEFAULT;
+
+ priv->profile_level.eProfile = OMX_VIDEO_AVCProfileBaseline;
+ priv->profile_level.eLevel = OMX_VIDEO_AVCLevel51;
+
+ priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
+ priv->frame_num = 0;
+ priv->pic_order_cnt = 0;
+ priv->restricted_b_frames = debug_get_bool_option("OMX_USE_RESTRICTED_B_FRAMES", FALSE);
+
+ priv->scale.xWidth = OMX_VID_ENC_SCALING_WIDTH_DEFAULT;
+ /* NOTE(review): the height is initialised from the WIDTH default; check
+ * whether a dedicated height default was intended here */
+ priv->scale.xHeight = OMX_VID_ENC_SCALING_WIDTH_DEFAULT;
+
+ LIST_INITHEAD(&priv->free_tasks);
+ LIST_INITHEAD(&priv->used_tasks);
+ LIST_INITHEAD(&priv->b_frames);
+ LIST_INITHEAD(&priv->stacked_tasks);
+
+ return OMX_ErrorNone;
+}
+
+/*
+ * Tear down the video encoder component.
+ *
+ * Releases all task lists, destroys and frees the ports, destroys the
+ * scaling buffers, the compositor and both pipe contexts, drops the screen
+ * reference and finally returns the result of omx_workaround_Destructor().
+ * Every resource is checked before release.
+ */
+static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp)
+{
+   vid_enc_PrivateType* priv = comp->pComponentPrivate;
+   int n;
+
+   enc_ReleaseTasks(&priv->free_tasks);
+   enc_ReleaseTasks(&priv->used_tasks);
+   enc_ReleaseTasks(&priv->b_frames);
+   enc_ReleaseTasks(&priv->stacked_tasks);
+
+   if (priv->ports != NULL) {
+      for (n = 0; n < priv->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++n) {
+         omx_base_PortType *p = priv->ports[n];
+
+         if (p)
+            p->PortDestructor(p);
+      }
+      FREE(priv->ports);
+      priv->ports = NULL;
+   }
+
+   for (n = 0; n < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++n) {
+      if (priv->scale_buffer[n])
+         priv->scale_buffer[n]->destroy(priv->scale_buffer[n]);
+   }
+
+   if (priv->s_pipe != NULL) {
+      /* compositor state lives on s_pipe and has to go first */
+      vl_compositor_cleanup_state(&priv->cstate);
+      vl_compositor_cleanup(&priv->compositor);
+      priv->s_pipe->destroy(priv->s_pipe);
+   }
+
+   if (priv->t_pipe != NULL)
+      priv->t_pipe->destroy(priv->t_pipe);
+
+   if (priv->screen != NULL)
+      omx_put_screen();
+
+   return omx_workaround_Destructor(comp);
+}
+
+/*
+ * Create a linear staging texture backing one input frame and map it for
+ * writing.
+ *
+ * The texture is an I8 2D resource of nFrameWidth x (nFrameHeight * 3 / 2)
+ * taken from the port definition.
+ *
+ * port     - port whose video definition supplies the frame dimensions
+ * resource - receives the new resource (NULL on failure)
+ * transfer - receives the transfer object of the mapping (NULL on failure)
+ * map      - optional; receives the CPU pointer of the mapping
+ *
+ * Returns OMX_ErrorNone on success, or OMX_ErrorInsufficientResources when
+ * either the resource cannot be created or it cannot be mapped. On failure
+ * nothing is left allocated.
+ */
+static OMX_ERRORTYPE enc_AllocateBackTexture(omx_base_PortType *port,
+                                             struct pipe_resource **resource,
+                                             struct pipe_transfer **transfer,
+                                             OMX_U8 **map)
+{
+   OMX_COMPONENTTYPE* comp = port->standCompContainer;
+   vid_enc_PrivateType *priv = comp->pComponentPrivate;
+   struct pipe_resource buf_templ;
+   struct pipe_box box = {};
+   OMX_U8 *ptr;
+
+   /* give callers a defined value to test even when we bail out early */
+   *transfer = NULL;
+
+   memset(&buf_templ, 0, sizeof buf_templ);
+   buf_templ.target = PIPE_TEXTURE_2D;
+   buf_templ.format = PIPE_FORMAT_I8_UNORM;
+   buf_templ.bind = PIPE_BIND_LINEAR;
+   buf_templ.usage = PIPE_USAGE_STAGING;
+   buf_templ.flags = 0;
+   buf_templ.width0 = port->sPortParam.format.video.nFrameWidth;
+   buf_templ.height0 = port->sPortParam.format.video.nFrameHeight * 3 / 2;
+   buf_templ.depth0 = 1;
+   buf_templ.array_size = 1;
+
+   *resource = priv->s_pipe->screen->resource_create(priv->s_pipe->screen, &buf_templ);
+   if (!*resource)
+      return OMX_ErrorInsufficientResources;
+
+   box.width = (*resource)->width0;
+   box.height = (*resource)->height0;
+   box.depth = (*resource)->depth0;
+   ptr = priv->s_pipe->transfer_map(priv->s_pipe, *resource, 0, PIPE_TRANSFER_WRITE, &box, transfer);
+   if (!ptr) {
+      /* mapping failed: do not report success with an unusable buffer */
+      *transfer = NULL;
+      pipe_resource_reference(resource, NULL);
+      return OMX_ErrorInsufficientResources;
+   }
+
+   if (map)
+      *map = ptr;
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * OMX SetParameter entry point of the encoder.
+ *
+ * Handles the port definition, component role, bitrate, quantization and
+ * profile/level parameters itself; every other index is forwarded to
+ * omx_base_component_SetParameter().
+ *
+ * For a port definition on the input port the stride is taken from a
+ * temporary back texture, the input and output buffer sizes are recomputed
+ * and OMX_EventPortSettingsChanged is raised for the output port.
+ *
+ * Returns OMX_ErrorBadParameter for a NULL param or an unsupported role,
+ * the error of the header check / base implementation / texture allocation
+ * when one of them fails, and OMX_ErrorNone otherwise.
+ */
+static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param)
+{
+   OMX_COMPONENTTYPE *comp = handle;
+   vid_enc_PrivateType *priv = comp->pComponentPrivate;
+   OMX_ERRORTYPE r;
+
+   if (!param)
+      return OMX_ErrorBadParameter;
+
+   switch(idx) {
+   case OMX_IndexParamPortDefinition: {
+      OMX_PARAM_PORTDEFINITIONTYPE *def = param;
+
+      r = omx_base_component_SetParameter(handle, idx, param);
+      if (r)
+         return r;
+
+      if (def->nPortIndex == OMX_BASE_FILTER_INPUTPORT_INDEX) {
+         omx_base_video_PortType *port;
+         unsigned framesize;
+         struct pipe_resource *resource = NULL;
+         struct pipe_transfer *transfer = NULL;
+
+         port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+
+         /* a throw-away texture tells us which stride the driver will use */
+         r = enc_AllocateBackTexture(priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX],
+                                     &resource, &transfer, NULL);
+         if (r)
+            return r;
+         if (!transfer) {
+            /* mapping failed without an error code; there is no stride to read */
+            pipe_resource_reference(&resource, NULL);
+            return OMX_ErrorInsufficientResources;
+         }
+         port->sPortParam.format.video.nStride = transfer->stride;
+         pipe_transfer_unmap(priv->s_pipe, transfer);
+         pipe_resource_reference(&resource, NULL);
+
+         framesize = port->sPortParam.format.video.nStride *
+                     port->sPortParam.format.video.nFrameHeight;
+         port->sPortParam.format.video.nSliceHeight = port->sPortParam.format.video.nFrameHeight;
+         port->sPortParam.nBufferSize = framesize * 3 / 2;
+
+         port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
+         port->sPortParam.nBufferSize = framesize * 512 / (16*16);
+
+         priv->frame_rate = def->format.video.xFramerate;
+
+         priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged,
+                                       OMX_BASE_FILTER_OUTPUTPORT_INDEX, 0, NULL);
+      }
+      break;
+   }
+   case OMX_IndexParamStandardComponentRole: {
+      OMX_PARAM_COMPONENTROLETYPE *role = param;
+
+      r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
+      if (r)
+         return r;
+
+      /* only the AVC encoder role is accepted */
+      if (strcmp((char *)role->cRole, OMX_VID_ENC_AVC_ROLE)) {
+         return OMX_ErrorBadParameter;
+      }
+
+      break;
+   }
+   case OMX_IndexParamVideoBitrate: {
+      OMX_VIDEO_PARAM_BITRATETYPE *bitrate = param;
+
+      r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_BITRATETYPE));
+      if (r)
+         return r;
+
+      priv->bitrate = *bitrate;
+
+      break;
+   }
+   case OMX_IndexParamVideoQuantization: {
+      OMX_VIDEO_PARAM_QUANTIZATIONTYPE *quant = param;
+
+      r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_QUANTIZATIONTYPE));
+      if (r)
+         return r;
+
+      priv->quant = *quant;
+
+      break;
+   }
+   case OMX_IndexParamVideoProfileLevelCurrent: {
+      OMX_VIDEO_PARAM_PROFILELEVELTYPE *profile_level = param;
+
+      r = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PROFILELEVELTYPE));
+      if (r)
+         return r;
+
+      priv->profile_level = *profile_level;
+
+      break;
+   }
+   default:
+      return omx_base_component_SetParameter(handle, idx, param);
+   }
+   return OMX_ErrorNone;
+}
+
+/*
+ * OMX GetParameter entry point of the encoder.
+ *
+ * Answers the component role, video init, video port format, bitrate,
+ * quantization and profile/level queries from the private state; any other
+ * index is passed to omx_base_component_GetParameter().
+ *
+ * Returns OMX_ErrorBadParameter for a NULL param, the header check error
+ * when the structure header is rejected, OMX_ErrorBadPortIndex /
+ * OMX_ErrorNoMore for out-of-range port format queries, and OMX_ErrorNone
+ * on success.
+ */
+static OMX_ERRORTYPE vid_enc_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR param)
+{
+   OMX_COMPONENTTYPE *component = handle;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   OMX_ERRORTYPE err;
+
+   if (param == NULL)
+      return OMX_ErrorBadParameter;
+
+   if (idx == OMX_IndexParamStandardComponentRole) {
+      OMX_PARAM_COMPONENTROLETYPE *out = param;
+
+      err = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
+      if (err)
+         return err;
+
+      strcpy((char *)out->cRole, OMX_VID_ENC_AVC_ROLE);
+
+   } else if (idx == OMX_IndexParamVideoInit) {
+      err = checkHeader(param, sizeof(OMX_PORT_PARAM_TYPE));
+      if (err)
+         return err;
+
+      memcpy(param, &enc->sPortTypesParam[OMX_PortDomainVideo], sizeof(OMX_PORT_PARAM_TYPE));
+
+   } else if (idx == OMX_IndexParamVideoPortFormat) {
+      OMX_VIDEO_PARAM_PORTFORMATTYPE *out = param;
+      omx_base_video_PortType *vport;
+
+      err = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+      if (err)
+         return err;
+
+      /* only ports 0 and 1 exist, each exposing a single format entry */
+      if (out->nPortIndex > 1)
+         return OMX_ErrorBadPortIndex;
+      if (out->nIndex >= 1)
+         return OMX_ErrorNoMore;
+
+      vport = (omx_base_video_PortType *)enc->ports[out->nPortIndex];
+      memcpy(out, &vport->sVideoParam, sizeof(OMX_VIDEO_PARAM_PORTFORMATTYPE));
+
+   } else if (idx == OMX_IndexParamVideoBitrate) {
+      OMX_VIDEO_PARAM_BITRATETYPE *out = param;
+
+      err = checkHeader(param, sizeof(OMX_VIDEO_PARAM_BITRATETYPE));
+      if (err)
+         return err;
+
+      out->eControlRate = enc->bitrate.eControlRate;
+      out->nTargetBitrate = enc->bitrate.nTargetBitrate;
+
+   } else if (idx == OMX_IndexParamVideoQuantization) {
+      OMX_VIDEO_PARAM_QUANTIZATIONTYPE *out = param;
+
+      err = checkHeader(param, sizeof(OMX_VIDEO_PARAM_QUANTIZATIONTYPE));
+      if (err)
+         return err;
+
+      out->nQpI = enc->quant.nQpI;
+      out->nQpP = enc->quant.nQpP;
+      out->nQpB = enc->quant.nQpB;
+
+   } else if (idx == OMX_IndexParamVideoProfileLevelCurrent) {
+      OMX_VIDEO_PARAM_PROFILELEVELTYPE *out = param;
+
+      err = checkHeader(param, sizeof(OMX_VIDEO_PARAM_PROFILELEVELTYPE));
+      if (err)
+         return err;
+
+      out->eProfile = enc->profile_level.eProfile;
+      out->eLevel = enc->profile_level.eLevel;
+
+   } else {
+      return omx_base_component_GetParameter(handle, idx, param);
+   }
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * OMX SetConfig entry point of the encoder.
+ *
+ * OMX_IndexConfigVideoIntraVOPRefresh stores the request in force_pic_type.
+ * OMX_IndexConfigCommonScale rejects sizes below 176x144, drops the existing
+ * scaling buffers, stores the new scale and, unless both dimensions are
+ * 0xffffffff, creates a fresh set of NV12 scaling buffers of that size.
+ * Every other index goes to omx_base_component_SetConfig().
+ */
+static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config)
+{
+   OMX_COMPONENTTYPE *component = handle;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   OMX_ERRORTYPE err;
+   int n;
+
+   if (config == NULL)
+      return OMX_ErrorBadParameter;
+
+   switch (idx) {
+   case OMX_IndexConfigVideoIntraVOPRefresh: {
+      OMX_CONFIG_INTRAREFRESHVOPTYPE *refresh = config;
+
+      err = checkHeader(config, sizeof(OMX_CONFIG_INTRAREFRESHVOPTYPE));
+      if (err)
+         return err;
+
+      enc->force_pic_type = *refresh;
+      break;
+   }
+   case OMX_IndexConfigCommonScale: {
+      OMX_CONFIG_SCALEFACTORTYPE *requested = config;
+
+      err = checkHeader(config, sizeof(OMX_CONFIG_SCALEFACTORTYPE));
+      if (err)
+         return err;
+
+      if (requested->xWidth < 176 || requested->xHeight < 144)
+         return OMX_ErrorBadParameter;
+
+      /* the old buffers have the wrong size from here on */
+      for (n = 0; n < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++n) {
+         if (!enc->scale_buffer[n])
+            continue;
+         enc->scale_buffer[n]->destroy(enc->scale_buffer[n]);
+         enc->scale_buffer[n] = NULL;
+      }
+
+      enc->scale = *requested;
+      if (enc->scale.xWidth == 0xffffffff || enc->scale.xHeight == 0xffffffff)
+         break;
+
+      {
+         struct pipe_video_buffer tmpl = {};
+
+         tmpl.buffer_format = PIPE_FORMAT_NV12;
+         tmpl.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+         tmpl.width = enc->scale.xWidth;
+         tmpl.height = enc->scale.xHeight;
+         tmpl.interlaced = false;
+
+         for (n = 0; n < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++n) {
+            enc->scale_buffer[n] = enc->s_pipe->create_video_buffer(enc->s_pipe, &tmpl);
+            if (!enc->scale_buffer[n])
+               return OMX_ErrorInsufficientResources;
+         }
+      }
+      break;
+   }
+   default:
+      return omx_base_component_SetConfig(handle, idx, config);
+   }
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * OMX GetConfig entry point of the encoder.
+ *
+ * Reports the stored scale factor for OMX_IndexConfigCommonScale and
+ * forwards every other index to omx_base_component_GetConfig().
+ */
+static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx, OMX_PTR config)
+{
+   OMX_COMPONENTTYPE *component = handle;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   OMX_CONFIG_SCALEFACTORTYPE *out;
+   OMX_ERRORTYPE err;
+
+   if (config == NULL)
+      return OMX_ErrorBadParameter;
+
+   if (idx != OMX_IndexConfigCommonScale)
+      return omx_base_component_GetConfig(handle, idx, config);
+
+   err = checkHeader(config, sizeof(OMX_CONFIG_SCALEFACTORTYPE));
+   if (err)
+      return err;
+
+   out = config;
+   out->xWidth = enc->scale.xWidth;
+   out->xHeight = enc->scale.xHeight;
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * Map an OMX AVC profile value to the matching gallium video profile.
+ * Values without a mapping yield PIPE_VIDEO_PROFILE_UNKNOWN.
+ */
+static enum pipe_video_profile enc_TranslateOMXProfileToPipe(unsigned omx_profile)
+{
+   static const struct {
+      unsigned omx;
+      enum pipe_video_profile pipe;
+   } profiles[] = {
+      { OMX_VIDEO_AVCProfileBaseline, PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE },
+      { OMX_VIDEO_AVCProfileMain,     PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN },
+      { OMX_VIDEO_AVCProfileExtended, PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED },
+      { OMX_VIDEO_AVCProfileHigh,     PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH },
+      { OMX_VIDEO_AVCProfileHigh10,   PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10 },
+      { OMX_VIDEO_AVCProfileHigh422,  PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422 },
+      { OMX_VIDEO_AVCProfileHigh444,  PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444 },
+   };
+   unsigned k;
+
+   for (k = 0; k < sizeof(profiles) / sizeof(profiles[0]); ++k) {
+      if (profiles[k].omx == omx_profile)
+         return profiles[k].pipe;
+   }
+
+   return PIPE_VIDEO_PROFILE_UNKNOWN;
+}
+
+/*
+ * Map an OMX AVC level value to the numeric level handed to the codec
+ * template (level 3.1 becomes 31, and so on). Level 1b shares the value of
+ * level 1, and any value without an explicit mapping is treated as 4.2.
+ */
+static unsigned enc_TranslateOMXLevelToPipe(unsigned omx_level)
+{
+   unsigned level;
+
+   switch (omx_level) {
+   case OMX_VIDEO_AVCLevel1:
+   case OMX_VIDEO_AVCLevel1b: level = 10; break;
+   case OMX_VIDEO_AVCLevel11: level = 11; break;
+   case OMX_VIDEO_AVCLevel12: level = 12; break;
+   case OMX_VIDEO_AVCLevel13: level = 13; break;
+   case OMX_VIDEO_AVCLevel2:  level = 20; break;
+   case OMX_VIDEO_AVCLevel21: level = 21; break;
+   case OMX_VIDEO_AVCLevel22: level = 22; break;
+   case OMX_VIDEO_AVCLevel3:  level = 30; break;
+   case OMX_VIDEO_AVCLevel31: level = 31; break;
+   case OMX_VIDEO_AVCLevel32: level = 32; break;
+   case OMX_VIDEO_AVCLevel4:  level = 40; break;
+   case OMX_VIDEO_AVCLevel41: level = 41; break;
+   case OMX_VIDEO_AVCLevel5:  level = 50; break;
+   case OMX_VIDEO_AVCLevel51: level = 51; break;
+   case OMX_VIDEO_AVCLevel42:
+   default:
+      /* unknown levels fall back to 4.2 */
+      level = 42;
+      break;
+   }
+
+   return level;
+}
+
+/*
+ * Message handler hooked in front of omx_base_component_MessageHandler().
+ *
+ * On a Loaded -> Idle state change request the video codec is created from
+ * the current profile/level and either the scaling size (when a scaling
+ * buffer exists) or the input port frame size. On Idle -> Loaded the codec
+ * is destroyed again. The message is always passed on to the base handler.
+ */
+static OMX_ERRORTYPE vid_enc_MessageHandler(OMX_COMPONENTTYPE* comp, internalRequestMessageType *msg)
+{
+   vid_enc_PrivateType *enc = comp->pComponentPrivate;
+
+   if (msg->messageType != OMX_CommandStateSet)
+      return omx_base_component_MessageHandler(comp, msg);
+
+   if ((msg->messageParam == OMX_StateIdle) && (enc->state == OMX_StateLoaded)) {
+      struct pipe_video_codec tmpl = {};
+      omx_base_video_PortType *in_port =
+         (omx_base_video_PortType *)enc->ports[OMX_BASE_FILTER_INPUTPORT_INDEX];
+
+      tmpl.profile = enc_TranslateOMXProfileToPipe(enc->profile_level.eProfile);
+      tmpl.level = enc_TranslateOMXLevelToPipe(enc->profile_level.eLevel);
+      tmpl.entrypoint = PIPE_VIDEO_ENTRYPOINT_ENCODE;
+      tmpl.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+
+      /* encode at the scaled size when scaling is configured */
+      if (enc->scale_buffer[enc->current_scale_buffer])
+         tmpl.width = enc->scale.xWidth;
+      else
+         tmpl.width = in_port->sPortParam.format.video.nFrameWidth;
+
+      if (enc->scale_buffer[enc->current_scale_buffer])
+         tmpl.height = enc->scale.xHeight;
+      else
+         tmpl.height = in_port->sPortParam.format.video.nFrameHeight;
+
+      if (tmpl.profile != PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE) {
+         tmpl.max_references = OMX_VID_ENC_P_PERIOD_DEFAULT;
+         enc->stacked_frames_num = 1;
+      } else {
+         struct pipe_screen *pscreen = enc->screen->pscreen;
+
+         tmpl.max_references = 1;
+         enc->stacked_frames_num =
+            pscreen->get_video_param(pscreen,
+                                     PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
+                                     PIPE_VIDEO_ENTRYPOINT_ENCODE,
+                                     PIPE_VIDEO_CAP_STACKED_FRAMES);
+      }
+
+      enc->codec = enc->s_pipe->create_video_codec(enc->s_pipe, &tmpl);
+
+   } else if ((msg->messageParam == OMX_StateLoaded) && (enc->state == OMX_StateIdle)) {
+      if (enc->codec) {
+         enc->codec->destroy(enc->codec);
+         enc->codec = NULL;
+      }
+   }
+
+   return omx_base_component_MessageHandler(comp, msg);
+}
+
+/*
+ * AllocateBuffer implementation for the input port.
+ *
+ * Lets base_port_AllocateBuffer() create the buffer header, attaches a
+ * zeroed input_buf_private to it and replaces the heap allocated payload
+ * with a mapped back texture obtained from enc_AllocateBackTexture().
+ *
+ * Returns the base port error, OMX_ErrorInsufficientResources when the
+ * private data cannot be allocated, the texture allocation error, or
+ * OMX_ErrorNone. On failure the buffer header is released again.
+ */
+static OMX_ERRORTYPE vid_enc_AllocateInBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf,
+                                              OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size)
+{
+   struct input_buf_private *inp;
+   OMX_ERRORTYPE r;
+
+   r = base_port_AllocateBuffer(port, buf, idx, private, size);
+   if (r)
+      return r;
+
+   inp = (*buf)->pInputPortPrivate = CALLOC_STRUCT(input_buf_private);
+   if (!inp) {
+      base_port_FreeBuffer(port, idx, *buf);
+      return OMX_ErrorInsufficientResources;
+   }
+
+   LIST_INITHEAD(&inp->tasks);
+
+   /*
+    * The payload comes from the back texture instead. Clear the pointer
+    * right after freeing it so that the error path below cannot hand a
+    * stale pointer to base_port_FreeBuffer().
+    */
+   FREE((*buf)->pBuffer);
+   (*buf)->pBuffer = NULL;
+
+   r = enc_AllocateBackTexture(port, &inp->resource, &inp->transfer, &(*buf)->pBuffer);
+   if (r) {
+      FREE(inp);
+      (*buf)->pInputPortPrivate = NULL;
+      base_port_FreeBuffer(port, idx, *buf);
+      return r;
+   }
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * UseBuffer implementation for the input port.
+ *
+ * The client memory is registered through base_port_UseBuffer(); on top of
+ * that a zeroed input_buf_private with an empty task list is attached to
+ * the header. No back texture is created for such buffers.
+ */
+static OMX_ERRORTYPE vid_enc_UseInBuffer(omx_base_PortType *port, OMX_BUFFERHEADERTYPE **buf, OMX_U32 idx,
+                                         OMX_PTR private, OMX_U32 size, OMX_U8 *mem)
+{
+   struct input_buf_private *priv_data;
+   OMX_ERRORTYPE err = base_port_UseBuffer(port, buf, idx, private, size, mem);
+
+   if (err)
+      return err;
+
+   priv_data = CALLOC_STRUCT(input_buf_private);
+   (*buf)->pInputPortPrivate = priv_data;
+   if (priv_data == NULL) {
+      base_port_FreeBuffer(port, idx, *buf);
+      return OMX_ErrorInsufficientResources;
+   }
+
+   LIST_INITHEAD(&priv_data->tasks);
+
+   return OMX_ErrorNone;
+}
+
+/*
+ * FreeBuffer implementation for the input port.
+ *
+ * Releases the tasks still attached to the buffer, unmaps and unreferences
+ * the back texture (if any) and frees the private data. pBuffer is cleared
+ * before base_port_FreeBuffer() gets the header.
+ */
+static OMX_ERRORTYPE vid_enc_FreeInBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf)
+{
+   OMX_COMPONENTTYPE *component = port->standCompContainer;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   struct input_buf_private *priv_data = buf->pInputPortPrivate;
+
+   if (priv_data != NULL) {
+      enc_ReleaseTasks(&priv_data->tasks);
+
+      if (priv_data->transfer)
+         pipe_transfer_unmap(enc->s_pipe, priv_data->transfer);
+
+      pipe_resource_reference(&priv_data->resource, NULL);
+      FREE(priv_data);
+   }
+
+   buf->pBuffer = NULL;
+
+   return base_port_FreeBuffer(port, idx, buf);
+}
+
+/*
+ * AllocateBuffer implementation for the output port.
+ *
+ * The payload allocated by base_port_AllocateBuffer() is freed and pBuffer
+ * cleared; the header only receives a zeroed output_buf_private.
+ */
+static OMX_ERRORTYPE vid_enc_AllocateOutBuffer(omx_base_PortType *port, OMX_INOUT OMX_BUFFERHEADERTYPE **buf,
+                                               OMX_IN OMX_U32 idx, OMX_IN OMX_PTR private, OMX_IN OMX_U32 size)
+{
+   OMX_BUFFERHEADERTYPE *hdr;
+   OMX_ERRORTYPE err = base_port_AllocateBuffer(port, buf, idx, private, size);
+
+   if (err)
+      return err;
+
+   hdr = *buf;
+
+   FREE(hdr->pBuffer);
+   hdr->pBuffer = NULL;
+
+   hdr->pOutputPortPrivate = CALLOC(1, sizeof(struct output_buf_private));
+   if (hdr->pOutputPortPrivate)
+      return OMX_ErrorNone;
+
+   base_port_FreeBuffer(port, idx, hdr);
+   return OMX_ErrorInsufficientResources;
+}
+
+/*
+ * FreeBuffer implementation for the output port.
+ *
+ * Unmaps a still mapped bitstream on t_pipe, drops the bitstream reference
+ * and frees the private data, then clears pBuffer and lets
+ * base_port_FreeBuffer() release the header.
+ */
+static OMX_ERRORTYPE vid_enc_FreeOutBuffer(omx_base_PortType *port, OMX_U32 idx, OMX_BUFFERHEADERTYPE *buf)
+{
+   OMX_COMPONENTTYPE *component = port->standCompContainer;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   struct output_buf_private *priv_data = buf->pOutputPortPrivate;
+
+   if (priv_data) {
+      if (priv_data->transfer)
+         pipe_transfer_unmap(enc->t_pipe, priv_data->transfer);
+
+      pipe_resource_reference(&priv_data->bitstream, NULL);
+      FREE(priv_data);
+      buf->pOutputPortPrivate = NULL;
+   }
+
+   buf->pBuffer = NULL;
+
+   return base_port_FreeBuffer(port, idx, buf);
+}
+
+/*
+ * Obtain an encode task.
+ *
+ * The first entry of the free list is reused when there is one. Otherwise
+ * a new task is allocated together with an NV12 video buffer of the port's
+ * frame size. Returns NULL when either allocation fails.
+ */
+static struct encode_task *enc_NeedTask(omx_base_PortType *port)
+{
+   OMX_COMPONENTTYPE *component = port->standCompContainer;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   OMX_VIDEO_PORTDEFINITIONTYPE *video = &port->sPortParam.format.video;
+   struct pipe_video_buffer tmpl = {};
+   struct encode_task *fresh;
+
+   if (!LIST_IS_EMPTY(&enc->free_tasks)) {
+      struct encode_task *recycled =
+         LIST_ENTRY(struct encode_task, enc->free_tasks.next, list);
+
+      LIST_DEL(&recycled->list);
+      return recycled;
+   }
+
+   /* nothing to recycle, build a new task */
+   fresh = CALLOC_STRUCT(encode_task);
+   if (fresh == NULL)
+      return NULL;
+
+   tmpl.buffer_format = PIPE_FORMAT_NV12;
+   tmpl.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+   tmpl.width = video->nFrameWidth;
+   tmpl.height = video->nFrameHeight;
+   tmpl.interlaced = false;
+
+   fresh->buf = enc->s_pipe->create_video_buffer(enc->s_pipe, &tmpl);
+   if (fresh->buf)
+      return fresh;
+
+   FREE(fresh);
+   return NULL;
+}
+
+/*
+ * Append every entry of list "from" to the tail of list "to" and leave
+ * "from" empty.
+ *
+ * The statement order matters: when "from" is empty its next/prev point at
+ * the head itself, and the second assignment redirects from->prev to the
+ * tail of "to", so the last two assignments then leave "to" unchanged. Do
+ * not cache from->next / from->prev in locals without handling the empty
+ * case separately.
+ */
+static void enc_MoveTasks(struct list_head *from, struct list_head *to)
+{
+ to->prev->next = from->next;
+ from->next->prev = to->prev;
+ from->prev->next = to;
+ to->prev = from->prev;
+ LIST_INITHEAD(from);
+}
+
+/*
+ * Destroy every task on the given list: the bitstream reference is
+ * dropped, the video buffer destroyed and the task itself freed. A NULL
+ * head or a head whose next pointer is NULL is ignored.
+ */
+static void enc_ReleaseTasks(struct list_head *head)
+{
+   struct encode_task *cur, *following;
+
+   if (head && head->next) {
+      LIST_FOR_EACH_ENTRY_SAFE(cur, following, head, list) {
+         pipe_resource_reference(&cur->bitstream, NULL);
+         cur->buf->destroy(cur->buf);
+         FREE(cur);
+      }
+   }
+}
+
+/*
+ * Copy the picture held by an input buffer into the video buffer vbuf.
+ *
+ * Two paths exist, selected by whether the buffer owns a back texture
+ * (inp->resource):
+ *  - without one, the data at buf->pBuffer is uploaded with
+ *    texture_subdata(): a full size plane first, then a half size plane
+ *    that starts nStride * nFrameHeight bytes into the buffer;
+ *  - with one, the texture is unmapped, copied/blitted into the resources
+ *    of vbuf on the GPU and mapped again.
+ *
+ * Returns OMX_ErrorInsufficientResources when the sampler view planes of
+ * vbuf cannot be obtained, OMX_ErrorNone otherwise.
+ */
+static OMX_ERRORTYPE enc_LoadImage(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf,
+ struct pipe_video_buffer *vbuf)
+{
+ OMX_COMPONENTTYPE* comp = port->standCompContainer;
+ vid_enc_PrivateType *priv = comp->pComponentPrivate;
+ OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video;
+ struct pipe_box box = {};
+ struct input_buf_private *inp = buf->pInputPortPrivate;
+
+ if (!inp->resource) {
+ struct pipe_sampler_view **views;
+ void *ptr;
+
+ views = vbuf->get_sampler_view_planes(vbuf);
+ if (!views)
+ return OMX_ErrorInsufficientResources;
+
+ /* first plane: full frame size, read from the start of the buffer */
+ ptr = buf->pBuffer;
+ box.width = def->nFrameWidth;
+ box.height = def->nFrameHeight;
+ box.depth = 1;
+ priv->s_pipe->texture_subdata(priv->s_pipe, views[0]->texture, 0,
+ PIPE_TRANSFER_WRITE, &box,
+ ptr, def->nStride, 0);
+ /* second plane: half size, located right behind the first plane's rows */
+ ptr = ((uint8_t*)buf->pBuffer) + (def->nStride * box.height);
+ box.width = def->nFrameWidth / 2;
+ box.height = def->nFrameHeight / 2;
+ box.depth = 1;
+ priv->s_pipe->texture_subdata(priv->s_pipe, views[1]->texture, 0,
+ PIPE_TRANSFER_WRITE, &box,
+ ptr, def->nStride, 0);
+ } else {
+ struct pipe_blit_info blit;
+ struct vl_video_buffer *dst_buf = (struct vl_video_buffer *)vbuf;
+
+ /* the texture must not stay mapped while it is used as a copy source */
+ pipe_transfer_unmap(priv->s_pipe, inp->transfer);
+
+ box.width = def->nFrameWidth;
+ box.height = def->nFrameHeight;
+ box.depth = 1;
+
+ /* top nFrameHeight rows go straight into the first destination resource */
+ priv->s_pipe->resource_copy_region(priv->s_pipe,
+ dst_buf->resources[0],
+ 0, 0, 0, 0, inp->resource, 0, &box);
+
+ memset(&blit, 0, sizeof(blit));
+ blit.src.resource = inp->resource;
+ blit.src.format = inp->resource->format;
+
+ /* source region: the rows below the first plane, full width */
+ blit.src.box.x = 0;
+ blit.src.box.y = def->nFrameHeight;
+ blit.src.box.width = def->nFrameWidth;
+ blit.src.box.height = def->nFrameHeight / 2 ;
+ blit.src.box.depth = 1;
+
+ blit.dst.resource = dst_buf->resources[1];
+ blit.dst.format = blit.dst.resource->format;
+
+ blit.dst.box.width = def->nFrameWidth / 2;
+ blit.dst.box.height = def->nFrameHeight / 2;
+ blit.dst.box.depth = 1;
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ /*
+ * Two blits of the same region, the second one shifted by one source
+ * column, each writing a single destination channel (G, then R).
+ * NOTE(review): this looks like it splits interleaved chroma samples
+ * into the two channels of the destination plane -- confirm.
+ */
+ blit.mask = PIPE_MASK_G;
+ priv->s_pipe->blit(priv->s_pipe, &blit);
+
+ blit.src.box.x = 1;
+ blit.mask = PIPE_MASK_R;
+ priv->s_pipe->blit(priv->s_pipe, &blit);
+ priv->s_pipe->flush(priv->s_pipe, NULL, 0);
+
+ /* map the whole texture again and publish the new pointer */
+ box.width = inp->resource->width0;
+ box.height = inp->resource->height0;
+ box.depth = inp->resource->depth0;
+ buf->pBuffer = priv->s_pipe->transfer_map(priv->s_pipe, inp->resource, 0,
+ PIPE_TRANSFER_WRITE, &box,
+ &inp->transfer);
+ }
+
+ return OMX_ErrorNone;
+}
+
+/*
+ * Scale the input picture into the current scaling buffer, if scaling is
+ * configured.
+ *
+ * When no scaling buffer exists at the current index the function returns
+ * without touching *vbuf or *size. Otherwise each plane of *vbuf is
+ * rendered through the compositor into the matching surface of the scaling
+ * buffer, *size is set to xWidth * xHeight * 2, *vbuf is replaced by the
+ * scaling buffer and the ring index advances.
+ */
+static void enc_ScaleInput(omx_base_PortType *port, struct pipe_video_buffer **vbuf, unsigned *size)
+{
+ OMX_COMPONENTTYPE* comp = port->standCompContainer;
+ vid_enc_PrivateType *priv = comp->pComponentPrivate;
+ OMX_VIDEO_PORTDEFINITIONTYPE *def = &port->sPortParam.format.video;
+ struct pipe_video_buffer *src_buf = *vbuf;
+ struct vl_compositor *compositor = &priv->compositor;
+ struct vl_compositor_state *s = &priv->cstate;
+ struct pipe_sampler_view **views;
+ struct pipe_surface **dst_surface;
+ unsigned i;
+
+ if (!priv->scale_buffer[priv->current_scale_buffer])
+ return;
+
+ views = src_buf->get_sampler_view_planes(src_buf);
+ dst_surface = priv->scale_buffer[priv->current_scale_buffer]->get_surfaces
+ (priv->scale_buffer[priv->current_scale_buffer]);
+ vl_compositor_clear_layers(s);
+
+ for (i = 0; i < VL_MAX_SURFACES; ++i) {
+ struct u_rect src_rect;
+ /* skip planes that either side does not provide */
+ if (!views[i] || !dst_surface[i])
+ continue;
+ src_rect.x0 = 0;
+ src_rect.y0 = 0;
+ src_rect.x1 = def->nFrameWidth;
+ src_rect.y1 = def->nFrameHeight;
+ /* every plane after the first is sampled at half the frame size */
+ if (i > 0) {
+ src_rect.x1 /= 2;
+ src_rect.y1 /= 2;
+ }
+ vl_compositor_set_rgba_layer(s, compositor, 0, views[i], &src_rect, NULL, NULL);
+ vl_compositor_render(s, compositor, dst_surface[i], NULL, false);
+ }
+ *size = priv->scale.xWidth * priv->scale.xHeight * 2;
+ /* hand out the buffer just filled and move on to the next one in the ring */
+ *vbuf = priv->scale_buffer[priv->current_scale_buffer++];
+ priv->current_scale_buffer %= OMX_VID_ENC_NUM_SCALING_BUFFERS;
+}
+
+/*
+ * Fill in the fixed motion estimation and picture control values used for
+ * every encoded picture.
+ */
+static void enc_GetPictureParamPreset(struct pipe_h264_enc_picture_desc *picture)
+{
+   /* picture control */
+   picture->pic_ctrl.enc_constraint_set_flags = 0x00000040;
+
+   /* motion estimation */
+   picture->motion_est.enc_ime2_search_range_x = 0x00000001;
+   picture->motion_est.enc_ime2_search_range_y = 0x00000001;
+   picture->motion_est.enc_disable_sub_mode = 0x000000fe;
+}
+
+/*
+ * Fill the rate control, quantizer and reference fields of a picture
+ * description from the component state.
+ *
+ * The OMX rate control mode selects the pipe rate control method (anything
+ * unrecognised disables rate control). With rate control enabled the target
+ * bitrate is clamped to [OMX_VID_ENC_BITRATE_MIN, OMX_VID_ENC_BITRATE_MAX]
+ * and the peak bitrate, VBV buffer size and per-picture bit budgets are
+ * derived from it. The fixed presets are applied last.
+ */
+static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_picture_desc *picture)
+{
+   OMX_COMPONENTTYPE *component = port->standCompContainer;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   struct pipe_h264_enc_rate_control *rc = &picture->rate_ctrl;
+
+   switch (enc->bitrate.eControlRate) {
+   case OMX_Video_ControlRateVariable:
+      rc->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE;
+      break;
+   case OMX_Video_ControlRateConstant:
+      rc->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT;
+      break;
+   case OMX_Video_ControlRateVariableSkipFrames:
+      rc->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP;
+      break;
+   case OMX_Video_ControlRateConstantSkipFrames:
+      rc->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP;
+      break;
+   default:
+      rc->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE;
+      break;
+   }
+
+   /* the stored frame rate is shifted down by 16 bits to get its integer part */
+   rc->frame_rate_den = OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT;
+   rc->frame_rate_num = ((enc->frame_rate) >> 16) * rc->frame_rate_den;
+
+   if (rc->rate_ctrl_method != PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE) {
+      /* clamp the requested bitrate into the supported range */
+      if (enc->bitrate.nTargetBitrate < OMX_VID_ENC_BITRATE_MIN) {
+         rc->target_bitrate = OMX_VID_ENC_BITRATE_MIN;
+      } else if (enc->bitrate.nTargetBitrate < OMX_VID_ENC_BITRATE_MAX) {
+         rc->target_bitrate = enc->bitrate.nTargetBitrate;
+      } else {
+         rc->target_bitrate = OMX_VID_ENC_BITRATE_MAX;
+      }
+      rc->peak_bitrate = rc->target_bitrate;
+
+      if (rc->target_bitrate >= OMX_VID_ENC_BITRATE_MEDIAN)
+         rc->vbv_buffer_size = rc->target_bitrate;
+      else
+         rc->vbv_buffer_size = MIN2((rc->target_bitrate * 2.75), OMX_VID_ENC_BITRATE_MEDIAN);
+
+      if (!rc->frame_rate_num) {
+         rc->target_bits_picture = rc->target_bitrate;
+      } else {
+         /* 64 bit intermediate so that bitrate * denominator cannot wrap */
+         unsigned long long bits = rc->target_bitrate;
+
+         bits *= rc->frame_rate_den;
+         rc->target_bits_picture = bits / rc->frame_rate_num;
+      }
+      rc->peak_bits_picture_integer = rc->target_bits_picture;
+      rc->peak_bits_picture_fraction = 0;
+   }
+
+   picture->quant_i_frames = enc->quant.nQpI;
+   picture->quant_p_frames = enc->quant.nQpP;
+   picture->quant_b_frames = enc->quant.nQpB;
+
+   picture->frame_num = enc->frame_num;
+   picture->ref_idx_l0 = enc->ref_idx_l0;
+   picture->ref_idx_l1 = enc->ref_idx_l1;
+   picture->enable_vui = (picture->rate_ctrl.frame_rate_num != 0);
+
+   enc_GetPictureParamPreset(picture);
+}
+
+/*
+ * Encode the picture of one task.
+ *
+ * The input is scaled first when scaling is configured, a staging buffer
+ * for the bitstream is created on the task, the picture description is
+ * filled in and the frame is submitted through begin_frame /
+ * encode_bitstream / end_frame.
+ */
+static void enc_HandleTask(omx_base_PortType *port, struct encode_task *task,
+                           enum pipe_h264_enc_picture_type picture_type)
+{
+   OMX_COMPONENTTYPE *component = port->standCompContainer;
+   vid_enc_PrivateType *enc = component->pComponentPrivate;
+   struct pipe_h264_enc_picture_desc pic = {};
+   struct pipe_video_buffer *frame = task->buf;
+   unsigned out_size = enc->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]->sPortParam.nBufferSize;
+
+   /* scaling may replace both the frame and the required output size */
+   enc_ScaleInput(port, &frame, &out_size);
+   enc->s_pipe->flush(enc->s_pipe, NULL, 0);
+
+   /* staging buffer that later gets mapped to read the result */
+   task->bitstream = pipe_buffer_create(enc->s_pipe->screen,
+                                        PIPE_BIND_VERTEX_BUFFER,
+                                        PIPE_USAGE_STAGING,
+                                        out_size);
+
+   pic.picture_type = picture_type;
+   pic.pic_order_cnt = task->pic_order_cnt;
+   if (picture_type == PIPE_H264_ENC_PICTURE_TYPE_B && enc->restricted_b_frames)
+      pic.not_referenced = true;
+   enc_ControlPicture(port, &pic);
+
+   enc->codec->begin_frame(enc->codec, frame, &pic.base);
+   enc->codec->encode_bitstream(enc->codec, frame, task->bitstream, &task->feedback);
+   enc->codec->end_frame(enc->codec, frame, &pic.base);
+}
+
+/*
+ * Encode all pictures still waiting in the B frame queue and attach the
+ * resulting tasks to the given input buffer.
+ *
+ * The last queued picture is taken out and encoded as a P frame; the
+ * remaining ones are then encoded as B frames. Does nothing when the queue
+ * is empty. frame_num, ref_idx_l0 and ref_idx_l1 are updated along the way,
+ * so the statement order must be kept.
+ */
+static void enc_ClearBframes(omx_base_PortType *port, struct input_buf_private *inp)
+{
+ OMX_COMPONENTTYPE* comp = port->standCompContainer;
+ vid_enc_PrivateType *priv = comp->pComponentPrivate;
+ struct encode_task *task;
+
+ if (LIST_IS_EMPTY(&priv->b_frames))
+ return;
+
+ /* take the most recently queued picture off the tail */
+ task = LIST_ENTRY(struct encode_task, priv->b_frames.prev, list);
+ LIST_DEL(&task->list);
+
+ /* promote the last frame to a P frame */
+ priv->ref_idx_l0 = priv->ref_idx_l1;
+ enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_P);
+ LIST_ADDTAIL(&task->list, &inp->tasks);
+ priv->ref_idx_l1 = priv->frame_num++;
+
+ /* handle B frames */
+ LIST_FOR_EACH_ENTRY(task, &priv->b_frames, list) {
+ enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_B);
+ /* unless restricted, the B frame just encoded becomes the next l0 reference */
+ if (!priv->restricted_b_frames)
+ priv->ref_idx_l0 = priv->frame_num;
+ priv->frame_num++;
+ }
+
+ /* everything encoded above now belongs to the input buffer */
+ enc_MoveTasks(&priv->b_frames, &inp->tasks);
+}
+
+/*
+ * Input port buffer handler: turns one input buffer into encode work.
+ *
+ * Tasks left on the buffer from its previous use are recycled, a task is
+ * obtained for the new picture, the picture is loaded into the task's
+ * video buffer and the picture type is chosen (IDR every
+ * OMX_VID_ENC_IDR_PERIOD_DEFAULT pictures or on request, P for baseline
+ * profile / every OMX_VID_ENC_P_PERIOD_DEFAULT pictures / on EOS, B
+ * otherwise). B pictures are queued; I and P pictures are encoded at once
+ * followed by the queued B pictures.
+ *
+ * An empty buffer is passed on without encoding; with the EOS flag set the
+ * queued B pictures and stacked tasks are drained and the codec flushed.
+ *
+ * Returns OMX_ErrorInsufficientResources when no task can be obtained, the
+ * enc_LoadImage() error when loading fails, otherwise the result of
+ * returning or forwarding the buffer.
+ */
+static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEADERTYPE *buf)
+{
+   OMX_COMPONENTTYPE* comp = port->standCompContainer;
+   vid_enc_PrivateType *priv = comp->pComponentPrivate;
+   struct input_buf_private *inp = buf->pInputPortPrivate;
+   enum pipe_h264_enc_picture_type picture_type;
+   struct encode_task *task;
+   unsigned stacked_num = 0;
+   OMX_ERRORTYPE err;
+
+   /* recycle the tasks this buffer carried during its previous round */
+   enc_MoveTasks(&inp->tasks, &priv->free_tasks);
+   task = enc_NeedTask(port);
+   if (!task)
+      return OMX_ErrorInsufficientResources;
+
+   if (buf->nFilledLen == 0) {
+      /* nothing to encode: give the unused task back instead of losing it */
+      LIST_ADDTAIL(&task->list, &priv->free_tasks);
+
+      if (buf->nFlags & OMX_BUFFERFLAG_EOS) {
+         buf->nFilledLen = buf->nAllocLen;
+         enc_ClearBframes(port, inp);
+         enc_MoveTasks(&priv->stacked_tasks, &inp->tasks);
+         priv->codec->flush(priv->codec);
+      }
+      return base_port_SendBufferFunction(port, buf);
+   }
+
+   if (buf->pOutputPortPrivate) {
+      /* the buffer already carries a video buffer: swap it with the task's */
+      struct pipe_video_buffer *vbuf = buf->pOutputPortPrivate;
+      buf->pOutputPortPrivate = task->buf;
+      task->buf = vbuf;
+   } else {
+      /* ------- load input image into video buffer ---- */
+      err = enc_LoadImage(port, buf, task->buf);
+      if (err != OMX_ErrorNone) {
+         /* keep the task and its video buffer for reuse rather than leaking the buffer */
+         LIST_ADDTAIL(&task->list, &priv->free_tasks);
+         return err;
+      }
+   }
+
+   /* -------------- determine picture type --------- */
+   if (!(priv->pic_order_cnt % OMX_VID_ENC_IDR_PERIOD_DEFAULT) ||
+       priv->force_pic_type.IntraRefreshVOP) {
+      enc_ClearBframes(port, inp);
+      picture_type = PIPE_H264_ENC_PICTURE_TYPE_IDR;
+      priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
+      priv->frame_num = 0;
+   } else if (priv->codec->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE ||
+              !(priv->pic_order_cnt % OMX_VID_ENC_P_PERIOD_DEFAULT) ||
+              (buf->nFlags & OMX_BUFFERFLAG_EOS)) {
+      picture_type = PIPE_H264_ENC_PICTURE_TYPE_P;
+   } else {
+      picture_type = PIPE_H264_ENC_PICTURE_TYPE_B;
+   }
+
+   task->pic_order_cnt = priv->pic_order_cnt++;
+
+   if (picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+      /* put frame at the tail of the queue */
+      LIST_ADDTAIL(&task->list, &priv->b_frames);
+   } else {
+      /* handle I or P frame */
+      priv->ref_idx_l0 = priv->ref_idx_l1;
+      enc_HandleTask(port, task, picture_type);
+      LIST_ADDTAIL(&task->list, &priv->stacked_tasks);
+
+      /* count the stacked tasks; the oldest one is released once the stack is full */
+      LIST_FOR_EACH_ENTRY(task, &priv->stacked_tasks, list) {
+         ++stacked_num;
+      }
+      if (stacked_num == priv->stacked_frames_num) {
+         struct encode_task *t;
+         t = LIST_ENTRY(struct encode_task, priv->stacked_tasks.next, list);
+         LIST_DEL(&t->list);
+         LIST_ADDTAIL(&t->list, &inp->tasks);
+      }
+      priv->ref_idx_l1 = priv->frame_num++;
+
+      /* handle B frames */
+      LIST_FOR_EACH_ENTRY(task, &priv->b_frames, list) {
+         enc_HandleTask(port, task, PIPE_H264_ENC_PICTURE_TYPE_B);
+         if (!priv->restricted_b_frames)
+            priv->ref_idx_l0 = priv->frame_num;
+         priv->frame_num++;
+      }
+
+      enc_MoveTasks(&priv->b_frames, &inp->tasks);
+   }
+
+   /* without finished tasks the buffer goes straight back to the client */
+   if (LIST_IS_EMPTY(&inp->tasks))
+      return port->ReturnBufferFunction(port, buf);
+   else
+      return base_port_SendBufferFunction(port, buf);
+}
+
+/*
+ * Buffer management callback: move one finished task from the input
+ * buffer to the output buffer.
+ *
+ * When the input buffer has no private data or no task left, it is marked
+ * empty and (if it has private data) takes over the used tasks so they get
+ * recycled on its next use. Otherwise the first task is moved to the used
+ * list, its bitstream is handed to the output buffer and mapped on t_pipe,
+ * and the encoded size reported by the codec becomes nFilledLen.
+ */
+static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output)
+{
+   vid_enc_PrivateType *priv = comp->pComponentPrivate;
+   struct output_buf_private *outp = output->pOutputPortPrivate;
+   struct input_buf_private *inp = input->pInputPortPrivate;
+   struct encode_task *task;
+   struct pipe_box box = {};
+   unsigned size;
+
+   if (!inp || LIST_IS_EMPTY(&inp->tasks)) {
+      input->nFilledLen = 0; /* mark buffer as empty */
+      /* without private data there is no task list to move anything to */
+      if (inp)
+         enc_MoveTasks(&priv->used_tasks, &inp->tasks);
+      return;
+   }
+
+   task = LIST_ENTRY(struct encode_task, inp->tasks.next, list);
+   LIST_DEL(&task->list);
+   LIST_ADDTAIL(&task->list, &priv->used_tasks);
+
+   if (!task->bitstream)
+      return;
+
+   /* ------------- map result buffer ----------------- */
+
+   if (outp->transfer)
+      pipe_transfer_unmap(priv->t_pipe, outp->transfer);
+
+   /* the output buffer takes over the task's reference to the bitstream */
+   pipe_resource_reference(&outp->bitstream, task->bitstream);
+   pipe_resource_reference(&task->bitstream, NULL);
+
+   box.width = outp->bitstream->width0;
+   box.height = outp->bitstream->height0;
+   box.depth = outp->bitstream->depth0;
+
+   output->pBuffer = priv->t_pipe->transfer_map(priv->t_pipe, outp->bitstream, 0,
+                                                PIPE_TRANSFER_READ_WRITE,
+                                                &box, &outp->transfer);
+
+   /* ------------- get size of result ----------------- */
+
+   priv->codec->get_feedback(priv->codec, task->feedback, &size);
+
+   output->nOffset = 0;
+   output->nFilledLen = size; /* mark buffer as full */
+
+   /* all output buffers contain exactly one frame */
+   output->nFlags = OMX_BUFFERFLAG_ENDOFFRAME;
+}
diff --git a/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h
new file mode 100644
index 000000000..a83374450
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/omx_bellagio/vid_enc.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#ifndef OMX_VID_ENC_H
+#define OMX_VID_ENC_H
+
+#include <OMX_Types.h>
+#include <OMX_Component.h>
+#include <OMX_Core.h>
+
+#include <bellagio/st_static_component_loader.h>
+#include <bellagio/omx_base_filter.h>
+
+#include "util/list.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_compositor.h"
+
+/* component names and the single role accepted by the encoder */
+#define OMX_VID_ENC_BASE_NAME "OMX.mesa.video_encoder"
+#define OMX_VID_ENC_AVC_NAME "OMX.mesa.video_encoder.avc"
+#define OMX_VID_ENC_AVC_ROLE "video_encoder.avc"
+
+/* the target bitrate is clamped to [MIN, MAX]; MEDIAN bounds the VBV buffer size for low bitrates */
+#define OMX_VID_ENC_BITRATE_MIN 64000
+#define OMX_VID_ENC_BITRATE_MEDIAN 2000000
+#define OMX_VID_ENC_BITRATE_MAX 240000000
+/* denominator used for the frame rate passed to rate control */
+#define OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT 1001
+/* initial quantizer values for I, P and B frames */
+#define OMX_VID_ENC_QUANT_I_FRAMES_DEFAULT 0x1c
+#define OMX_VID_ENC_QUANT_P_FRAMES_DEFAULT 0x1c
+#define OMX_VID_ENC_QUANT_B_FRAMES_DEFAULT 0x1c
+/* a scale of 0xffffffff means that no scaling buffers are created */
+#define OMX_VID_ENC_SCALING_WIDTH_DEFAULT 0xffffffff
+#define OMX_VID_ENC_SCALING_HEIGHT_DEFAULT 0xffffffff
+/* picture order count periods at which an IDR / P picture is emitted */
+#define OMX_VID_ENC_IDR_PERIOD_DEFAULT 1000
+#define OMX_VID_ENC_P_PERIOD_DEFAULT 3
+
+/* number of scaling buffers cycled through when scaling is enabled */
+#define OMX_VID_ENC_NUM_SCALING_BUFFERS 4
+
+/*
+ * Private state of the encoder component, derived from the Bellagio base
+ * filter private type.
+ *
+ * screen / s_pipe / t_pipe / codec: gallium objects; s_pipe is used for
+ *   texture upload, scaling and encoding, t_pipe for mapping the resulting
+ *   bitstream.
+ * free_tasks / used_tasks / b_frames / stacked_tasks: lists of encode
+ *   tasks in their different stages.
+ * frame_rate, frame_num, pic_order_cnt, ref_idx_l0/l1: per-stream counters
+ *   and reference indices copied into each picture description.
+ * bitrate, quant, profile_level, force_pic_type: last values set through
+ *   SetParameter / SetConfig.
+ * compositor, cstate, scale_buffer, scale, current_scale_buffer: input
+ *   scaling state; scale_buffer is used as a ring.
+ * stacked_frames_num: number of tasks kept on stacked_tasks before the
+ *   oldest one is released to the input buffer.
+ */
+DERIVEDCLASS(vid_enc_PrivateType, omx_base_filter_PrivateType)
+#define vid_enc_PrivateType_FIELDS omx_base_filter_PrivateType_FIELDS \
+ struct vl_screen *screen; \
+ struct pipe_context *s_pipe; \
+ struct pipe_context *t_pipe; \
+ struct pipe_video_codec *codec; \
+ struct list_head free_tasks; \
+ struct list_head used_tasks; \
+ struct list_head b_frames; \
+ struct list_head stacked_tasks; \
+ OMX_U32 frame_rate; \
+ OMX_U32 frame_num; \
+ OMX_U32 pic_order_cnt; \
+ OMX_U32 ref_idx_l0, ref_idx_l1; \
+ OMX_BOOL restricted_b_frames; \
+ OMX_VIDEO_PARAM_BITRATETYPE bitrate; \
+ OMX_VIDEO_PARAM_QUANTIZATIONTYPE quant; \
+ OMX_VIDEO_PARAM_PROFILELEVELTYPE profile_level; \
+ OMX_CONFIG_INTRAREFRESHVOPTYPE force_pic_type; \
+ struct vl_compositor compositor; \
+ struct vl_compositor_state cstate; \
+ struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS]; \
+ OMX_CONFIG_SCALEFACTORTYPE scale; \
+ OMX_U32 current_scale_buffer; \
+ OMX_U32 stacked_frames_num;
+ENDCLASS(vid_enc_PrivateType)
+
+OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp);
+
+#endif
diff --git a/lib/mesa/src/gallium/state_trackers/va/Makefile.sources b/lib/mesa/src/gallium/state_trackers/va/Makefile.sources
index daebf0120..2d6546b4b 100644
--- a/lib/mesa/src/gallium/state_trackers/va/Makefile.sources
+++ b/lib/mesa/src/gallium/state_trackers/va/Makefile.sources
@@ -10,6 +10,7 @@ C_SOURCES := \
picture_h264.c \
picture_hevc.c \
picture_vc1.c \
+ picture_mjpeg.c \
postproc.c \
subpicture.c \
surface.c \
diff --git a/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c b/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c
new file mode 100644
index 000000000..396b74344
--- /dev/null
+++ b/lib/mesa/src/gallium/state_trackers/va/picture_mjpeg.c
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "va_private.h"
+
+/* Mirror a VA JPEG-baseline picture parameter buffer into context->desc.mjpeg.picture_parameter. */
+void vlVaHandlePictureParameterBufferMJPEG(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
+{
+   VAPictureParameterBufferJPEGBaseline *pic = buf->data;
+   int c;
+
+   assert(buf->size >= sizeof(VAPictureParameterBufferJPEGBaseline) && buf->num_elements == 1);
+
+   context->desc.mjpeg.picture_parameter.num_components = pic->num_components;
+   context->desc.mjpeg.picture_parameter.picture_height = pic->picture_height;
+   context->desc.mjpeg.picture_parameter.picture_width = pic->picture_width;
+
+   /* Each component ORs (h_sampling_factor << 4 | v_sampling_factor) into context->mjpeg.sampling_factor
+    * after shifting the previous value left by 8. The field is not cleared in this function. */
+   for (c = 0; c < pic->num_components; c++) {
+      unsigned packed = pic->components[c].h_sampling_factor << 4 | pic->components[c].v_sampling_factor;
+
+      context->desc.mjpeg.picture_parameter.components[c].component_id =
+         pic->components[c].component_id;
+      context->desc.mjpeg.picture_parameter.components[c].h_sampling_factor =
+         pic->components[c].h_sampling_factor;
+      context->desc.mjpeg.picture_parameter.components[c].v_sampling_factor =
+         pic->components[c].v_sampling_factor;
+      context->desc.mjpeg.picture_parameter.components[c].quantiser_table_selector =
+         pic->components[c].quantiser_table_selector;
+      context->mjpeg.sampling_factor = (context->mjpeg.sampling_factor << 8) | packed;
+   }
+}
+
+/* Copy 4 bytes of load_quantiser_table and 4 * 64 bytes of quantiser_table out of a VA IQ matrix buffer. */
+void vlVaHandleIQMatrixBufferMJPEG(vlVaContext *context, vlVaBuffer *buf)
+{
+   VAIQMatrixBufferJPEGBaseline *iq = buf->data;
+
+   assert(buf->size >= sizeof(VAIQMatrixBufferJPEGBaseline) && buf->num_elements == 1);
+   memcpy(&context->desc.mjpeg.quantization_table.quantiser_table, iq->quantiser_table, 4 * 64);
+   memcpy(&context->desc.mjpeg.quantization_table.load_quantiser_table, iq->load_quantiser_table, 4);
+}
+
+/* Copy both JPEG-baseline Huffman tables from a VA Huffman table buffer into context->desc.mjpeg.huffman_table. */
+void vlVaHandleHuffmanTableBufferType(vlVaContext *context, vlVaBuffer *buf)
+{
+   VAHuffmanTableBufferJPEGBaseline *mjpeg = buf->data;
+   int i;
+
+   /* The size check has to use the Huffman buffer type, because that is the layout read from buf->data. */
+   assert(buf->size >= sizeof(VAHuffmanTableBufferJPEGBaseline) && buf->num_elements == 1);
+   for (i = 0; i < 2; ++i) {
+      context->desc.mjpeg.huffman_table.load_huffman_table[i] = mjpeg->load_huffman_table[i];
+
+      memcpy(&context->desc.mjpeg.huffman_table.table[i].num_dc_codes,
+         mjpeg->huffman_table[i].num_dc_codes, 16);
+      memcpy(&context->desc.mjpeg.huffman_table.table[i].dc_values,
+         mjpeg->huffman_table[i].dc_values, 12);
+      memcpy(&context->desc.mjpeg.huffman_table.table[i].num_ac_codes,
+         mjpeg->huffman_table[i].num_ac_codes, 16);
+      memcpy(&context->desc.mjpeg.huffman_table.table[i].ac_values,
+         mjpeg->huffman_table[i].ac_values, 162);
+      memcpy(&context->desc.mjpeg.huffman_table.table[i].pad, mjpeg->huffman_table[i].pad, 2);
+   }
+}
+
+/* Copy a VA JPEG-baseline slice parameter buffer into context->desc.mjpeg.slice_parameter. */
+void vlVaHandleSliceParameterBufferMJPEG(vlVaContext *context, vlVaBuffer *buf)
+{
+   VASliceParameterBufferJPEGBaseline *mjpeg = buf->data;
+   unsigned i;
+
+   assert(buf->size >= sizeof(VASliceParameterBufferJPEGBaseline) && buf->num_elements == 1);
+   context->desc.mjpeg.slice_parameter.slice_data_size = mjpeg->slice_data_size;
+   context->desc.mjpeg.slice_parameter.slice_data_offset = mjpeg->slice_data_offset;
+   context->desc.mjpeg.slice_parameter.slice_data_flag = mjpeg->slice_data_flag;
+   context->desc.mjpeg.slice_parameter.slice_horizontal_position = mjpeg->slice_horizontal_position;
+   context->desc.mjpeg.slice_parameter.slice_vertical_position = mjpeg->slice_vertical_position;
+
+   /* num_components is client data; stop at the end of components[] (destination array assumed equally sized). */
+   for (i = 0; i < mjpeg->num_components && i < sizeof(mjpeg->components) / sizeof(mjpeg->components[0]); ++i) {
+      context->desc.mjpeg.slice_parameter.components[i].component_selector =
+         mjpeg->components[i].component_selector;
+      context->desc.mjpeg.slice_parameter.components[i].dc_table_selector =
+         mjpeg->components[i].dc_table_selector;
+      context->desc.mjpeg.slice_parameter.components[i].ac_table_selector =
+         mjpeg->components[i].ac_table_selector;
+   }
+   context->desc.mjpeg.slice_parameter.num_components = i;
+   context->desc.mjpeg.slice_parameter.restart_interval = mjpeg->restart_interval;
+   context->desc.mjpeg.slice_parameter.num_mcus = mjpeg->num_mcus;
+}
diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am
new file mode 100644
index 000000000..4c9a12c3c
--- /dev/null
+++ b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.am
@@ -0,0 +1,75 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ $(GALLIUM_TARGET_CFLAGS)
+
+omxdir = $(OMX_BELLAGIO_LIB_INSTALL_DIR)
+omx_LTLIBRARIES = libomx_mesa.la
+## dummy.cpp is never compiled or distributed; naming a C++ source makes automake link this library with the C++ linker.
+nodist_EXTRA_libomx_mesa_la_SOURCES = dummy.cpp
+libomx_mesa_la_SOURCES =
+
+libomx_mesa_la_LDFLAGS = \
+ -shared \
+ -module \
+ -no-undefined \
+ -avoid-version \
+ $(GC_SECTIONS) \
+ $(LD_NO_UNDEFINED)
+## Pass omx.sym to the linker as a version script when the toolchain supports one.
+if HAVE_LD_VERSION_SCRIPT
+libomx_mesa_la_LDFLAGS += \
+ -Wl,--version-script=$(top_srcdir)/src/gallium/targets/omx-bellagio/omx.sym
+endif # HAVE_LD_VERSION_SCRIPT
+
+libomx_mesa_la_LIBADD = \
+ $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(OMX_BELLAGIO_LIBS) \
+ $(LIBDRM_LIBS) \
+ $(GALLIUM_COMMON_LIB_DEPS)
+
+if HAVE_PLATFORM_X11
+libomx_mesa_la_LIBADD += \
+ $(VL_LIBS) \
+ $(XCB_DRI3_LIBS)
+endif
+
+EXTRA_libomx_mesa_la_DEPENDENCIES = omx.sym
+EXTRA_DIST = omx.sym
+
+if HAVE_GALLIUM_STATIC_TARGETS
+## Static targets: start from empty TARGET_* lists; the driver Automake.inc files included below are expected to extend them.
+TARGET_DRIVERS =
+TARGET_CPPFLAGS =
+TARGET_LIB_DEPS =
+
+
+include $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc
+
+include $(top_srcdir)/src/gallium/drivers/r600/Automake.inc
+include $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
+
+libomx_mesa_la_SOURCES += target.c
+libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS)
+libomx_mesa_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+ $(TARGET_LIB_DEPS) \
+ $(TARGET_COMPILER_LIB_DEPS) \
+ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
+
+else # HAVE_GALLIUM_STATIC_TARGETS
+## Non-static build: only the dynamic pipe-loader library is added to LIBADD here.
+libomx_mesa_la_LIBADD += \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
+
+endif # HAVE_GALLIUM_STATIC_TARGETS
+
+if HAVE_GALLIUM_LLVM
+libomx_mesa_la_LIBADD += $(LLVM_LIBS)
+libomx_mesa_la_LDFLAGS += $(LLVM_LDFLAGS)
+endif
diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in
new file mode 100644
index 000000000..1c63b48c7
--- /dev/null
+++ b/lib/mesa/src/gallium/targets/omx-bellagio/Makefile.in
@@ -0,0 +1,1102 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_LIBDRM_TRUE@am__append_1 = \
+@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
+
+@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_LD_VERSION_SCRIPT_TRUE@am__append_4 = \
+@HAVE_LD_VERSION_SCRIPT_TRUE@ -Wl,--version-script=$(top_srcdir)/src/gallium/targets/omx-bellagio/omx.sym
+
+@HAVE_PLATFORM_X11_TRUE@am__append_5 = \
+@HAVE_PLATFORM_X11_TRUE@ $(VL_LIBS) \
+@HAVE_PLATFORM_X11_TRUE@ $(XCB_DRI3_LIBS)
+
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_6 = nouveau
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_7 = -DGALLIUM_NOUVEAU
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_8 = \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/nouveau/drm/libnouveaudrm.la \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/nouveau/libnouveau.la \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(NOUVEAU_LIBS) \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_9 = r600
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_10 = -DGALLIUM_R600
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_11 = \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/r600/libr600.la \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(RADEON_LIBS) \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS) \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBELF_LIBS)
+
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_12 = radeonsi
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_13 = -DGALLIUM_RADEONSI
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_14 = \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(RADEON_LIBS) \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(LIBDRM_LIBS) \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(AMDGPU_LIBS)
+
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_15 = target.c
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__append_16 = \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_LIB_DEPS) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_COMPILER_LIB_DEPS) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
+
+@HAVE_GALLIUM_STATIC_TARGETS_FALSE@am__append_17 = \
+@HAVE_GALLIUM_STATIC_TARGETS_FALSE@ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_dynamic.la
+
+@HAVE_GALLIUM_LLVM_TRUE@am__append_18 = $(LLVM_LIBS)
+@HAVE_GALLIUM_LLVM_TRUE@am__append_19 = $(LLVM_LDFLAGS)
+subdir = src/gallium/targets/omx-bellagio
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+ $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
+ $(top_srcdir)/m4/ax_gcc_builtin.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+ $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_flex.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(omxdir)"
+LTLIBRARIES = $(omx_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+@HAVE_LIBDRM_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1)
+am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2)
+@HAVE_PLATFORM_X11_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) \
+@HAVE_PLATFORM_X11_TRUE@ $(am__DEPENDENCIES_1)
+@HAVE_DRISW_KMS_TRUE@am__DEPENDENCIES_5 = $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+@HAVE_DRISW_KMS_TRUE@ $(am__DEPENDENCIES_1)
+am__DEPENDENCIES_6 = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_2) $(am__DEPENDENCIES_5)
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_7 = $(top_builddir)/src/gallium/winsys/nouveau/drm/libnouveaudrm.la \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/nouveau/libnouveau.la \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \
+@HAVE_GALLIUM_NOUVEAU_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1)
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_8 = $(top_builddir)/src/gallium/drivers/r600/libr600.la \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1)
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_9 = $(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1) \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_1)
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_10 = \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_7) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_8) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_9)
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__DEPENDENCIES_11 = $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_static.la \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_6) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__DEPENDENCIES_10) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_COMPILER_LIB_DEPS) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_WINSYS) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(TARGET_RADEON_COMMON)
+@HAVE_GALLIUM_LLVM_TRUE@am__DEPENDENCIES_12 = $(am__DEPENDENCIES_1)
+libomx_mesa_la_DEPENDENCIES = $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_3) \
+ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_11) $(am__append_17) \
+ $(am__DEPENDENCIES_12)
+am__libomx_mesa_la_SOURCES_DIST = target.c
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@am__objects_1 = \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ libomx_mesa_la-target.lo
+am_libomx_mesa_la_OBJECTS = $(am__objects_1)
+libomx_mesa_la_OBJECTS = $(am_libomx_mesa_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libomx_mesa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) $(libomx_mesa_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo " CXX " $@;
+am__v_CXX_1 =
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo " CXXLD " $@;
+am__v_CXXLD_1 =
+SOURCES = $(libomx_mesa_la_SOURCES) \
+ $(nodist_EXTRA_libomx_mesa_la_SOURCES)
+DIST_SOURCES = $(am__libomx_mesa_la_SOURCES_DIST)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/bin/depcomp \
+ $(top_srcdir)/src/gallium/Automake.inc \
+ $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc \
+ $(top_srcdir)/src/gallium/drivers/r600/Automake.inc \
+ $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
+AMDGPU_LIBS = @AMDGPU_LIBS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+ANDROID_CFLAGS = @ANDROID_CFLAGS@
+ANDROID_LIBS = @ANDROID_LIBS@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BSYMBOLIC = @BSYMBOLIC@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
+DEFINES = @DEFINES@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGREP = @EGREP@
+ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
+ETNAVIV_LIBS = @ETNAVIV_LIBS@
+EXEEXT = @EXEEXT@
+EXPAT_CFLAGS = @EXPAT_CFLAGS@
+EXPAT_LIBS = @EXPAT_LIBS@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GC_SECTIONS = @GC_SECTIONS@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLVND_CFLAGS = @GLVND_CFLAGS@
+GLVND_LIBS = @GLVND_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+I915_CFLAGS = @I915_CFLAGS@
+I915_LIBS = @I915_LIBS@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBATOMIC_LIBS = @LIBATOMIC_LIBS@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBELF_CFLAGS = @LIBELF_CFLAGS@
+LIBELF_LIBS = @LIBELF_LIBS@
+LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIB_DIR = @LIB_DIR@
+LIB_EXT = @LIB_EXT@
+LIPO = @LIPO@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBS = @LLVM_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
+MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
+NINE_MAJOR = @NINE_MAJOR@
+NINE_MINOR = @NINE_MINOR@
+NINE_TINY = @NINE_TINY@
+NINE_VERSION = @NINE_VERSION@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
+NVVIEUX_LIBS = @NVVIEUX_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
+OPENCL_LIBNAME = @OPENCL_LIBNAME@
+OPENCL_VERSION = @OPENCL_VERSION@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
+PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PWR8_CFLAGS = @PWR8_CFLAGS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+RM = @RM@
+SED = @SED@
+SELINUX_CFLAGS = @SELINUX_CFLAGS@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
+SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
+SSE41_CFLAGS = @SSE41_CFLAGS@
+STRIP = @STRIP@
+SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
+SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
+SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
+SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+VALGRIND_LIBS = @VALGRIND_LIBS@
+VA_CFLAGS = @VA_CFLAGS@
+VA_LIBS = @VA_LIBS@
+VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
+VA_MAJOR = @VA_MAJOR@
+VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+VL_CFLAGS = @VL_CFLAGS@
+VL_LIBS = @VL_LIBS@
+VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
+WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
+XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acv_mako_found = @acv_mako_found@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+ifGNUmake = @ifGNUmake@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+GALLIUM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES)
+
+
+# src/gallium/auxiliary must appear before src/gallium/drivers
+# because there are stupidly two rbug_context.h files in
+# different directories, and which one is included by the
+# preprocessor is determined by the ordering of the -I flags.
+GALLIUM_DRIVER_CFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_DRIVER_CXXFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CXXFLAGS)
+
+GALLIUM_TARGET_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
+ $(DEFINES) \
+ $(PTHREAD_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
+ $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_WINSYS_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_2) $(am__append_3)
+AM_CFLAGS = \
+ $(GALLIUM_TARGET_CFLAGS)
+
+omxdir = $(OMX_BELLAGIO_LIB_INSTALL_DIR)
+omx_LTLIBRARIES = libomx_mesa.la
+nodist_EXTRA_libomx_mesa_la_SOURCES = dummy.cpp
+libomx_mesa_la_SOURCES = $(am__append_15)
+libomx_mesa_la_LDFLAGS = -shared -module -no-undefined -avoid-version \
+ $(GC_SECTIONS) $(LD_NO_UNDEFINED) $(am__append_4) \
+ $(am__append_19)
+libomx_mesa_la_LIBADD = $(top_builddir)/src/gallium/state_trackers/omx_bellagio/libomxtracker.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvlwinsys.la \
+ $(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la $(OMX_BELLAGIO_LIBS) \
+ $(LIBDRM_LIBS) $(GALLIUM_COMMON_LIB_DEPS) $(am__append_5) \
+ $(am__append_16) $(am__append_17) $(am__append_18)
+EXTRA_libomx_mesa_la_DEPENDENCIES = omx.sym
+EXTRA_DIST = omx.sym
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_DRIVERS = $(am__append_6) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_9) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_12)
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_CPPFLAGS = $(am__append_7) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_10) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_13)
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_LIB_DEPS = $(am__append_8) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_11) \
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(am__append_14)
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_WINSYS = \
+@HAVE_GALLIUM_R600_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
+
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_WINSYS = \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la
+
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_COMPILER_LIB_DEPS = \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/compiler/nir/libnir.la
+
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@TARGET_RADEON_COMMON = \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
+@HAVE_GALLIUM_RADEONSI_TRUE@@HAVE_GALLIUM_STATIC_TARGETS_TRUE@ $(top_builddir)/src/amd/common/libamd_common.la
+
+@HAVE_GALLIUM_STATIC_TARGETS_TRUE@libomx_mesa_la_CPPFLAGS = $(TARGET_CPPFLAGS)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/src/gallium/Automake.inc $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc $(top_srcdir)/src/gallium/drivers/r600/Automake.inc $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/targets/omx-bellagio/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/gallium/targets/omx-bellagio/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(top_srcdir)/src/gallium/Automake.inc $(top_srcdir)/src/gallium/drivers/nouveau/Automake.inc $(top_srcdir)/src/gallium/drivers/r600/Automake.inc $(top_srcdir)/src/gallium/drivers/radeonsi/Automake.inc $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+install-omxLTLIBRARIES: $(omx_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(omx_LTLIBRARIES)'; test -n "$(omxdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(omxdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(omxdir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(omxdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(omxdir)"; \
+ }
+
+uninstall-omxLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(omx_LTLIBRARIES)'; test -n "$(omxdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(omxdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(omxdir)/$$f"; \
+ done
+
+clean-omxLTLIBRARIES:
+ -test -z "$(omx_LTLIBRARIES)" || rm -f $(omx_LTLIBRARIES)
+ @list='$(omx_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libomx_mesa.la: $(libomx_mesa_la_OBJECTS) $(libomx_mesa_la_DEPENDENCIES) $(EXTRA_libomx_mesa_la_DEPENDENCIES)
+ $(AM_V_CXXLD)$(libomx_mesa_la_LINK) -rpath $(omxdir) $(libomx_mesa_la_OBJECTS) $(libomx_mesa_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libomx_mesa_la-dummy.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libomx_mesa_la-target.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+libomx_mesa_la-target.lo: target.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libomx_mesa_la-target.lo -MD -MP -MF $(DEPDIR)/libomx_mesa_la-target.Tpo -c -o libomx_mesa_la-target.lo `test -f 'target.c' || echo '$(srcdir)/'`target.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libomx_mesa_la-target.Tpo $(DEPDIR)/libomx_mesa_la-target.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='target.c' object='libomx_mesa_la-target.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libomx_mesa_la-target.lo `test -f 'target.c' || echo '$(srcdir)/'`target.c
+
+.cpp.o:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cpp.lo:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+libomx_mesa_la-dummy.lo: dummy.cpp
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libomx_mesa_la-dummy.lo -MD -MP -MF $(DEPDIR)/libomx_mesa_la-dummy.Tpo -c -o libomx_mesa_la-dummy.lo `test -f 'dummy.cpp' || echo '$(srcdir)/'`dummy.cpp
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libomx_mesa_la-dummy.Tpo $(DEPDIR)/libomx_mesa_la-dummy.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='dummy.cpp' object='libomx_mesa_la-dummy.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libomx_mesa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libomx_mesa_la-dummy.lo `test -f 'dummy.cpp' || echo '$(srcdir)/'`dummy.cpp
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+ for dir in "$(DESTDIR)$(omxdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-omxLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-omxLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-omxLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-omxLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-omxLTLIBRARIES install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am uninstall-omxLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym b/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym
new file mode 100644
index 000000000..e8a287600
--- /dev/null
+++ b/lib/mesa/src/gallium/targets/omx-bellagio/omx.sym
@@ -0,0 +1,11 @@
+{
+ global:
+ omx_component_library_Setup;
+
+ # Workaround for an LLVM warning with -simplifycfg-sink-common
+ # due to LLVM being initialized multiple times.
+ radeon_drm_winsys_create;
+ amdgpu_winsys_create;
+ local:
+ *;
+};
diff --git a/lib/mesa/src/gallium/targets/omx-bellagio/target.c b/lib/mesa/src/gallium/targets/omx-bellagio/target.c
new file mode 100644
index 000000000..308e23bb4
--- /dev/null
+++ b/lib/mesa/src/gallium/targets/omx-bellagio/target.c
@@ -0,0 +1,2 @@
+#include "target-helpers/drm_helper.h"
+#include "target-helpers/sw_helper.h"
diff --git a/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh b/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh
index 85fb9ea4e..097774336 100644
--- a/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh
+++ b/lib/mesa/src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh
@@ -2,12 +2,12 @@ FRAG
DCL IN[0], COLOR, LINEAR
DCL OUT[0], COLOR
-DCL CONST[1]
-DCL CONST[3]
+DCL CONST[0][1]
+DCL CONST[0][3]
DCL TEMP[0..1]
-ADD TEMP[0], IN[0], CONST[1]
-RCP TEMP[1], CONST[3].xxxx
+ADD TEMP[0], IN[0], CONST[0][1]
+RCP TEMP[1], CONST[0][3].xxxx
MUL OUT[0], TEMP[0], TEMP[1]
END
diff --git a/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh b/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh
index e227917fd..0b05ca8b6 100644
--- a/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh
+++ b/lib/mesa/src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh
@@ -4,13 +4,13 @@ DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
-DCL CONST[1]
-DCL CONST[3]
+DCL CONST[0][1]
+DCL CONST[0][3]
DCL TEMP[0..1]
MOV OUT[0], IN[0]
-ADD TEMP[0], IN[1], CONST[1]
-RCP TEMP[1], CONST[3].xxxx
+ADD TEMP[0], IN[1], CONST[0][1]
+RCP TEMP[1], CONST[0][3].xxxx
MUL OUT[1], TEMP[0], TEMP[1]
END
diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am
new file mode 100644
index 000000000..fc5d1ca57
--- /dev/null
+++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.am
@@ -0,0 +1,31 @@
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(GALLIUM_WINSYS_CFLAGS)
+
+noinst_LTLIBRARIES = libvc5drm.la
+
+libvc5drm_la_SOURCES = $(C_SOURCES)
diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in
new file mode 100644
index 000000000..9884b0eea
--- /dev/null
+++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.in
@@ -0,0 +1,882 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright © 2014 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_LIBDRM_TRUE@am__append_1 = \
+@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
+
+@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
+
+@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
+
+subdir = src/gallium/winsys/vc5/drm
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+ $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
+ $(top_srcdir)/m4/ax_gcc_builtin.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+ $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_flex.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libvc5drm_la_LIBADD =
+am__objects_1 = vc5_drm_winsys.lo
+am_libvc5drm_la_OBJECTS = $(am__objects_1)
+libvc5drm_la_OBJECTS = $(am_libvc5drm_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libvc5drm_la_SOURCES)
+DIST_SOURCES = $(libvc5drm_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
+ $(top_srcdir)/bin/depcomp \
+ $(top_srcdir)/src/gallium/Automake.inc
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
+AMDGPU_LIBS = @AMDGPU_LIBS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+ANDROID_CFLAGS = @ANDROID_CFLAGS@
+ANDROID_LIBS = @ANDROID_LIBS@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BSYMBOLIC = @BSYMBOLIC@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CLOVER_STD_OVERRIDE = @CLOVER_STD_OVERRIDE@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
+DEFINES = @DEFINES@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGREP = @EGREP@
+ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
+ETNAVIV_LIBS = @ETNAVIV_LIBS@
+EXEEXT = @EXEEXT@
+EXPAT_CFLAGS = @EXPAT_CFLAGS@
+EXPAT_LIBS = @EXPAT_LIBS@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GC_SECTIONS = @GC_SECTIONS@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLVND_CFLAGS = @GLVND_CFLAGS@
+GLVND_LIBS = @GLVND_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+I915_CFLAGS = @I915_CFLAGS@
+I915_LIBS = @I915_LIBS@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBATOMIC_LIBS = @LIBATOMIC_LIBS@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBELF_CFLAGS = @LIBELF_CFLAGS@
+LIBELF_LIBS = @LIBELF_LIBS@
+LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIB_DIR = @LIB_DIR@
+LIB_EXT = @LIB_EXT@
+LIPO = @LIPO@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBS = @LLVM_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
+MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
+NINE_MAJOR = @NINE_MAJOR@
+NINE_MINOR = @NINE_MINOR@
+NINE_TINY = @NINE_TINY@
+NINE_VERSION = @NINE_VERSION@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
+NVVIEUX_LIBS = @NVVIEUX_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
+OPENCL_LIBNAME = @OPENCL_LIBNAME@
+OPENCL_VERSION = @OPENCL_VERSION@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
+PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PWR8_CFLAGS = @PWR8_CFLAGS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+RM = @RM@
+SED = @SED@
+SELINUX_CFLAGS = @SELINUX_CFLAGS@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
+SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
+SSE41_CFLAGS = @SSE41_CFLAGS@
+STRIP = @STRIP@
+SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
+SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
+SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+SWR_KNL_CXXFLAGS = @SWR_KNL_CXXFLAGS@
+SWR_SKX_CXXFLAGS = @SWR_SKX_CXXFLAGS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+VALGRIND_LIBS = @VALGRIND_LIBS@
+VA_CFLAGS = @VA_CFLAGS@
+VA_LIBS = @VA_LIBS@
+VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
+VA_MAJOR = @VA_MAJOR@
+VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+VL_CFLAGS = @VL_CFLAGS@
+VL_LIBS = @VL_LIBS@
+VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
+WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XCB_DRI3_CFLAGS = @XCB_DRI3_CFLAGS@
+XCB_DRI3_LIBS = @XCB_DRI3_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acv_mako_found = @acv_mako_found@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+ifGNUmake = @ifGNUmake@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+C_SOURCES := \
+ vc5_drm_public.h \
+ vc5_drm_winsys.c
+
+GALLIUM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES)
+
+
+# src/gallium/auxiliary must appear before src/gallium/drivers
+# because there are stupidly two rbug_context.h files in
+# different directories, and which one is included by the
+# preprocessor is determined by the ordering of the -I flags.
+GALLIUM_DRIVER_CFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_DRIVER_CXXFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CXXFLAGS)
+
+GALLIUM_TARGET_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
+ $(DEFINES) \
+ $(PTHREAD_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
+ $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_WINSYS_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_2) $(am__append_3)
+AM_CFLAGS = \
+ -I$(top_srcdir)/src/gallium/drivers \
+ $(GALLIUM_WINSYS_CFLAGS)
+
+noinst_LTLIBRARIES = libvc5drm.la
+libvc5drm_la_SOURCES = $(C_SOURCES)
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/winsys/vc5/drm/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/gallium/winsys/vc5/drm/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libvc5drm.la: $(libvc5drm_la_OBJECTS) $(libvc5drm_la_DEPENDENCIES) $(EXTRA_libvc5drm_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(libvc5drm_la_OBJECTS) $(libvc5drm_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vc5_drm_winsys.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources
new file mode 100644
index 000000000..ea7566f8d
--- /dev/null
+++ b/lib/mesa/src/gallium/winsys/vc5/drm/Makefile.sources
@@ -0,0 +1,3 @@
+C_SOURCES := \
+ vc5_drm_public.h \
+ vc5_drm_winsys.c
diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h
new file mode 100644
index 000000000..6e1984815
--- /dev/null
+++ b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_public.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __VC5_DRM_PUBLIC_H__
+#define __VC5_DRM_PUBLIC_H__
+
+/* Forward declaration; this header only deals in pointers to the screen. */
+struct pipe_screen;
+
+/**
+ * Create a vc5 screen for the DRM file descriptor drmFD.
+ *
+ * Implemented in vc5_drm_winsys.c, which hands vc5_screen_create() a
+ * close-on-exec duplicate of the descriptor rather than drmFD itself.
+ */
+struct pipe_screen *vc5_drm_screen_create(int drmFD);
+
+#endif /* __VC5_DRM_PUBLIC_H__ */
diff --git a/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c
new file mode 100644
index 000000000..d089291bf
--- /dev/null
+++ b/lib/mesa/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "vc5_drm_public.h"
+
+#include "vc5/vc5_screen.h"
+
+/**
+ * Create a vc5 pipe_screen on top of a DRM file descriptor.
+ *
+ * The screen is built on a duplicate of fd (close-on-exec, numbered 3 or
+ * higher) rather than on fd itself.
+ *
+ * Returns NULL if the descriptor cannot be duplicated; otherwise returns
+ * whatever vc5_screen_create() returns.
+ */
+struct pipe_screen *
+vc5_drm_screen_create(int fd)
+{
+        /* fcntl() reports failure (bad fd, descriptor limit reached) with
+         * -1; don't build a screen around an invalid descriptor.
+         */
+        int screen_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+        if (screen_fd < 0)
+                return NULL;
+
+        return vc5_screen_create(screen_fd);
+}
diff --git a/lib/mesa/src/git_sha1.h.in b/lib/mesa/src/git_sha1.h.in
new file mode 100644
index 000000000..dc17f5e66
--- /dev/null
+++ b/lib/mesa/src/git_sha1.h.in
@@ -0,0 +1 @@
+#define MESA_GIT_SHA1 "git-@VCS_TAG@"
diff --git a/lib/mesa/src/intel/common/gen_sample_positions.h b/lib/mesa/src/intel/common/gen_sample_positions.h
index b86a7d843..f0ce95dd1 100644
--- a/lib/mesa/src/intel/common/gen_sample_positions.h
+++ b/lib/mesa/src/intel/common/gen_sample_positions.h
@@ -23,16 +23,38 @@
#ifndef GEN_SAMPLE_POSITIONS_H
#define GEN_SAMPLE_POSITIONS_H
+/*
+ * This file defines the standard multisample positions used by both GL and
+ * Vulkan. These correspond to the Vulkan "standard sample locations".
+ */
+
+/**
+ * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
+ *
+ * As with the other GEN_SAMPLE_POS_* macros here, "prefix" is token-pasted
+ * in front of the per-sample <n>XOffset / <n>YOffset names being assigned.
+ */
#define GEN_SAMPLE_POS_1X(prefix) \
prefix##0XOffset = 0.5; \
prefix##0YOffset = 0.5;
+/**
+ * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75).
+ * Row and column labels are offsets in 1/16ths of a pixel (hex):
+ *   4 c
+ * 4 0
+ * c   1
+ */
#define GEN_SAMPLE_POS_2X(prefix) \
prefix##0XOffset = 0.25; \
prefix##0YOffset = 0.25; \
prefix##1XOffset = 0.75; \
prefix##1YOffset = 0.75;
+/**
+ * Sample positions (column = X, row = Y, in 1/16ths of a pixel):
+ *   2 6 a e
+ * 2   0
+ * 6       1
+ * a 2
+ * e     3
+ */
#define GEN_SAMPLE_POS_4X(prefix) \
prefix##0XOffset = 0.375; \
prefix##0YOffset = 0.125; \
@@ -43,6 +65,28 @@ prefix##2YOffset = 0.625; \
prefix##3XOffset = 0.625; \
prefix##3YOffset = 0.875;
+/**
+ * Sample ordering:
+ *
+ * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE:
+ * Programming Notes):
+ * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
+ * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7
+ * for 8X) must have monotonically increasing distance from the
+ * pixel center. This is required to get the correct centroid
+ * computation in the device."
+ *
+ * Sample positions (column = X, row = Y, in 1/16ths of a pixel):
+ *   1 3 5 7 9 b d f
+ * 1               7
+ * 3     3
+ * 5         0
+ * 7 5
+ * 9             2
+ * b       1
+ * d   4
+ * f           6
+ */
#define GEN_SAMPLE_POS_8X(prefix) \
prefix##0XOffset = 0.5625; \
prefix##0YOffset = 0.3125; \
@@ -61,6 +105,27 @@ prefix##6YOffset = 0.9375; \
prefix##7XOffset = 0.9375; \
prefix##7YOffset = 0.0625;
+/**
+ * Sample positions (column = X, row = Y, in 1/16ths of a pixel):
+ *
+ *    0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
+ * 0    15
+ * 1                          9
+ * 2             10
+ * 3                                   7
+ * 4                                              13
+ * 5                       1
+ * 6           4
+ * 7                                      3
+ * 8 12
+ * 9                             0
+ * a                 2
+ * b                                         6
+ * c       11
+ * d                                5
+ * e                    8
+ * f                                           14
+ */
#define GEN_SAMPLE_POS_16X(prefix) \
prefix##0XOffset = 0.5625; \
prefix##0YOffset = 0.5625; \
diff --git a/lib/mesa/src/intel/common/intel_log.c b/lib/mesa/src/intel/common/intel_log.c
new file mode 100644
index 000000000..cebdd6dd6
--- /dev/null
+++ b/lib/mesa/src/intel/common/intel_log.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2017 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+
+#ifdef ANDROID
+#include <android/log.h>
+#else
+#include <stdio.h>
+#endif
+
+#include "intel_log.h"
+
+#ifdef ANDROID
+/**
+ * Translate an intel_log_level into the matching Android log priority.
+ *
+ * Only compiled for Android builds.  A value that is not one of the enum's
+ * members falls out of the switch and is treated as unreachable.
+ */
+static inline android_LogPriority
+level_to_android(enum intel_log_level level)
+{
+   switch (level) {
+   case INTEL_LOG_ERROR:
+      return ANDROID_LOG_ERROR;
+   case INTEL_LOG_WARN:
+      return ANDROID_LOG_WARN;
+   case INTEL_LOG_INFO:
+      return ANDROID_LOG_INFO;
+   case INTEL_LOG_DEBUG:
+      return ANDROID_LOG_DEBUG;
+   }
+
+   unreachable("bad intel_log_level");
+}
+#endif
+
+#ifndef ANDROID
+/**
+ * Return the label printed in front of a message logged at the given level.
+ *
+ * Only compiled for non-Android builds.  A value that is not one of the
+ * enum's members falls out of the switch and is treated as unreachable.
+ */
+static inline const char *
+level_to_str(enum intel_log_level level)
+{
+   switch (level) {
+   case INTEL_LOG_ERROR:
+      return "error";
+   case INTEL_LOG_WARN:
+      return "warning";
+   case INTEL_LOG_INFO:
+      return "info";
+   case INTEL_LOG_DEBUG:
+      return "debug";
+   }
+
+   unreachable("bad intel_log_level");
+}
+#endif
+
+/**
+ * printf-style logging entry point.
+ *
+ * Gathers the variadic arguments into a va_list and forwards level, tag,
+ * format and that list unchanged to intel_log_v().
+ */
+void
+intel_log(enum intel_log_level level, const char *tag, const char *format, ...)
+{
+   va_list args;
+
+   va_start(args, format);
+   intel_log_v(level, tag, format, args);
+   va_end(args);
+}
+
+/**
+ * va_list logging back end.
+ *
+ * Android builds hand the message to __android_log_vprint() with the level
+ * converted to an Android priority.  All other builds write
+ * "<tag>: <level>: <formatted message>\n" to stderr.
+ */
+void
+intel_log_v(enum intel_log_level level, const char *tag, const char *format,
+            va_list va)
+{
+#ifndef ANDROID
+   FILE *const out = stderr;
+
+   /* Hold the stream lock so the prefix, message and newline are written
+    * as one unit.
+    */
+   flockfile(out);
+   fprintf(out, "%s: %s: ", tag, level_to_str(level));
+   vfprintf(out, format, va);
+   fputs("\n", out);
+   funlockfile(out);
+#else
+   __android_log_vprint(level_to_android(level), tag, format, va);
+#endif
+}
diff --git a/lib/mesa/src/intel/common/intel_log.h b/lib/mesa/src/intel/common/intel_log.h
new file mode 100644
index 000000000..0f28109a0
--- /dev/null
+++ b/lib/mesa/src/intel/common/intel_log.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2017 Google
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef INTEL_LOG_H
+#define INTEL_LOG_H
+
+#include <stdarg.h>
+
+#include "util/macros.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef INTEL_LOG_TAG
+#define INTEL_LOG_TAG "INTEL-MESA"
+#endif
+
+/**
+ * Severity of a log message.  intel_log.c maps each level to a textual
+ * label ("error", "warning", "info", "debug") or, on Android, to the
+ * corresponding ANDROID_LOG_* priority.
+ */
+enum intel_log_level {
+ INTEL_LOG_ERROR,
+ INTEL_LOG_WARN,
+ INTEL_LOG_INFO,
+ INTEL_LOG_DEBUG,
+};
+
+/**
+ * Log a printf-style message at the given level under the given tag.
+ * Thin wrapper that packages its variadic arguments and calls intel_log_v().
+ */
+void PRINTFLIKE(3, 4)
+intel_log(enum intel_log_level, const char *tag, const char *format, ...);
+
+/**
+ * va_list flavour of intel_log().  Android builds pass the message to
+ * __android_log_vprint(); all other builds print "<tag>: <level>: <message>"
+ * followed by a newline to stderr.
+ */
+void
+intel_log_v(enum intel_log_level, const char *tag, const char *format,
+ va_list va);
+
+/*
+ * Convenience wrappers that log at a fixed level using INTEL_LOG_TAG.
+ * Without DEBUG defined, intel_logd() does not call intel_log(); it expands
+ * to a call to the empty __intel_log_use_args() so that its arguments are
+ * still referenced.
+ */
+#define intel_loge(fmt, ...) intel_log(INTEL_LOG_ERROR, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__)
+#define intel_logw(fmt, ...) intel_log(INTEL_LOG_WARN, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__)
+#define intel_logi(fmt, ...) intel_log(INTEL_LOG_INFO, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__)
+#ifdef DEBUG
+#define intel_logd(fmt, ...) intel_log(INTEL_LOG_DEBUG, (INTEL_LOG_TAG), (fmt), ##__VA_ARGS__)
+#else
+#define intel_logd(fmt, ...) __intel_log_use_args((fmt), ##__VA_ARGS__)
+#endif
+
+/* The same wrappers for callers that already hold a va_list. */
+#define intel_loge_v(fmt, va) intel_log_v(INTEL_LOG_ERROR, (INTEL_LOG_TAG), (fmt), (va))
+#define intel_logw_v(fmt, va) intel_log_v(INTEL_LOG_WARN, (INTEL_LOG_TAG), (fmt), (va))
+#define intel_logi_v(fmt, va) intel_log_v(INTEL_LOG_INFO, (INTEL_LOG_TAG), (fmt), (va))
+#ifdef DEBUG
+#define intel_logd_v(fmt, va) intel_log_v(INTEL_LOG_DEBUG, (INTEL_LOG_TAG), (fmt), (va))
+#else
+#define intel_logd_v(fmt, va) __intel_log_use_args((fmt), (va))
+#endif
+
+
+#ifndef DEBUG
+/* Suppress -Wunused: empty sink that the non-DEBUG intel_logd() and
+ * intel_logd_v() macros expand to, so their arguments are still referenced.
+ */
+static inline void PRINTFLIKE(1, 2)
+__intel_log_use_args(const char *format, ...) { }
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* INTEL_LOG_H */
diff --git a/lib/mesa/src/intel/isl/isl_genX_priv.h b/lib/mesa/src/intel/isl/isl_genX_priv.h
new file mode 100644
index 000000000..a005e1c7b
--- /dev/null
+++ b/lib/mesa/src/intel/isl/isl_genX_priv.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * NOTE: The header can be included multiple times, from the same file.
+ */
+
+/*
+ * Gen-specific function declarations. This header must *not* be included
+ * directly. Instead, it is included multiple times by isl_priv.h
+ *
+ * In this header file, the usual isl_genX() macro is available.
+ */
+
+void
+isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
+ const struct isl_surf_fill_state_info *restrict info);
+
+void
+isl_genX(buffer_fill_state_s)(void *state,
+ const struct isl_buffer_fill_state_info *restrict info);
+
+void
+isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
+ const struct isl_depth_stencil_hiz_emit_info *restrict info);
+
+void
+isl_genX(null_fill_state)(void *state, struct isl_extent3d size);
diff --git a/lib/mesa/src/intel/vulkan/anv_android.c b/lib/mesa/src/intel/vulkan/anv_android.c
new file mode 100644
index 000000000..b1bbbb682
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_android.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright © 2017, Google Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <hardware/gralloc.h>
+#include <hardware/hardware.h>
+#include <hardware/hwvulkan.h>
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
+#include <sync/sync.h>
+
+#include "anv_private.h"
+
+static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
+static int anv_hal_close(struct hw_device_t *dev);
+
+/* Container for compile-time checks; it has no runtime statements. The check
+ * requires Android's hwvulkan dispatch magic to equal the ICD loader magic.
+ */
+static void UNUSED
+static_asserts(void)
+{
+ STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
+}
+
+/* HAL module descriptor, exported under the HAL_MODULE_INFO_SYM symbol. Its
+ * only method is open(), implemented by anv_hal_open().
+ */
+PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
+ .common = {
+ .tag = HARDWARE_MODULE_TAG,
+ .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
+ .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
+ .id = HWVULKAN_HARDWARE_MODULE_ID,
+ .name = "Intel Vulkan HAL",
+ .author = "Intel",
+ .methods = &(hw_module_methods_t) {
+ .open = anv_hal_open,
+ },
+ },
+};
+
+/* Clear every bit of test_mask in *inout_mask. Returns true when at least one
+ * of those bits was set beforehand, false when the mask was left unchanged.
+ */
+static inline bool
+unmask32(uint32_t *inout_mask, uint32_t test_mask)
+{
+ const uint32_t hit_bits = *inout_mask & test_mask;
+
+ /* hit_bits is a subset of *inout_mask, so XOR clears exactly those bits. */
+ *inout_mask ^= hit_bits;
+ return hit_bits != 0;
+}
+
+/* open() hook of the HAL module.
+ *
+ * Allocates a hwvulkan_device_t with malloc(), fills in its common header and
+ * the three instance-level entry points, and stores a pointer to the header
+ * in *dev. Returns 0 on success and -1 if the allocation fails. mod and id
+ * are only checked with assert().
+ */
+static int
+anv_hal_open(const struct hw_module_t* mod, const char* id,
+ struct hw_device_t** dev)
+{
+ assert(mod == &HAL_MODULE_INFO_SYM.common);
+ assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
+
+ hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
+ if (!hal_dev)
+ return -1;
+
+ *hal_dev = (hwvulkan_device_t) {
+ .common = {
+ .tag = HARDWARE_DEVICE_TAG,
+ .version = HWVULKAN_DEVICE_API_VERSION_0_1,
+ .module = &HAL_MODULE_INFO_SYM.common,
+ .close = anv_hal_close,
+ },
+ .EnumerateInstanceExtensionProperties = anv_EnumerateInstanceExtensionProperties,
+ .CreateInstance = anv_CreateInstance,
+ .GetInstanceProcAddr = anv_GetInstanceProcAddr,
+ };
+
+ *dev = &hal_dev->common;
+ return 0;
+}
+
+/* close() hook stored in the device header by anv_hal_open(). It releases
+ * nothing and always returns -1.
+ */
+static int
+anv_hal_close(struct hw_device_t *dev)
+{
+ /* hwvulkan.h claims that hw_device_t::close() is never called. */
+ return -1;
+}
+
+/* Create a VkImage backed by the dma-buf of an Android gralloc buffer.
+ *
+ * The dma-buf fd found in gralloc_info->handle is imported through the
+ * device's bo cache, the tiling is queried with anv_gem_get_tiling(), and the
+ * row stride is derived from gralloc_info->stride and the format's bits per
+ * block. On success the image handle is stored in *out_image_h and
+ * VK_SUCCESS is returned. On failure *out_image_h is left untouched, the
+ * imported bo reference is released and an error VkResult is returned.
+ */
+VkResult
+anv_image_from_gralloc(VkDevice device_h,
+ const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc,
+ VkImage *out_image_h)
+
+{
+ ANV_FROM_HANDLE(anv_device, device, device_h);
+ VkImage image_h = VK_NULL_HANDLE;
+ struct anv_image *image = NULL;
+ struct anv_bo *bo = NULL;
+ VkResult result;
+
+ struct anv_image_create_info anv_info = {
+ .vk_info = base_info,
+ .isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT,
+ };
+
+ if (gralloc_info->handle->numFds != 1) {
+ return vk_errorf(device->instance, device,
+ VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
+ "VkNativeBufferANDROID::handle::numFds is %d, "
+ "expected 1", gralloc_info->handle->numFds);
+ }
+
+ /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
+ * must exceed that of the gralloc handle, and we do not own the gralloc
+ * handle.
+ */
+ int dma_buf = gralloc_info->handle->data[0];
+
+ result = anv_bo_cache_import(device, &device->bo_cache, dma_buf, &bo);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, device, result,
+ "failed to import dma-buf from VkNativeBufferANDROID");
+ }
+
+ int i915_tiling = anv_gem_get_tiling(device, bo->gem_handle);
+ switch (i915_tiling) {
+ case I915_TILING_NONE:
+ anv_info.isl_tiling_flags = ISL_TILING_LINEAR_BIT;
+ break;
+ case I915_TILING_X:
+ anv_info.isl_tiling_flags = ISL_TILING_X_BIT;
+ break;
+ case I915_TILING_Y:
+ anv_info.isl_tiling_flags = ISL_TILING_Y0_BIT;
+ break;
+ case -1:
+ result = vk_errorf(device->instance, device,
+ VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
+ "DRM_IOCTL_I915_GEM_GET_TILING failed for "
+ "VkNativeBufferANDROID");
+ goto fail_tiling;
+ default:
+ result = vk_errorf(device->instance, device,
+ VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
+ "DRM_IOCTL_I915_GEM_GET_TILING returned unknown "
+ "tiling %d for VkNativeBufferANDROID", i915_tiling);
+ goto fail_tiling;
+ }
+
+ enum isl_format format = anv_get_isl_format(&device->info,
+ base_info->format,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ base_info->tiling);
+ assert(format != ISL_FORMAT_UNSUPPORTED);
+
+ /* gralloc_info->stride is multiplied by the format's bytes per block to
+ * obtain the byte stride handed to anv_image_create().
+ */
+ anv_info.stride = gralloc_info->stride *
+ (isl_format_get_layout(format)->bpb / 8);
+
+ result = anv_image_create(device_h, &anv_info, alloc, &image_h);
+ image = anv_image_from_handle(image_h);
+ if (result != VK_SUCCESS)
+ goto fail_create;
+
+ if (bo->size < image->size) {
+ /* Argument order is (instance, object), as in every other vk_errorf()
+ * call in this function.
+ */
+ result = vk_errorf(device->instance, device,
+ VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
+ "dma-buf from VkNativeBufferANDROID is too small for "
+ "VkImage: %"PRIu64"B < %"PRIu64"B",
+ bo->size, image->size);
+ goto fail_size;
+ }
+
+ assert(image->n_planes == 1);
+ assert(image->planes[0].bo_offset == 0);
+
+ image->planes[0].bo = bo;
+ image->planes[0].bo_is_owned = true;
+
+ /* We need to set the WRITE flag on window system buffers so that GEM will
+ * know we're writing to them and synchronize uses on other rings (for
+ * example, if the display server uses the blitter ring).
+ *
+ * If this function fails and if the imported bo was resident in the cache,
+ * we should avoid updating the bo's flags. Therefore, we defer updating
+ * the flags until success is certain.
+ *
+ */
+ bo->flags &= ~EXEC_OBJECT_ASYNC;
+ bo->flags |= EXEC_OBJECT_WRITE;
+
+ /* Don't clobber the out-parameter until success is certain. */
+ *out_image_h = image_h;
+
+ return VK_SUCCESS;
+
+ fail_size:
+ anv_DestroyImage(device_h, image_h, alloc);
+ fail_create:
+ fail_tiling:
+ anv_bo_cache_release(device, &device->bo_cache, bo);
+
+ return result;
+}
+
+/* Translate a swapchain's VkFormat and VkImageUsageFlags into gralloc usage
+ * bits.
+ *
+ * *grallocUsage is zeroed first and then accumulates GRALLOC_USAGE_* bits.
+ * Returns VK_SUCCESS when at least one bit ended up set; otherwise, or when
+ * the format/usage combination is rejected, returns an error VkResult.
+ */
+VkResult anv_GetSwapchainGrallocUsageANDROID(
+ VkDevice device_h,
+ VkFormat format,
+ VkImageUsageFlags imageUsage,
+ int* grallocUsage)
+{
+ ANV_FROM_HANDLE(anv_device, device, device_h);
+ struct anv_physical_device *phys_dev = &device->instance->physicalDevice;
+ VkPhysicalDevice phys_dev_h = anv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocUsage = 0;
+ intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
+
+ /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+ * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
+ * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
+ *
+ * TODO(jessehall): I think these are right, but haven't thought hard
+ * about it. Do we need to query the driver for support of any of
+ * these?
+ *
+ * Any disagreement between this function and the hardcoded
+ * VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
+ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
+ */
+
+ const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2KHR image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = anv_GetPhysicalDeviceImageFormatProperties2KHR(phys_dev_h,
+ &image_format_info, &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, device, result,
+ "anv_GetPhysicalDeviceImageFormatProperties2KHR failed "
+ "inside %s", __func__);
+ }
+
+ /* Reject STORAGE here to avoid complexity elsewhere. */
+ if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc "
+ "swapchain");
+ }
+
+ /* Each unmask32() call strips the bits it recognizes from imageUsage, so
+ * whatever is left afterwards is an unhandled usage flag.
+ */
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
+
+ /* All VkImageUsageFlags not explicitly checked here are unsupported for
+ * gralloc swapchains.
+ */
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain", imageUsage);
+ }
+
+ /* The below formats support GRALLOC_USAGE_HW_FB (that is, display
+ * scanout). This short list of formats is universally supported on Intel
+ * but is incomplete. The full set of supported formats is dependent on
+ * kernel and hardware.
+ *
+ * FINISHME: Advertise all display-supported formats.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM ||
+ format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocUsage |= GRALLOC_USAGE_HW_FB |
+ GRALLOC_USAGE_HW_COMPOSER |
+ GRALLOC_USAGE_EXTERNAL_DISP;
+ }
+
+ if (*grallocUsage == 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
+}
+
+/* Implementation of vkAcquireImageANDROID.
+ *
+ * Blocks on nativeFenceFd (when it is not -1) and always closes it, then
+ * signals semaphore_h and/or fence_h by submitting an empty batch through
+ * anv_QueueSubmit(). Returns VK_SUCCESS, or an error VkResult if the wait on
+ * the fence fd or the queue submission fails.
+ */
+VkResult
+anv_AcquireImageANDROID(
+ VkDevice device_h,
+ VkImage image_h,
+ int nativeFenceFd,
+ VkSemaphore semaphore_h,
+ VkFence fence_h)
+{
+ ANV_FROM_HANDLE(anv_device, device, device_h);
+ VkResult result = VK_SUCCESS;
+
+ if (nativeFenceFd != -1) {
+ /* As a simple, first-pass implementation of VK_ANDROID_native_buffer, we
+ * block on the nativeFenceFd. This may introduce latency and is
+ * definitely inefficient, yet it's correct.
+ *
+ * FINISHME(chadv): Import the nativeFenceFd into the VkSemaphore and
+ * VkFence.
+ */
+ if (sync_wait(nativeFenceFd, /*timeout*/ -1) < 0) {
+ result = vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
+ "%s: failed to wait on nativeFenceFd=%d",
+ __func__, nativeFenceFd);
+ }
+
+ /* From VK_ANDROID_native_buffer's pseudo spec
+ * (https://source.android.com/devices/graphics/implement-vulkan):
+ *
+ * The driver takes ownership of the fence fd and is responsible for
+ * closing it [...] even if vkAcquireImageANDROID fails and returns
+ * an error.
+ */
+ close(nativeFenceFd);
+
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ if (semaphore_h || fence_h) {
+ /* Thanks to implicit sync, the image is ready for GPU access. But we
+ * must still put the semaphore into the "submit" state; otherwise the
+ * client may get unexpected behavior if the client later uses it as
+ * a wait semaphore.
+ *
+ * Because we blocked above on the nativeFenceFd, the image is also
+ * ready for foreign-device access (including CPU access). But we must
+ * still signal the fence; otherwise the client may get unexpected
+ * behavior if the client later waits on it.
+ *
+ * For some values of anv_semaphore_type, we must submit the semaphore
+ * to execbuf in order to signal it. Likewise for anv_fence_type.
+ * Instead of open-coding here the signal operation for each
+ * anv_semaphore_type and anv_fence_type, we piggy-back on
+ * vkQueueSubmit.
+ */
+ const VkSubmitInfo submit = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .waitSemaphoreCount = 0,
+ .commandBufferCount = 0,
+ .signalSemaphoreCount = (semaphore_h ? 1 : 0),
+ .pSignalSemaphores = &semaphore_h,
+ };
+
+ result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1,
+ &submit, fence_h);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, device, result,
+ "anv_QueueSubmit failed inside %s", __func__);
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+/* Implementation of vkQueueSignalReleaseImageANDROID.
+ *
+ * Submits an empty batch to queue that waits on all waitSemaphoreCount
+ * semaphores in pWaitSemaphores, then reports "no fence" (-1) through
+ * pNativeFenceFd when that pointer is non-NULL. Returns VK_SUCCESS, or the
+ * error returned by anv_QueueSubmit().
+ */
+VkResult
+anv_QueueSignalReleaseImageANDROID(
+ VkQueue queue,
+ uint32_t waitSemaphoreCount,
+ const VkSemaphore* pWaitSemaphores,
+ VkImage image,
+ int* pNativeFenceFd)
+{
+ VkResult result;
+
+ if (waitSemaphoreCount == 0)
+ goto done;
+
+ /* Wait on every semaphore the caller passed, not only the first one;
+ * returning -1 as the fence below relies on all of them being submitted.
+ */
+ result = anv_QueueSubmit(queue, 1,
+ &(VkSubmitInfo) {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .waitSemaphoreCount = waitSemaphoreCount,
+ .pWaitSemaphores = pWaitSemaphores,
+ },
+ (VkFence) VK_NULL_HANDLE);
+ if (result != VK_SUCCESS)
+ return result;
+
+ done:
+ if (pNativeFenceFd) {
+ /* We can rely on implicit sync because above we submitted all
+ * semaphores to the queue.
+ */
+ *pNativeFenceFd = -1;
+ }
+
+ return VK_SUCCESS;
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_debug_report.c b/lib/mesa/src/intel/vulkan/anv_debug_report.c
new file mode 100644
index 000000000..55d62057c
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_debug_report.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+#include "vk_util.h"
+
+/* This file contains implementation for VK_EXT_debug_report. */
+
+/* Implementation of vkCreateDebugReportCallbackEXT.
+ *
+ * Allocates a callback record, copies flags, function pointer and user data
+ * from pCreateInfo, appends the record to the instance's callback list under
+ * callbacks_mutex and returns its handle through pCallback. Returns
+ * VK_ERROR_OUT_OF_HOST_MEMORY when the allocation fails.
+ */
+VkResult
+anv_CreateDebugReportCallbackEXT(VkInstance _instance,
+ const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkDebugReportCallbackEXT* pCallback)
+{
+ ANV_FROM_HANDLE(anv_instance, instance, _instance);
+ struct anv_debug_report_callback *entry;
+
+ entry = vk_alloc2(&instance->alloc, pAllocator, sizeof(*entry), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (entry == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ entry->data = pCreateInfo->pUserData;
+ entry->callback = pCreateInfo->pfnCallback;
+ entry->flags = pCreateInfo->flags;
+
+ /* The list is shared with anv_debug_report(), so mutate it under the lock. */
+ pthread_mutex_lock(&instance->callbacks_mutex);
+ list_addtail(&entry->link, &instance->callbacks);
+ pthread_mutex_unlock(&instance->callbacks_mutex);
+
+ *pCallback = anv_debug_report_callback_to_handle(entry);
+ return VK_SUCCESS;
+}
+
+/* Implementation of vkDestroyDebugReportCallbackEXT.
+ *
+ * Unlinks the callback record from the instance's callback list and frees
+ * it, both under callbacks_mutex. A VK_NULL_HANDLE callback is ignored.
+ */
+void
+anv_DestroyDebugReportCallbackEXT(VkInstance _instance,
+ VkDebugReportCallbackEXT _callback,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_instance, instance, _instance);
+ ANV_FROM_HANDLE(anv_debug_report_callback, callback, _callback);
+
+ /* Destroying a null handle is a no-op; without this guard list_del()
+ * would dereference a NULL record.
+ */
+ if (callback == NULL)
+ return;
+
+ /* Remove from list and destroy given callback. */
+ pthread_mutex_lock(&instance->callbacks_mutex);
+ list_del(&callback->link);
+ vk_free2(&instance->alloc, pAllocator, callback);
+ pthread_mutex_unlock(&instance->callbacks_mutex);
+}
+
+/* Implementation of vkDebugReportMessageEXT: resolves the instance handle
+ * and forwards every argument unchanged to anv_debug_report().
+ */
+void
+anv_DebugReportMessageEXT(VkInstance _instance,
+ VkDebugReportFlagsEXT flags,
+ VkDebugReportObjectTypeEXT objectType,
+ uint64_t object,
+ size_t location,
+ int32_t messageCode,
+ const char* pLayerPrefix,
+ const char* pMessage)
+{
+ ANV_FROM_HANDLE(anv_instance, instance, _instance);
+ anv_debug_report(instance, flags, objectType, object,
+ location, messageCode, pLayerPrefix, pMessage);
+}
+
+/* Deliver a debug-report message to every callback registered on instance
+ * whose flags intersect the message's flags. The callback list is walked
+ * with instance->callbacks_mutex held. A NULL instance, or an instance with
+ * no registered callbacks, makes this a no-op.
+ */
+void
+anv_debug_report(struct anv_instance *instance,
+ VkDebugReportFlagsEXT flags,
+ VkDebugReportObjectTypeEXT object_type,
+ uint64_t handle,
+ size_t location,
+ int32_t messageCode,
+ const char* pLayerPrefix,
+ const char *pMessage)
+{
+ /* Allow NULL for convenience, return if no callbacks registered. */
+ if (!instance || list_empty(&instance->callbacks))
+ return;
+
+ pthread_mutex_lock(&instance->callbacks_mutex);
+
+ /* Section 33.2 of the Vulkan 1.0.59 spec says:
+ *
+ * "callback is an externally synchronized object and must not be
+ * used on more than one thread at a time. This means that
+ * vkDestroyDebugReportCallbackEXT must not be called when a callback
+ * is active."
+ */
+ list_for_each_entry(struct anv_debug_report_callback, cb,
+ &instance->callbacks, link) {
+ if (cb->flags & flags)
+ cb->callback(flags, object_type, handle, location, messageCode,
+ pLayerPrefix, pMessage, cb->data);
+ }
+
+ pthread_mutex_unlock(&instance->callbacks_mutex);
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_dump.c b/lib/mesa/src/intel/vulkan/anv_dump.c
index 060890421..160c18c4f 100644
--- a/lib/mesa/src/intel/vulkan/anv_dump.c
+++ b/lib/mesa/src/intel/vulkan/anv_dump.c
@@ -424,20 +424,25 @@ anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer,
uint32_t b;
for_each_bit(b, iview->image->aspects) {
VkImageAspectFlagBits aspect = (1 << b);
- char suffix;
+ const char *suffix;
switch (aspect) {
- case VK_IMAGE_ASPECT_COLOR_BIT: suffix = 'c'; break;
- case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = 'd'; break;
- case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = 's'; break;
+ case VK_IMAGE_ASPECT_COLOR_BIT: suffix = "c"; break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = "d"; break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = "s"; break;
+ case VK_IMAGE_ASPECT_PLANE_0_BIT_KHR: suffix = "c0"; break;
+ case VK_IMAGE_ASPECT_PLANE_1_BIT_KHR: suffix = "c1"; break;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT_KHR: suffix = "c2"; break;
default:
unreachable("Invalid aspect");
}
- char *filename = ralloc_asprintf(dump_ctx, "framebuffer%04d-%d%c.ppm",
+ char *filename = ralloc_asprintf(dump_ctx, "framebuffer%04d-%d%s.ppm",
dump_idx, i, suffix);
+ unsigned plane = anv_image_aspect_to_plane(iview->image->aspects, aspect);
dump_add_image(cmd_buffer, (struct anv_image *)iview->image, aspect,
- iview->isl.base_level, iview->isl.base_array_layer,
+ iview->planes[plane].isl.base_level,
+ iview->planes[plane].isl.base_array_layer,
filename);
}
}
diff --git a/lib/mesa/src/intel/vulkan/anv_extensions.c b/lib/mesa/src/intel/vulkan/anv_extensions.c
new file mode 100644
index 000000000..a5c1ea68a
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_extensions.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans */
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+# undef VK_USE_PLATFORM_ANDROID_KHR
+# define VK_USE_PLATFORM_ANDROID_KHR true
+#else
+# define VK_USE_PLATFORM_ANDROID_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+# undef VK_USE_PLATFORM_WAYLAND_KHR
+# define VK_USE_PLATFORM_WAYLAND_KHR true
+#else
+# define VK_USE_PLATFORM_WAYLAND_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XCB_KHR
+# undef VK_USE_PLATFORM_XCB_KHR
+# define VK_USE_PLATFORM_XCB_KHR true
+#else
+# define VK_USE_PLATFORM_XCB_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+# undef VK_USE_PLATFORM_XLIB_KHR
+# define VK_USE_PLATFORM_XLIB_KHR true
+#else
+# define VK_USE_PLATFORM_XLIB_KHR false
+#endif
+
+/* And ANDROID too */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define ANV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR)
+
+/* Return whether the named instance extension is supported by this build.
+ * Surface-related names depend on the VK_USE_PLATFORM_* booleans and
+ * ANV_HAS_SURFACE; names not listed here yield false.
+ */
+bool
+anv_instance_extension_supported(const char *name)
+{
+ if (strcmp(name, "VK_KHR_external_fence_capabilities") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_get_surface_capabilities2") == 0)
+ return ANV_HAS_SURFACE;
+ if (strcmp(name, "VK_KHR_surface") == 0)
+ return ANV_HAS_SURFACE;
+ if (strcmp(name, "VK_KHR_wayland_surface") == 0)
+ return VK_USE_PLATFORM_WAYLAND_KHR;
+ if (strcmp(name, "VK_KHR_xcb_surface") == 0)
+ return VK_USE_PLATFORM_XCB_KHR;
+ if (strcmp(name, "VK_KHR_xlib_surface") == 0)
+ return VK_USE_PLATFORM_XLIB_KHR;
+ if (strcmp(name, "VK_EXT_debug_report") == 0)
+ return true;
+ return false;
+}
+
+/* Report the supported instance extensions through the count/array pair
+ * pPropertyCount/pProperties, using the vk_outarray helpers. pLayerName is
+ * not consulted. Each entry is guarded by the same condition that
+ * anv_instance_extension_supported() returns for that name.
+ */
+VkResult anv_EnumerateInstanceExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_fence_capabilities",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory_capabilities",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore_capabilities",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_get_physical_device_properties2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (ANV_HAS_SURFACE) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_get_surface_capabilities2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (ANV_HAS_SURFACE) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_surface",
+ .specVersion = 25,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_WAYLAND_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_wayland_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_XCB_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_xcb_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_XLIB_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_xlib_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_EXT_debug_report",
+ .specVersion = 8,
+ };
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
+/* Return the Vulkan API version reported for a physical device. The value is
+ * the constant 1.0.57; dev is not consulted.
+ */
+uint32_t
+anv_physical_device_api_version(struct anv_physical_device *dev)
+{
+ return VK_MAKE_VERSION(1, 0, 57);
+}
+
+/* Return whether the named device extension is supported. Most answers are
+ * compile-time constants; the external-fence extensions depend on
+ * device->has_syncobj_wait. Names not listed here yield false.
+ */
+bool
+anv_physical_device_extension_supported(struct anv_physical_device *device,
+ const char *name)
+{
+ if (strcmp(name, "VK_ANDROID_native_buffer") == 0)
+ return ANDROID;
+ if (strcmp(name, "VK_KHR_bind_memory2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_dedicated_allocation") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_descriptor_update_template") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_fence") == 0)
+ return device->has_syncobj_wait;
+ if (strcmp(name, "VK_KHR_external_fence_fd") == 0)
+ return device->has_syncobj_wait;
+ if (strcmp(name, "VK_KHR_external_memory") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_memory_fd") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_semaphore") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_image_format_list") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_incremental_present") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_maintenance1") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_maintenance2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_push_descriptor") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_sampler_ycbcr_conversion") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_swapchain") == 0)
+ return ANV_HAS_SURFACE;
+ if (strcmp(name, "VK_KHR_variable_pointers") == 0)
+ return true;
+ if (strcmp(name, "VK_KHX_multiview") == 0)
+ return false;
+ return false;
+}
+
+/* Report the supported device extensions of physicalDevice through the
+ * count/array pair pPropertyCount/pProperties, using the vk_outarray
+ * helpers. pLayerName is not consulted. Each entry is guarded by the same
+ * condition that anv_physical_device_extension_supported() returns for that
+ * name.
+ */
+VkResult anv_EnumerateDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+ (void)device;
+
+ if (ANDROID) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_ANDROID_native_buffer",
+ .specVersion = 5,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_bind_memory2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_dedicated_allocation",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_descriptor_update_template",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->has_syncobj_wait) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_fence",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->has_syncobj_wait) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_fence_fd",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory_fd",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore_fd",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_get_memory_requirements2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_image_format_list",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_incremental_present",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_maintenance1",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_maintenance2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_push_descriptor",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_relaxed_block_layout",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_sampler_ycbcr_conversion",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_shader_draw_parameters",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_storage_buffer_storage_class",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (ANV_HAS_SURFACE) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_swapchain",
+ .specVersion = 68,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_variable_pointers",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (false) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHX_multiview",
+ .specVersion = 1,
+ };
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
diff --git a/lib/mesa/src/intel/vulkan/anv_extensions.py b/lib/mesa/src/intel/vulkan/anv_extensions.py
new file mode 100644
index 000000000..0f66c59e1
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_extensions.py
@@ -0,0 +1,284 @@
+COPYRIGHT = """\
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+"""
+
+import argparse
+import copy
+import re
+import xml.etree.cElementTree as et
+
+MAX_API_VERSION = '1.0.57'
+
+class Extension:
+ def __init__(self, name, ext_version, enable):
+ self.name = name
+ self.ext_version = int(ext_version)
+ if enable is True:
+ self.enable = 'true';
+ elif enable is False:
+ self.enable = 'false';
+ else:
+ self.enable = enable;
+
+ # On Android, we disable all surface and swapchain extensions. Android's Vulkan
+ # loader implements VK_KHR_surface and VK_KHR_swapchain, and applications
+ # cannot access the driver's implementation. Moreover, if the driver exposes
+ # those extension strings, then tests dEQP-VK.api.info.instance.extensions
+ # and dEQP-VK.api.info.device fail due to the duplicated strings.
+EXTENSIONS = [
+ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'),
+ Extension('VK_KHR_bind_memory2', 1, True),
+ Extension('VK_KHR_dedicated_allocation', 1, True),
+ Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_external_fence', 1,
+ 'device->has_syncobj_wait'),
+ Extension('VK_KHR_external_fence_capabilities', 1, True),
+ Extension('VK_KHR_external_fence_fd', 1,
+ 'device->has_syncobj_wait'),
+ Extension('VK_KHR_external_memory', 1, True),
+ Extension('VK_KHR_external_memory_capabilities', 1, True),
+ Extension('VK_KHR_external_memory_fd', 1, True),
+ Extension('VK_KHR_external_semaphore', 1, True),
+ Extension('VK_KHR_external_semaphore_capabilities', 1, True),
+ Extension('VK_KHR_external_semaphore_fd', 1, True),
+ Extension('VK_KHR_get_memory_requirements2', 1, True),
+ Extension('VK_KHR_get_physical_device_properties2', 1, True),
+ Extension('VK_KHR_get_surface_capabilities2', 1, 'ANV_HAS_SURFACE'),
+ Extension('VK_KHR_image_format_list', 1, True),
+ Extension('VK_KHR_incremental_present', 1, True),
+ Extension('VK_KHR_maintenance1', 1, True),
+ Extension('VK_KHR_maintenance2', 1, True),
+ Extension('VK_KHR_push_descriptor', 1, True),
+ Extension('VK_KHR_relaxed_block_layout', 1, True),
+ Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
+ Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
+ Extension('VK_KHR_shader_draw_parameters', 1, True),
+ Extension('VK_KHR_storage_buffer_storage_class', 1, True),
+ Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'),
+ Extension('VK_KHR_swapchain', 68, 'ANV_HAS_SURFACE'),
+ Extension('VK_KHR_variable_pointers', 1, True),
+ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
+ Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
+ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
+ Extension('VK_KHX_multiview', 1, False),
+ Extension('VK_EXT_debug_report', 8, True),
+]
+
+class VkVersion:
+ def __init__(self, string):
+ split = string.split('.')
+ self.major = int(split[0])
+ self.minor = int(split[1])
+ if len(split) > 2:
+ assert len(split) == 3
+ self.patch = int(split[2])
+ else:
+ self.patch = None
+
+ # Sanity check. The range bits are required by the definition of the
+ # VK_MAKE_VERSION macro
+ assert self.major < 1024 and self.minor < 1024
+ assert self.patch is None or self.patch < 4096
+ assert(str(self) == string)
+
+ def __str__(self):
+ ver_list = [str(self.major), str(self.minor)]
+ if self.patch is not None:
+ ver_list.append(str(self.patch))
+ return '.'.join(ver_list)
+
+ def c_vk_version(self):
+ ver_list = [str(self.major), str(self.minor), str(self.patch)]
+ return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
+
+ def __int_ver(self):
+ # This is just an expansion of VK_VERSION
+ patch = self.patch if self.patch is not None else 0
+ return (self.major << 22) | (self.minor << 12) | patch
+
+ def __cmp__(self, other):
+ # If only one of them has a patch version, "ignore" it by making
+ # other's patch version match self.
+ if (self.patch is None) != (other.patch is None):
+ other = copy.copy(other)
+ other.patch = self.patch
+
+ return self.__int_ver().__cmp__(other.__int_ver())
+
+MAX_API_VERSION = VkVersion(MAX_API_VERSION)
+
+def _init_exts_from_xml(xml):
+ """ Walk the Vulkan XML and fill out extra extension information. """
+
+ xml = et.parse(xml)
+
+ ext_name_map = {}
+ for ext in EXTENSIONS:
+ ext_name_map[ext.name] = ext
+
+ for ext_elem in xml.findall('.extensions/extension'):
+ ext_name = ext_elem.attrib['name']
+ if ext_name not in ext_name_map:
+ continue
+
+ # Workaround for VK_ANDROID_native_buffer. Its <extension> element in
+ # vk.xml lists it as supported="disabled" and provides only a stub
+ # definition. Its <extension> element in Mesa's custom
+ # vk_android_native_buffer.xml, though, lists it as
+ # supported='android-vendor' and fully defines the extension. We want
+ # to skip the <extension> element in vk.xml.
+ if ext_elem.attrib['supported'] == 'disabled':
+ assert ext_name == 'VK_ANDROID_native_buffer'
+ continue
+
+ ext = ext_name_map[ext_name]
+ ext.type = ext_elem.attrib['type']
+
+_TEMPLATE = COPYRIGHT + """
+#include "anv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans */
+%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']:
+#ifdef VK_USE_PLATFORM_${platform}_KHR
+# undef VK_USE_PLATFORM_${platform}_KHR
+# define VK_USE_PLATFORM_${platform}_KHR true
+#else
+# define VK_USE_PLATFORM_${platform}_KHR false
+#endif
+%endfor
+
+/* And ANDROID too */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define ANV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\
+ VK_USE_PLATFORM_XCB_KHR || \\
+ VK_USE_PLATFORM_XLIB_KHR)
+
+bool
+anv_instance_extension_supported(const char *name)
+{
+%for ext in instance_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult anv_EnumerateInstanceExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+
+%for ext in instance_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+
+uint32_t
+anv_physical_device_api_version(struct anv_physical_device *dev)
+{
+ return ${MAX_API_VERSION.c_vk_version()};
+}
+
+bool
+anv_physical_device_extension_supported(struct anv_physical_device *device,
+ const char *name)
+{
+%for ext in device_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult anv_EnumerateDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+ (void)device;
+
+%for ext in device_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+"""
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--out', help='Output C file.', required=True)
+ parser.add_argument('--xml',
+ help='Vulkan API XML file.',
+ required=True,
+ action='append',
+ dest='xml_files')
+ args = parser.parse_args()
+
+ for filename in args.xml_files:
+ _init_exts_from_xml(filename)
+
+ for ext in EXTENSIONS:
+ assert ext.type == 'instance' or ext.type == 'device'
+
+ template_env = {
+ 'MAX_API_VERSION': MAX_API_VERSION,
+ 'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'],
+ 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'],
+ }
+
+ from mako.template import Template
+
+ with open(args.out, 'w') as f:
+ f.write(Template(_TEMPLATE).render(**template_env))
diff --git a/lib/mesa/src/intel/vulkan/anv_icd.py b/lib/mesa/src/intel/vulkan/anv_icd.py
new file mode 100644
index 000000000..31bb0687a
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_icd.py
@@ -0,0 +1,47 @@
+# Copyright 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import json
+import os.path
+
+from anv_extensions import *
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--out', help='Output jsono file.', required=True)
+ parser.add_argument('--lib-path', help='Path to libvulkan_intel.so')
+ args = parser.parse_args()
+
+ path = 'libvulkan_intel.so'
+ if args.lib_path:
+ path = os.path.join(args.lib_path, path)
+
+ json_data = {
+ 'file_format_version': '1.0.0',
+ 'ICD': {
+ 'library_path': path,
+ 'api_version': str(MAX_API_VERSION),
+ },
+ }
+
+ with open(args.out, 'w') as f:
+ json.dump(json_data, f, indent = 4, sort_keys=True)
diff --git a/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c b/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
new file mode 100644
index 000000000..028f24e2f
--- /dev/null
+++ b/lib/mesa/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "anv_private.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+
+ /* State shared by the helpers while one texture instruction is lowered. */
+ struct ycbcr_state {
+ /* Builder used to emit the replacement instructions. */
+ nir_builder *builder;
+ /* Texture size converted to float; NULL until get_texture_size() has
+ * emitted the size query, then reused by later calls. */
+ nir_ssa_def *image_size;
+ /* The texture instruction being lowered. */
+ nir_tex_instr *origin_tex;
+ /* YCbCr conversion taken from the immutable sampler of the binding. */
+ struct anv_ycbcr_conversion *conversion;
+ };
+
+ /* Apply the range expansion of the conversion to a sampled luma value.
+  *
+  * Full range values are returned unchanged.  Narrow range values are
+  * multiplied by (2^bpc - 1), offset by -16 * 2^(bpc - 8) and then divided
+  * by 219 * 2^(bpc - 8).
+  */
+ static nir_ssa_def *
+ y_range(nir_builder *b,
+         nir_ssa_def *y_channel,
+         int bpc,
+         VkSamplerYcbcrRangeKHR range)
+ {
+    if (range == VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR)
+       return y_channel;
+
+    if (range == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR) {
+       const double max_code = pow(2, bpc) - 1;
+       const double depth_scale = pow(2, bpc - 8);
+
+       return nir_fmul(b,
+                       nir_fadd(b,
+                                nir_fmul(b, y_channel,
+                                         nir_imm_float(b, max_code)),
+                                nir_imm_float(b, -16.0f * depth_scale)),
+                       nir_frcp(b, nir_imm_float(b, 219.0f * depth_scale)));
+    }
+
+    unreachable("missing Ycbcr range");
+    return NULL;
+ }
+
+ /* Apply the range expansion of the conversion to a sampled chroma value.
+  *
+  * Full range: subtract 2^(bpc - 1) / (2^bpc - 1).
+  * Narrow range: multiply by (2^bpc - 1), offset by -128 * 2^(bpc - 8) and
+  * divide by 224 * 2^(bpc - 8).
+  */
+ static nir_ssa_def *
+ chroma_range(nir_builder *b,
+              nir_ssa_def *chroma_channel,
+              int bpc,
+              VkSamplerYcbcrRangeKHR range)
+ {
+    if (range == VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR) {
+       const double mid_code = pow(2, bpc - 1);
+       const double max_code = pow(2, bpc) - 1.0f;
+
+       return nir_fadd(b, chroma_channel,
+                       nir_imm_float(b, -mid_code / max_code));
+    }
+
+    if (range == VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR) {
+       const double max_code = pow(2, bpc) - 1;
+       const double depth_scale = pow(2, bpc - 8);
+
+       return nir_fmul(b,
+                       nir_fadd(b,
+                                nir_fmul(b, chroma_channel,
+                                         nir_imm_float(b, max_code)),
+                                nir_imm_float(b, -128.0f * depth_scale)),
+                       nir_frcp(b, nir_imm_float(b, 224.0f * depth_scale)));
+    }
+
+    unreachable("missing Ycbcr range");
+    return NULL;
+ }
+
+ /* Return the three-row constant matrix for a YCbCr model.
+  *
+  * convert_ycbcr() dots each row with the vector (Cr, Y, Cb, 1) to produce
+  * one output channel, so the columns are the Cr, Y and Cb weights followed
+  * by a constant term (always 0 here).
+  */
+ static const nir_const_value *
+ ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversionKHR model)
+ {
+    static const nir_const_value bt601[3] = {
+       { .f32 = {  1.402f,             1.0f,  0.0f,               0.0f } },
+       { .f32 = { -0.714136286201022f, 1.0f, -0.344136286201022f, 0.0f } },
+       { .f32 = {  0.0f,               1.0f,  1.772f,             0.0f } }
+    };
+    static const nir_const_value bt709[3] = {
+       { .f32 = {  1.5748031496063f,   1.0f,  0.0f,               0.0f } },
+       { .f32 = { -0.468125209181067f, 1.0f, -0.187327487470334f, 0.0f } },
+       { .f32 = {  0.0f,               1.0f,  1.85563184264242f,  0.0f } }
+    };
+    static const nir_const_value bt2020[3] = {
+       { .f32 = {  1.4746f,            1.0f,  0.0f,               0.0f } },
+       { .f32 = { -0.571353126843658f, 1.0f, -0.164553126843658f, 0.0f } },
+       { .f32 = {  0.0f,               1.0f,  1.8814f,            0.0f } }
+    };
+
+    switch (model) {
+    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR:
+       return bt601;
+    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR:
+       return bt709;
+    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR:
+       return bt2020;
+    default:
+       unreachable("missing Ycbcr model");
+       return NULL;
+    }
+ }
+
+ /* Range-expand the raw (Cr, Y, Cb) channels and, unless the model is
+  * YCBCR_IDENTITY, multiply them by the model's conversion matrix.
+  *
+  * raw_channels: vector whose channels 0, 1, 2 are handled as chroma, luma,
+  *               chroma respectively.
+  * bpcs:         bits per channel for channels 0..2.
+  *
+  * Returns a vec4 whose last channel is the constant 1.0.
+  *
+  * The builder calls are issued as separate statements: C leaves the
+  * evaluation order of function arguments and of initializer-list
+  * expressions unspecified, so nesting them would make the order of the
+  * emitted instructions depend on the compiler.
+  */
+ static nir_ssa_def *
+ convert_ycbcr(struct ycbcr_state *state,
+               nir_ssa_def *raw_channels,
+               uint32_t *bpcs)
+ {
+    nir_builder *b = state->builder;
+    struct anv_ycbcr_conversion *conversion = state->conversion;
+
+    nir_ssa_def *raw_cr = nir_channel(b, raw_channels, 0);
+    nir_ssa_def *cr = chroma_range(b, raw_cr, bpcs[0], conversion->ycbcr_range);
+    nir_ssa_def *raw_y = nir_channel(b, raw_channels, 1);
+    nir_ssa_def *y = y_range(b, raw_y, bpcs[1], conversion->ycbcr_range);
+    nir_ssa_def *raw_cb = nir_channel(b, raw_channels, 2);
+    nir_ssa_def *cb = chroma_range(b, raw_cb, bpcs[2], conversion->ycbcr_range);
+
+    nir_ssa_def *expanded_channels =
+       nir_vec4(b, cr, y, cb, nir_imm_float(b, 1.0f));
+
+    if (conversion->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR)
+       return expanded_channels;
+
+    const nir_const_value *conversion_matrix =
+       ycbcr_model_to_rgb_matrix(conversion->ycbcr_model);
+
+    /* One dot product per matrix row, emitted in row order. */
+    nir_ssa_def *converted_channels[3];
+    for (unsigned row = 0; row < 3; row++) {
+       nir_ssa_def *weights = nir_build_imm(b, 4, 32, conversion_matrix[row]);
+       converted_channels[row] = nir_fdot4(b, expanded_channels, weights);
+    }
+
+    return nir_vec4(b,
+                    converted_channels[0], converted_channels[1],
+                    converted_channels[2], nir_imm_float(b, 1.0f));
+ }
+
+ /* Return the size of the texture as a float vector.
+  *
+  * The first call emits a txs instruction at the builder's cursor and caches
+  * its float conversion in state->image_size; later calls return the cached
+  * value.
+  *
+  * TODO: we should probably replace this with a push constant/uniform.
+  */
+ static nir_ssa_def *
+ get_texture_size(struct ycbcr_state *state, nir_deref_var *texture)
+ {
+    if (state->image_size == NULL) {
+       nir_builder *b = state->builder;
+       const struct glsl_type *sampler_type =
+          nir_deref_tail(&texture->deref)->type;
+       nir_tex_instr *txs = nir_tex_instr_create(b->shader, 0);
+
+       txs->op = nir_texop_txs;
+       txs->sampler_dim = glsl_get_sampler_dim(sampler_type);
+       txs->is_array = glsl_sampler_type_is_array(sampler_type);
+       txs->is_shadow = glsl_sampler_type_is_shadow(sampler_type);
+       txs->texture = nir_deref_var_clone(texture, txs);
+       txs->dest_type = nir_type_int;
+
+       nir_ssa_dest_init(&txs->instr, &txs->dest,
+                         nir_tex_instr_dest_size(txs), 32, NULL);
+       nir_builder_instr_insert(b, &txs->instr);
+
+       state->image_size = nir_i2f32(b, &txs->dest.ssa);
+    }
+
+    return state->image_size;
+ }
+
+ /* Emit value + 1.0 / (div_scale * max_value) and return the result. */
+ static nir_ssa_def *
+ implicit_downsampled_coord(nir_builder *b,
+ nir_ssa_def *value,
+ nir_ssa_def *max_value,
+ int div_scale)
+ {
+ return nir_fadd(b,
+ value,
+ nir_fdiv(b,
+ nir_imm_float(b, 1.0f),
+ nir_fmul(b,
+ nir_imm_float(b, div_scale),
+ max_value)));
+ }
+
+ /* Build the coordinate vector used to sample one plane.
+  *
+  * For each axis that has a chroma offset entry, the coordinate is shifted
+  * with implicit_downsampled_coord() when the plane is subsampled on that
+  * axis (denominator scale > 1) and the chroma location is COSITED_EVEN.
+  * All other components of old_coords are passed through unchanged.
+  */
+ static nir_ssa_def *
+ implicit_downsampled_coords(struct ycbcr_state *state,
+                             nir_ssa_def *old_coords,
+                             const struct anv_format_plane *plane_format)
+ {
+    nir_builder *b = state->builder;
+    struct anv_ycbcr_conversion *conversion = state->conversion;
+    nir_ssa_def *image_size = get_texture_size(state,
+                                               state->origin_tex->texture);
+    nir_ssa_def *comp[4] = { NULL, };
+    int c = 0;
+
+    while (c < ARRAY_SIZE(conversion->chroma_offsets)) {
+       const bool needs_shift =
+          plane_format->denominator_scales[c] > 1 &&
+          conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN_KHR;
+
+       if (!needs_shift) {
+          comp[c] = nir_channel(b, old_coords, c);
+       } else {
+          comp[c] = implicit_downsampled_coord(b,
+                                               nir_channel(b, old_coords, c),
+                                               nir_channel(b, image_size, c),
+                                               plane_format->denominator_scales[c]);
+       }
+       c++;
+    }
+
+    /* Leave other coordinates untouched */
+    while (c < old_coords->num_components) {
+       comp[c] = nir_channel(b, old_coords, c);
+       c++;
+    }
+
+    return nir_vec(b, comp, old_coords->num_components);
+ }
+
+ /* Emit a copy of the original texture instruction that samples one plane.
+  *
+  * The new instruction has all sources of the original plus one extra
+  * nir_tex_src_plane source holding the plane index.  When the plane carries
+  * chroma and the conversion requests chroma reconstruction, the coordinate
+  * source is replaced by implicit_downsampled_coords().
+  *
+  * Returns the SSA destination of the new instruction.
+  */
+ static nir_ssa_def *
+ create_plane_tex_instr_implicit(struct ycbcr_state *state,
+                                 uint32_t plane)
+ {
+    nir_builder *b = state->builder;
+    struct anv_ycbcr_conversion *conversion = state->conversion;
+    const struct anv_format_plane *plane_format =
+       &conversion->format->planes[plane];
+    nir_tex_instr *old_tex = state->origin_tex;
+    nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
+
+    for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
+       tex->src[i].src_type = old_tex->src[i].src_type;
+
+       switch (old_tex->src[i].src_type) {
+       case nir_tex_src_coord:
+          if (plane_format->has_chroma && conversion->chroma_reconstruction) {
+             assert(old_tex->src[i].src.is_ssa);
+             tex->src[i].src =
+                nir_src_for_ssa(implicit_downsampled_coords(state,
+                                                            old_tex->src[i].src.ssa,
+                                                            plane_format));
+             break;
+          }
+          /* fall through */
+       default:
+          nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
+          break;
+       }
+    }
+    /* The last source slot selects the plane. */
+    tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
+    tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
+
+    tex->sampler_dim = old_tex->sampler_dim;
+    tex->dest_type = old_tex->dest_type;
+    /* The per-plane instruction must describe the same kind of image as the
+     * original one; without this, array-ness is lost in the copy.
+     */
+    tex->is_array = old_tex->is_array;
+
+    tex->op = old_tex->op;
+    tex->coord_components = old_tex->coord_components;
+    tex->is_new_style_shadow = old_tex->is_new_style_shadow;
+    tex->component = old_tex->component;
+
+    tex->texture_index = old_tex->texture_index;
+    tex->texture_array_size = old_tex->texture_array_size;
+    tex->texture = nir_deref_var_clone(old_tex->texture, tex);
+
+    tex->sampler_index = old_tex->sampler_index;
+    tex->sampler = nir_deref_var_clone(old_tex->sampler, tex);
+
+    nir_ssa_dest_init(&tex->instr, &tex->dest,
+                      old_tex->dest.ssa.num_components,
+                      nir_dest_bit_size(old_tex->dest), NULL);
+    nir_builder_instr_insert(b, &tex->instr);
+
+    return &tex->dest.ssa;
+ }
+
+ /* Map a color channel select (RED/GREEN/BLUE/ALPHA) to its component
+  * index 0..3.  Any other select value is treated as unreachable.
+  */
+ static unsigned
+ channel_to_component(enum isl_channel_select channel)
+ {
+    if (channel == ISL_CHANNEL_SELECT_RED)
+       return 0;
+    if (channel == ISL_CHANNEL_SELECT_GREEN)
+       return 1;
+    if (channel == ISL_CHANNEL_SELECT_BLUE)
+       return 2;
+    if (channel == ISL_CHANNEL_SELECT_ALPHA)
+       return 3;
+
+    unreachable("invalid channel");
+    return 0;
+ }
+
+ /* Return the channel select stored in the r, g, b or a member of the
+  * swizzle for channel index 0, 1, 2 or 3.  Other indices are treated as
+  * unreachable.
+  */
+ static enum isl_channel_select
+ swizzle_channel(struct isl_swizzle swizzle, unsigned channel)
+ {
+    if (channel == 0)
+       return swizzle.r;
+    if (channel == 1)
+       return swizzle.g;
+    if (channel == 2)
+       return swizzle.b;
+    if (channel == 3)
+       return swizzle.a;
+
+    unreachable("invalid channel");
+    return 0;
+ }
+
+ /* Try to lower one texture instruction that samples through an immutable
+ * sampler carrying a YCbCr conversion.
+ *
+ * Returns false and leaves the instruction alone when it is a
+ * txs/query_levels/lod operation, when the binding has no immutable
+ * samplers, when the texture array is indexed indirectly, or when the
+ * selected sampler has no conversion.  Otherwise one texture instruction per
+ * format plane is emitted before the original, the samples are gathered and
+ * swizzled, the color conversion is applied, all uses of the original
+ * destination are rewritten to the result, the original instruction is
+ * removed, and true is returned.
+ */
+ static bool
+ try_lower_tex_ycbcr(struct anv_pipeline *pipeline,
+ nir_builder *builder,
+ nir_tex_instr *tex)
+ {
+ nir_variable *var = tex->texture->var;
+ const struct anv_descriptor_set_layout *set_layout =
+ pipeline->layout->set[var->data.descriptor_set].layout;
+ const struct anv_descriptor_set_binding_layout *binding =
+ &set_layout->binding[var->data.binding];
+
+ /* For the following instructions, we don't apply any change and let the
+ * instruction apply to the first plane.
+ */
+ if (tex->op == nir_texop_txs ||
+ tex->op == nir_texop_query_levels ||
+ tex->op == nir_texop_lod)
+ return false;
+
+ if (binding->immutable_samplers == NULL)
+ return false;
+
+ /* Find which immutable sampler of the binding this instruction uses.  Only
+ * direct (constant) array indexing is supported; the constant offset is
+ * clamped to hw_binding_size - 1.
+ */
+ unsigned texture_index = tex->texture_index;
+ if (tex->texture->deref.child) {
+ assert(tex->texture->deref.child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tex->texture->deref.child);
+ if (deref_array->deref_array_type != nir_deref_array_type_direct)
+ return false;
+ size_t hw_binding_size =
+ anv_descriptor_set_binding_layout_get_hw_size(binding);
+ texture_index += MIN2(deref_array->base_offset, hw_binding_size - 1);
+ }
+ const struct anv_sampler *sampler =
+ binding->immutable_samplers[texture_index];
+
+ if (sampler->conversion == NULL)
+ return false;
+
+ struct ycbcr_state state = {
+ .builder = builder,
+ .origin_tex = tex,
+ .conversion = sampler->conversion,
+ };
+
+ /* All new instructions go right before the instruction being replaced. */
+ builder->cursor = nir_before_instr(&tex->instr);
+
+ /* Take the bit count of the first channel of a plane without chroma (the
+ * last such plane wins) as the default bits-per-channel value.
+ */
+ const struct anv_format *format = state.conversion->format;
+ const struct isl_format_layout *y_isl_layout = NULL;
+ for (uint32_t p = 0; p < format->n_planes; p++) {
+ if (!format->planes[p].has_chroma)
+ y_isl_layout = isl_format_get_layout(format->planes[p].isl_format);
+ }
+ assert(y_isl_layout != NULL);
+ uint8_t y_bpc = y_isl_layout->channels_array[0].bits;
+
+ /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */
+ nir_ssa_def *ycbcr_comp[5] = { NULL, NULL, NULL,
+ /* Use extra 2 channels for following swizzle */
+ nir_imm_float(builder, 1.0f),
+ nir_imm_float(builder, 0.0f),
+ };
+ uint8_t ycbcr_bpcs[5];
+ memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs));
+
+ /* Go through all the planes and gather the samples into a |ycbcr_comp|
+ * while applying a swizzle required by the spec:
+ *
+ * R, G, B should respectively map to Cr, Y, Cb
+ */
+ for (uint32_t p = 0; p < format->n_planes; p++) {
+ const struct anv_format_plane *plane_format = &format->planes[p];
+ nir_ssa_def *plane_sample = create_plane_tex_instr_implicit(&state, p);
+
+ for (uint32_t pc = 0; pc < 4; pc++) {
+ enum isl_channel_select ycbcr_swizzle =
+ swizzle_channel(plane_format->ycbcr_swizzle, pc);
+ /* A ZERO select means this plane channel contributes nothing. */
+ if (ycbcr_swizzle == ISL_CHANNEL_SELECT_ZERO)
+ continue;
+
+ unsigned ycbcr_component = channel_to_component(ycbcr_swizzle);
+ ycbcr_comp[ycbcr_component] = nir_channel(builder, plane_sample, pc);
+
+ /* Also compute the number of bits for each component. */
+ const struct isl_format_layout *isl_layout =
+ isl_format_get_layout(plane_format->isl_format);
+ ycbcr_bpcs[ycbcr_component] = isl_layout->channels_array[pc].bits;
+ }
+ }
+
+ /* Now remap the components to the order specified by the conversion. */
+ nir_ssa_def *swizzled_comp[4] = { NULL, };
+ uint32_t swizzled_bpcs[4] = { 0, };
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state.conversion->mapping); i++) {
+ /* Maps to components in |ycbcr_comp| */
+ static const uint32_t swizzle_mapping[] = {
+ [VK_COMPONENT_SWIZZLE_ZERO] = 4,
+ [VK_COMPONENT_SWIZZLE_ONE] = 3,
+ [VK_COMPONENT_SWIZZLE_R] = 0,
+ [VK_COMPONENT_SWIZZLE_G] = 1,
+ [VK_COMPONENT_SWIZZLE_B] = 2,
+ [VK_COMPONENT_SWIZZLE_A] = 3,
+ };
+ const VkComponentSwizzle m = state.conversion->mapping[i];
+
+ /* IDENTITY keeps component i in place; anything else goes through the
+ * table above.
+ */
+ if (m == VK_COMPONENT_SWIZZLE_IDENTITY) {
+ swizzled_comp[i] = ycbcr_comp[i];
+ swizzled_bpcs[i] = ycbcr_bpcs[i];
+ } else {
+ swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]];
+ swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]];
+ }
+ }
+
+ /* RGB_IDENTITY skips both range expansion and the model matrix. */
+ nir_ssa_def *result = nir_vec(builder, swizzled_comp, 4);
+ if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR)
+ result = convert_ycbcr(&state, result, swizzled_bpcs);
+
+ nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
+ nir_instr_remove(&tex->instr);
+
+ return true;
+ }
+
+ /* Run the YCbCr lowering over every texture instruction of the shader.
+  *
+  * For each function implementation in which at least one instruction was
+  * lowered, block-index and dominance metadata are marked as preserved.
+  * Returns true when any instruction in the shader was lowered.
+  */
+ bool
+ anv_nir_lower_ycbcr_textures(nir_shader *shader, struct anv_pipeline *pipeline)
+ {
+    bool any_lowered = false;
+
+    nir_foreach_function(function, shader) {
+       if (function->impl == NULL)
+          continue;
+
+       nir_builder b;
+       bool impl_lowered = false;
+       nir_builder_init(&b, function->impl);
+
+       nir_foreach_block(block, function->impl) {
+          nir_foreach_instr_safe(instr, block) {
+             if (instr->type == nir_instr_type_tex &&
+                 try_lower_tex_ycbcr(pipeline, &b, nir_instr_as_tex(instr)))
+                impl_lowered = true;
+          }
+       }
+
+       if (!impl_lowered)
+          continue;
+
+       nir_metadata_preserve(function->impl,
+                             nir_metadata_block_index |
+                             nir_metadata_dominance);
+       any_lowered = true;
+    }
+
+    return any_lowered;
+ }