author     Jonathan Gray <jsg@cvs.openbsd.org>	2020-01-22 02:10:09 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>	2020-01-22 02:10:09 +0000
commit     d1e8c371581041f403dcdcff4ab8a88e970d221e (patch)
tree       621cf3eea9401b6fc19ce2a6dc5aa7579ecc8c70 /lib/mesa/src/freedreno/vulkan/tu_private.h
parent     81f619d3e99a3a218e6318d06c2bc1a36052e75d (diff)
Import Mesa 19.2.8
Diffstat (limited to 'lib/mesa/src/freedreno/vulkan/tu_private.h')
-rw-r--r--   lib/mesa/src/freedreno/vulkan/tu_private.h   2165
1 file changed, 829 insertions, 1336 deletions
diff --git a/lib/mesa/src/freedreno/vulkan/tu_private.h b/lib/mesa/src/freedreno/vulkan/tu_private.h
index 862d507c9..c2440471f 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_private.h
+++ b/lib/mesa/src/freedreno/vulkan/tu_private.h
@@ -40,47 +40,28 @@
#include <valgrind.h>
#define VG(x) x
#else
-#define VG(x) ((void)0)
+#define VG(x)
#endif
-#define MESA_LOG_TAG "TU"
-
#include "c11/threads.h"
-#include "util/rounding.h"
-#include "util/bitscan.h"
+#include "compiler/shader_enums.h"
+#include "main/macros.h"
#include "util/list.h"
-#include "util/log.h"
#include "util/macros.h"
-#include "util/sparse_array.h"
-#include "util/u_atomic.h"
-#include "util/u_dynarray.h"
-#include "util/xmlconfig.h"
-#include "util/perf/u_trace.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
-#include "vk_device.h"
-#include "vk_dispatch_table.h"
-#include "vk_extensions.h"
-#include "vk_instance.h"
-#include "vk_log.h"
-#include "vk_physical_device.h"
-#include "vk_shader_module.h"
#include "wsi_common.h"
+#include "drm-uapi/msm_drm.h"
#include "ir3/ir3_compiler.h"
#include "ir3/ir3_shader.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "a6xx.xml.h"
-#include "fdl/freedreno_layout.h"
-#include "common/freedreno_dev_info.h"
-#include "perfcntrs/freedreno_perfcntr.h"
#include "tu_descriptor_set.h"
-#include "tu_autotune.h"
-#include "tu_util.h"
-#include "tu_perfetto.h"
+#include "tu_extensions.h"
/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
@@ -92,54 +73,143 @@ typedef uint32_t xcb_window_t;
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_intel.h>
#include "tu_entrypoints.h"
-#include "vk_format.h"
-#include "vk_image.h"
-#include "vk_command_buffer.h"
-#include "vk_command_pool.h"
-#include "vk_queue.h"
-#include "vk_object.h"
-#include "vk_sync.h"
-#include "vk_fence.h"
-#include "vk_semaphore.h"
-#include "vk_drm_syncobj.h"
-#include "vk_sync_timeline.h"
-
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
#define MAX_VSC_PIPES 32
-#define MAX_VIEWPORTS 16
-#define MAX_VIEWPORT_SIZE (1 << 14)
+#define MAX_VIEWPORTS 1
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
-#define MAX_DYNAMIC_BUFFERS_SIZE \
- (MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \
- A6XX_TEX_CONST_DWORDS
-
+#define MAX_DYNAMIC_BUFFERS \
+ (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_SAMPLES_LOG2 4
+#define NUM_META_FS_KEYS 13
#define TU_MAX_DRM_DEVICES 8
-#define MAX_VIEWS 16
-#define MAX_BIND_POINTS 2 /* compute + graphics */
-/* The Qualcomm driver exposes 0x20000058 */
-#define MAX_STORAGE_BUFFER_RANGE 0x20000000
-/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
- * expose the same maximum range.
- * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
- * range might be higher.
+#define MAX_VIEWS 8
+
+#define NUM_DEPTH_CLEAR_PIPELINES 3
+
+/*
+ * This is the point we switch from using CP to compute shader
+ * for certain buffer operations.
*/
-#define MAX_UNIFORM_BUFFER_RANGE 0x10000
+#define TU_BUFFER_OPS_CS_THRESHOLD 4096
+
+enum tu_mem_heap
+{
+ TU_MEM_HEAP_VRAM,
+ TU_MEM_HEAP_VRAM_CPU_ACCESS,
+ TU_MEM_HEAP_GTT,
+ TU_MEM_HEAP_COUNT
+};
+
+enum tu_mem_type
+{
+ TU_MEM_TYPE_VRAM,
+ TU_MEM_TYPE_GTT_WRITE_COMBINE,
+ TU_MEM_TYPE_VRAM_CPU_ACCESS,
+ TU_MEM_TYPE_GTT_CACHED,
+ TU_MEM_TYPE_COUNT
+};
+
+#define tu_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
+
+static inline uint32_t
+align_u32(uint32_t v, uint32_t a)
+{
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
+}
+
+static inline uint32_t
+align_u32_npot(uint32_t v, uint32_t a)
+{
+ return (v + a - 1) / a * a;
+}
+
+static inline uint64_t
+align_u64(uint64_t v, uint64_t a)
+{
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
+}
+
+static inline int32_t
+align_i32(int32_t v, int32_t a)
+{
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
+}
-#define A6XX_TEX_CONST_DWORDS 16
-#define A6XX_TEX_SAMP_DWORDS 4
+/** Alignment must be a power of 2. */
+static inline bool
+tu_is_aligned(uintmax_t n, uintmax_t a)
+{
+ assert(a == (a & -a));
+ return (n & (a - 1)) == 0;
+}
-#define COND(bool, val) ((bool) ? (val) : 0)
-#define BIT(bit) (1u << (bit))
+static inline uint32_t
+round_up_u32(uint32_t v, uint32_t a)
+{
+ return (v + a - 1) / a;
+}
+
+static inline uint64_t
+round_up_u64(uint64_t v, uint64_t a)
+{
+ return (v + a - 1) / a;
+}
+
+static inline uint32_t
+tu_minify(uint32_t n, uint32_t levels)
+{
+ if (unlikely(n == 0))
+ return 0;
+ else
+ return MAX2(n >> levels, 1);
+}
+static inline float
+tu_clamp_f(float f, float min, float max)
+{
+ assert(min < max);
+
+ if (f > max)
+ return max;
+ else if (f < min)
+ return min;
+ else
+ return f;
+}
+
+static inline bool
+tu_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
+{
+ if (*inout_mask & clear_mask) {
+ *inout_mask &= ~clear_mask;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+#define for_each_bit(b, dword) \
+ for (uint32_t __dword = (dword); \
+ (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b)))
+
+#define typed_memcpy(dest, src, count) \
+ ({ \
+ STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
+ memcpy((dest), (src), (count) * sizeof(*(src))); \
+ })
/* Whenever we generate an error, pass it through this function. Useful for
* debugging, where we can break on it. Only call at error site, not when
@@ -149,25 +219,29 @@ typedef uint32_t xcb_window_t;
struct tu_instance;
VkResult
-__vk_startup_errorf(struct tu_instance *instance,
- VkResult error,
- bool force_print,
- const char *file,
- int line,
- const char *format,
- ...) PRINTFLIKE(6, 7);
-
-/* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
- * build.
- */
-#define vk_startup_errorf(instance, error, format, ...) \
- __vk_startup_errorf(instance, error, \
- instance->debug_flags & TU_DEBUG_STARTUP, \
- __FILE__, __LINE__, format, ##__VA_ARGS__)
+__vk_errorf(struct tu_instance *instance,
+ VkResult error,
+ const char *file,
+ int line,
+ const char *format,
+ ...);
+
+#define vk_error(instance, error) \
+ __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
+#define vk_errorf(instance, error, format, ...) \
+ __vk_errorf(instance, error, __FILE__, __LINE__, format, ##__VA_ARGS__);
void
__tu_finishme(const char *file, int line, const char *format, ...)
- PRINTFLIKE(3, 4);
+ tu_printflike(3, 4);
+void
+tu_loge(const char *format, ...) tu_printflike(1, 2);
+void
+tu_loge_v(const char *format, va_list va);
+void
+tu_logi(const char *format, ...) tu_printflike(1, 2);
+void
+tu_logi_v(const char *format, va_list va);
/**
* Print a FINISHME message, including its source location.
@@ -181,35 +255,46 @@ __tu_finishme(const char *file, int line, const char *format, ...)
} \
} while (0)
+/* A non-fatal assert. Useful for debugging. */
+#ifdef DEBUG
+#define tu_assert(x) \
+ ({ \
+ if (unlikely(!(x))) \
+ fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
+ })
+#else
+#define tu_assert(x)
+#endif
+
+/* Suppress -Wunused in stub functions */
+#define tu_use_args(...) __tu_use_args(0, ##__VA_ARGS__)
+static inline void
+__tu_use_args(int ignore, ...)
+{
+}
+
#define tu_stub() \
do { \
tu_finishme("stub %s", __func__); \
} while (0)
-struct tu_memory_heap {
- /* Standard bits passed on to the client */
- VkDeviceSize size;
- VkMemoryHeapFlags flags;
-
- /** Copied from ANV:
- *
- * Driver-internal book-keeping.
- *
- * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
- */
- VkDeviceSize used __attribute__ ((aligned (8)));
-};
-
-uint64_t
-tu_get_system_heap_size(void);
+void *
+tu_lookup_entrypoint_unchecked(const char *name);
+void *
+tu_lookup_entrypoint_checked(
+ const char *name,
+ uint32_t core_version,
+ const struct tu_instance_extension_table *instance,
+ const struct tu_device_extension_table *device);
struct tu_physical_device
{
- struct vk_physical_device vk;
+ VK_LOADER_DATA _loader_data;
struct tu_instance *instance;
- const char *name;
+ char path[20];
+ char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
uint8_t driver_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
uint8_t cache_uuid[VK_UUID_SIZE];
@@ -217,71 +302,43 @@ struct tu_physical_device
struct wsi_device wsi_device;
int local_fd;
- bool has_local;
- int64_t local_major;
- int64_t local_minor;
int master_fd;
- bool has_master;
- int64_t master_major;
- int64_t master_minor;
+ unsigned gpu_id;
uint32_t gmem_size;
- uint64_t gmem_base;
- uint32_t ccu_offset_gmem;
- uint32_t ccu_offset_bypass;
-
- struct fd_dev_id dev_id;
- const struct fd_dev_info *info;
-
- int msm_major_version;
- int msm_minor_version;
-
- /* Address space and global fault count for this local_fd with DRM backend */
- uint64_t fault_count;
+ uint32_t tile_align_w;
+ uint32_t tile_align_h;
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
struct disk_cache *disk_cache;
- struct tu_memory_heap heap;
-
- struct vk_sync_type syncobj_type;
- struct vk_sync_timeline_type timeline_type;
- const struct vk_sync_type *sync_types[3];
+ struct tu_device_extension_table supported_extensions;
};
enum tu_debug_flags
{
TU_DEBUG_STARTUP = 1 << 0,
TU_DEBUG_NIR = 1 << 1,
- TU_DEBUG_NOBIN = 1 << 3,
- TU_DEBUG_SYSMEM = 1 << 4,
- TU_DEBUG_FORCEBIN = 1 << 5,
- TU_DEBUG_NOUBWC = 1 << 6,
- TU_DEBUG_NOMULTIPOS = 1 << 7,
- TU_DEBUG_NOLRZ = 1 << 8,
- TU_DEBUG_PERFC = 1 << 9,
- TU_DEBUG_FLUSHALL = 1 << 10,
- TU_DEBUG_SYNCDRAW = 1 << 11,
- TU_DEBUG_DONT_CARE_AS_LOAD = 1 << 12,
- TU_DEBUG_GMEM = 1 << 13,
- TU_DEBUG_RAST_ORDER = 1 << 14,
- TU_DEBUG_UNALIGNED_STORE = 1 << 15,
+ TU_DEBUG_IR3 = 1 << 2,
};
struct tu_instance
{
- struct vk_instance vk;
+ VK_LOADER_DATA _loader_data;
+
+ VkAllocationCallbacks alloc;
uint32_t api_version;
int physical_device_count;
struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES];
- struct driOptionCache dri_options;
- struct driOptionCache available_dri_options;
-
enum tu_debug_flags debug_flags;
+
+ struct vk_debug_report_instance debug_report_callbacks;
+
+ struct tu_instance_extension_table enabled_extensions;
};
VkResult
@@ -297,19 +354,10 @@ bool
tu_physical_device_extension_supported(struct tu_physical_device *dev,
const char *name);
-enum tu_bo_alloc_flags
-{
- TU_BO_ALLOC_NO_FLAGS = 0,
- TU_BO_ALLOC_ALLOW_DUMP = 1 << 0,
- TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1,
-};
-
struct cache_entry;
struct tu_pipeline_cache
{
- struct vk_object_base base;
-
struct tu_device *device;
pthread_mutex_t mutex;
@@ -326,313 +374,115 @@ struct tu_pipeline_key
{
};
+void
+tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
+ struct tu_device *device);
+void
+tu_pipeline_cache_finish(struct tu_pipeline_cache *cache);
+void
+tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
+ const void *data,
+ size_t size);
-/* queue types */
-#define TU_QUEUE_GENERAL 0
-
-#define TU_MAX_QUEUE_FAMILIES 1
-
-/* Keep tu_syncobj until porting to common code for kgsl too */
-#ifdef TU_USE_KGSL
-struct tu_syncobj;
-#endif
-struct tu_u_trace_syncobj;
-
-/* Define tu_timeline_sync type based on drm syncobj for a point type
- * for vk_sync_timeline, and the logic to handle is mostly copied from
- * anv_bo_sync since it seems it can be used by similar way to anv.
- */
-enum tu_timeline_sync_state {
- /** Indicates that this is a new (or newly reset fence) */
- TU_TIMELINE_SYNC_STATE_RESET,
-
- /** Indicates that this fence has been submitted to the GPU but is still
- * (as far as we know) in use by the GPU.
- */
- TU_TIMELINE_SYNC_STATE_SUBMITTED,
-
- TU_TIMELINE_SYNC_STATE_SIGNALED,
-};
-
-struct tu_timeline_sync {
- struct vk_sync base;
-
- enum tu_timeline_sync_state state;
- uint32_t syncobj;
-};
-
-struct tu_queue
-{
- struct vk_queue vk;
-
- struct tu_device *device;
+struct tu_shader_variant;
- uint32_t msm_queue_id;
- int fence;
-};
+bool
+tu_create_shader_variants_from_pipeline_cache(
+ struct tu_device *device,
+ struct tu_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct tu_shader_variant **variants);
-struct tu_bo
+void
+tu_pipeline_cache_insert_shaders(struct tu_device *device,
+ struct tu_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct tu_shader_variant **variants,
+ const void *const *codes,
+ const unsigned *code_sizes);
+
+struct tu_meta_state
{
- uint32_t gem_handle;
- uint64_t size;
- uint64_t iova;
- void *map;
- int32_t refcnt;
-
-#ifndef TU_USE_KGSL
- uint32_t bo_list_idx;
-#endif
+ VkAllocationCallbacks alloc;
- bool implicit_sync : 1;
+ struct tu_pipeline_cache cache;
};
-/* externally-synchronized BO suballocator. */
-struct tu_suballocator
-{
- struct tu_device *dev;
-
- uint32_t default_size;
- enum tu_bo_alloc_flags flags;
-
- /** Current BO we're suballocating out of. */
- struct tu_bo *bo;
- uint32_t next_offset;
+/* queue types */
+#define TU_QUEUE_GENERAL 0
- /** Optional BO cached for recycling as the next suballoc->bo, instead of having to allocate one. */
- struct tu_bo *cached_bo;
-};
+#define TU_MAX_QUEUE_FAMILIES 1
-struct tu_suballoc_bo
+struct tu_fence
{
- struct tu_bo *bo;
- uint64_t iova;
- uint32_t size; /* bytes */
+ bool signaled;
+ int fd;
};
void
-tu_bo_suballocator_init(struct tu_suballocator *suballoc,
- struct tu_device *dev,
- uint32_t default_size,
- uint32_t flags);
+tu_fence_init(struct tu_fence *fence, bool signaled);
void
-tu_bo_suballocator_finish(struct tu_suballocator *suballoc);
-
-VkResult
-tu_suballoc_bo_alloc(struct tu_suballoc_bo *suballoc_bo, struct tu_suballocator *suballoc,
- uint32_t size, uint32_t align);
-
-void *
-tu_suballoc_bo_map(struct tu_suballoc_bo *bo);
-
+tu_fence_finish(struct tu_fence *fence);
void
-tu_suballoc_bo_free(struct tu_suballocator *suballoc, struct tu_suballoc_bo *bo);
-
-enum global_shader {
- GLOBAL_SH_VS_BLIT,
- GLOBAL_SH_VS_CLEAR,
- GLOBAL_SH_FS_BLIT,
- GLOBAL_SH_FS_BLIT_ZSCALE,
- GLOBAL_SH_FS_COPY_MS,
- GLOBAL_SH_FS_CLEAR0,
- GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS,
- GLOBAL_SH_COUNT,
-};
-
-/**
- * Tracks the results from an individual renderpass. Initially created
- * per renderpass, and appended to the tail of at->pending_results. At a later
- * time, when the GPU has finished writing the results, we fill samples_passed.
- */
-struct tu_renderpass_result {
- /* Points into GPU memory */
- struct tu_renderpass_samples* samples;
-
- struct tu_suballoc_bo bo;
-
- /*
- * Below here, only used internally within autotune
- */
- uint64_t rp_key;
- struct tu_renderpass_history *history;
- struct list_head node;
- uint32_t fence;
- uint64_t samples_passed;
-};
-
-#define TU_BORDER_COLOR_COUNT 4096
-#define TU_BORDER_COLOR_BUILTIN 6
-
-#define TU_BLIT_SHADER_SIZE 1024
+tu_fence_update_fd(struct tu_fence *fence, int fd);
+void
+tu_fence_copy(struct tu_fence *fence, const struct tu_fence *src);
+void
+tu_fence_signal(struct tu_fence *fence);
+void
+tu_fence_wait_idle(struct tu_fence *fence);
-/* This struct defines the layout of the global_bo */
-struct tu6_global
+struct tu_queue
{
- /* clear/blit shaders */
- uint32_t shaders[TU_BLIT_SHADER_SIZE];
-
- uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
- uint32_t _pad0;
- volatile uint32_t vsc_draw_overflow;
- uint32_t _pad1;
- volatile uint32_t vsc_prim_overflow;
- uint32_t _pad2;
- uint64_t predicate;
-
- /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
- struct {
- uint32_t offset;
- uint32_t pad[7];
- } flush_base[4];
-
- ALIGN16 uint32_t cs_indirect_xyz[3];
-
- /* To know when renderpass stats for autotune are valid */
- volatile uint32_t autotune_fence;
+ VK_LOADER_DATA _loader_data;
+ struct tu_device *device;
+ uint32_t queue_family_index;
+ int queue_idx;
+ VkDeviceQueueCreateFlags flags;
- /* note: larger global bo will be used for customBorderColors */
- struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
+ uint32_t msm_queue_id;
+ struct tu_fence submit_fence;
};
-#define gb_offset(member) offsetof(struct tu6_global, member)
-#define global_iova(cmd, member) ((cmd)->device->global_bo->iova + gb_offset(member))
-
-/* extra space in vsc draw/prim streams */
-#define VSC_PAD 0x40
struct tu_device
{
- struct vk_device vk;
+ VK_LOADER_DATA _loader_data;
+
+ VkAllocationCallbacks alloc;
+
struct tu_instance *instance;
+ struct tu_meta_state meta_state;
+
struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES];
int queue_count[TU_MAX_QUEUE_FAMILIES];
struct tu_physical_device *physical_device;
- int fd;
struct ir3_compiler *compiler;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
-#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
-
- /* Currently the kernel driver uses a 32-bit GPU address space, but it
- * should be impossible to go beyond 48 bits.
- */
- struct {
- struct tu_bo *bo;
- mtx_t construct_mtx;
- bool initialized;
- } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
-
- struct tu_bo *global_bo;
-
- uint32_t implicit_sync_bo_count;
-
- /* Device-global BO suballocator for reducing BO management overhead for
- * (read-only) pipeline state. Synchronized by pipeline_mutex.
- */
- struct tu_suballocator pipeline_suballoc;
- mtx_t pipeline_mutex;
-
- /* Device-global BO suballocator for reducing BO management for small
- * gmem/sysmem autotune result buffers. Synchronized by autotune_mutex.
- */
- struct tu_suballocator autotune_suballoc;
- mtx_t autotune_mutex;
-
- /* the blob seems to always use 8K factor and 128K param sizes, copy them */
-#define TU_TESS_FACTOR_SIZE (8 * 1024)
-#define TU_TESS_PARAM_SIZE (128 * 1024)
-#define TU_TESS_BO_SIZE (TU_TESS_FACTOR_SIZE + TU_TESS_PARAM_SIZE)
- /* Lazily allocated, protected by the device mutex. */
- struct tu_bo *tess_bo;
-
- struct ir3_shader_variant *global_shaders[GLOBAL_SH_COUNT];
- uint64_t global_shader_va[GLOBAL_SH_COUNT];
-
- uint32_t vsc_draw_strm_pitch;
- uint32_t vsc_prim_strm_pitch;
- BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT);
- mtx_t mutex;
-
- /* bo list for submits: */
- struct drm_msm_gem_submit_bo *bo_list;
- /* map bo handles to bo list index: */
- uint32_t bo_count, bo_list_size;
- mtx_t bo_mutex;
- /* protects imported BOs creation/freeing */
- struct u_rwlock dma_bo_lock;
-
- /* This array holds all our 'struct tu_bo' allocations. We use this
- * so we can add a refcount to our BOs and check if a particular BO
- * was already allocated in this device using its GEM handle. This is
- * necessary to properly manage BO imports, because the kernel doesn't
- * refcount the underlying BO memory.
- *
- * Specifically, when self-importing (i.e. importing a BO into the same
- * device that created it), the kernel will give us the same BO handle
- * for both BOs and we must only free it once when both references are
- * freed. Otherwise, if we are not self-importing, we get two different BO
- * handles, and we want to free each one individually.
- *
- * The refcount is also useful for being able to maintain BOs across
- * VK object lifetimes, such as pipelines suballocating out of BOs
- * allocated on the device.
- */
- struct util_sparse_array bo_map;
-
- /* Command streams to set pass index to a scratch reg */
- struct tu_cs *perfcntrs_pass_cs;
- struct tu_cs_entry *perfcntrs_pass_cs_entries;
-
- /* Condition variable for timeline semaphore to notify waiters when a
- * new submit is executed. */
- pthread_cond_t timeline_cond;
- pthread_mutex_t submit_mutex;
-
- struct tu_autotune autotune;
-
-#ifdef ANDROID
- const void *gralloc;
- enum {
- TU_GRALLOC_UNKNOWN,
- TU_GRALLOC_CROS,
- TU_GRALLOC_OTHER,
- } gralloc_type;
-#endif
+ struct list_head shader_slabs;
+ mtx_t shader_slab_mutex;
- uint32_t submit_count;
-
- struct u_trace_context trace_context;
-
- #ifdef HAVE_PERFETTO
- struct tu_perfetto_state perfetto;
- #endif
+ struct tu_device_extension_table enabled_extensions;
};
-void tu_init_clear_blit_shaders(struct tu_device *dev);
-
-void tu_destroy_clear_blit_shaders(struct tu_device *dev);
-
-VkResult
-tu_device_submit_deferred_locked(struct tu_device *dev);
-
-VkResult
-tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj);
-
-uint64_t
-tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts);
-
-VkResult
-tu_device_check_status(struct vk_device *vk_device);
+struct tu_bo
+{
+ uint32_t gem_handle;
+ uint64_t size;
+ uint64_t iova;
+ void *map;
+};
VkResult
-tu_bo_init_new(struct tu_device *dev, struct tu_bo **bo, uint64_t size,
- enum tu_bo_alloc_flags flags);
+tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
- struct tu_bo **bo,
+ struct tu_bo *bo,
uint64_t size,
int fd);
int
@@ -642,28 +492,6 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
-static inline struct tu_bo *
-tu_device_lookup_bo(struct tu_device *device, uint32_t handle)
-{
- return (struct tu_bo *) util_sparse_array_get(&device->bo_map, handle);
-}
-
-static inline struct tu_bo *
-tu_bo_get_ref(struct tu_bo *bo)
-{
- p_atomic_inc(&bo->refcnt);
- return bo;
-}
-
-/* Get a scratch bo for use inside a command buffer. This will always return
- * the same bo given the same size or similar sizes, so only one scratch bo
- * can be used at the same time. It's meant for short-lived things where we
- * need to write to some piece of memory, read from it, and then immediately
- * discard it.
- */
-VkResult
-tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
-
struct tu_cs_entry
{
/* No ownership */
@@ -673,58 +501,6 @@ struct tu_cs_entry
uint32_t offset;
};
-struct tu_cs_memory {
- uint32_t *map;
- uint64_t iova;
-};
-
-struct tu_draw_state {
- uint64_t iova : 48;
- uint32_t size : 16;
-};
-
-enum tu_dynamic_state
-{
- /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
- TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
- TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
- TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
- TU_DYNAMIC_STATE_VB_STRIDE,
- TU_DYNAMIC_STATE_RASTERIZER_DISCARD,
- TU_DYNAMIC_STATE_COUNT,
- /* no associated draw state: */
- TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
- TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE,
- /* re-use the line width enum as it uses GRAS_SU_CNTL: */
- TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
-};
-
-enum tu_draw_state_group_id
-{
- TU_DRAW_STATE_PROGRAM_CONFIG,
- TU_DRAW_STATE_PROGRAM,
- TU_DRAW_STATE_PROGRAM_BINNING,
- TU_DRAW_STATE_VB,
- TU_DRAW_STATE_VI,
- TU_DRAW_STATE_VI_BINNING,
- TU_DRAW_STATE_RAST,
- TU_DRAW_STATE_BLEND,
- TU_DRAW_STATE_SHADER_GEOM_CONST,
- TU_DRAW_STATE_FS_CONST,
- TU_DRAW_STATE_DESC_SETS,
- TU_DRAW_STATE_DESC_SETS_LOAD,
- TU_DRAW_STATE_VS_PARAMS,
- TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
- TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
- TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
- TU_DRAW_STATE_PRIM_MODE_GMEM,
- TU_DRAW_STATE_PRIM_MODE_SYSMEM,
-
- /* dynamic state related draw states */
- TU_DRAW_STATE_DYNAMIC,
- TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
-};
-
enum tu_cs_mode
{
@@ -765,7 +541,6 @@ struct tu_cs
uint32_t *reserved_end;
uint32_t *end;
- struct tu_device *device;
enum tu_cs_mode mode;
uint32_t next_bo_size;
@@ -776,20 +551,20 @@ struct tu_cs
struct tu_bo **bos;
uint32_t bo_count;
uint32_t bo_capacity;
-
- /* Optional BO that this CS is sub-allocated from for TU_CS_MODE_SUB_STREAM */
- struct tu_bo *refcount_bo;
-
- /* state for cond_exec_start/cond_exec_end */
- uint32_t cond_flags;
- uint32_t *cond_dwords;
};
struct tu_device_memory
{
- struct vk_object_base base;
+ struct tu_bo bo;
+ VkDeviceSize size;
- struct tu_bo *bo;
+ /* for dedicated allocations */
+ struct tu_image *image;
+ struct tu_buffer *buffer;
+
+ uint32_t type_index;
+ void *map;
+ void *user_ptr;
};
struct tu_descriptor_range
@@ -800,19 +575,18 @@ struct tu_descriptor_range
struct tu_descriptor_set
{
- struct vk_object_base base;
-
- /* Link to descriptor pool's desc_sets list . */
- struct list_head pool_link;
-
- struct tu_descriptor_set_layout *layout;
- struct tu_descriptor_pool *pool;
+ const struct tu_descriptor_set_layout *layout;
uint32_t size;
uint64_t va;
uint32_t *mapped_ptr;
+ struct tu_descriptor_range *dynamic_descriptors;
+};
- uint32_t *dynamic_descriptors;
+struct tu_push_descriptor_set
+{
+ struct tu_descriptor_set set;
+ uint32_t capacity;
};
struct tu_descriptor_pool_entry
@@ -824,18 +598,13 @@ struct tu_descriptor_pool_entry
struct tu_descriptor_pool
{
- struct vk_object_base base;
-
- struct tu_bo *bo;
+ uint8_t *mapped_ptr;
uint64_t current_offset;
uint64_t size;
uint8_t *host_memory_base;
uint8_t *host_memory_ptr;
uint8_t *host_memory_end;
- uint8_t *host_bo;
-
- struct list_head desc_sets;
uint32_t entry_count;
uint32_t max_entry_count;
@@ -866,13 +635,11 @@ struct tu_descriptor_update_template_entry
size_t src_stride;
/* For push descriptors */
- const struct tu_sampler *immutable_samplers;
+ const uint32_t *immutable_samplers;
};
struct tu_descriptor_update_template
{
- struct vk_object_base base;
-
uint32_t entry_count;
VkPipelineBindPoint bind_point;
struct tu_descriptor_update_template_entry entry[0];
@@ -880,257 +647,175 @@ struct tu_descriptor_update_template
struct tu_buffer
{
- struct vk_object_base base;
-
VkDeviceSize size;
VkBufferUsageFlags usage;
VkBufferCreateFlags flags;
struct tu_bo *bo;
- uint64_t iova;
+ VkDeviceSize bo_offset;
};
-const char *
-tu_get_debug_option_name(int id);
-
-const char *
-tu_get_perftest_option_name(int id);
+enum tu_dynamic_state_bits
+{
+ TU_DYNAMIC_VIEWPORT = 1 << 0,
+ TU_DYNAMIC_SCISSOR = 1 << 1,
+ TU_DYNAMIC_LINE_WIDTH = 1 << 2,
+ TU_DYNAMIC_DEPTH_BIAS = 1 << 3,
+ TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4,
+ TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5,
+ TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
+ TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
+ TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
+ TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
+ TU_DYNAMIC_ALL = (1 << 10) - 1,
+};
+
+struct tu_vertex_binding
+{
+ struct tu_buffer *buffer;
+ VkDeviceSize offset;
+};
-struct tu_descriptor_state
+struct tu_viewport_state
{
- struct tu_descriptor_set *sets[MAX_SETS];
- struct tu_descriptor_set push_set;
- uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
+ uint32_t count;
+ VkViewport viewports[MAX_VIEWPORTS];
};
-enum tu_cmd_dirty_bits
+struct tu_scissor_state
{
- TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
- TU_CMD_DIRTY_VB_STRIDE = BIT(1),
- TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
- TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
- TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
- TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
- TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
- TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
- TU_CMD_DIRTY_LRZ = BIT(8),
- TU_CMD_DIRTY_VS_PARAMS = BIT(9),
- TU_CMD_DIRTY_RASTERIZER_DISCARD = BIT(10),
- TU_CMD_DIRTY_VIEWPORTS = BIT(11),
- /* all draw states were disabled and need to be re-enabled: */
- TU_CMD_DIRTY_DRAW_STATE = BIT(12)
+ uint32_t count;
+ VkRect2D scissors[MAX_SCISSORS];
};
-/* There are only three cache domains we have to care about: the CCU, or
- * color cache unit, which is used for color and depth/stencil attachments
- * and copy/blit destinations, and is split conceptually into color and depth,
- * and the universal cache or UCHE which is used for pretty much everything
- * else, except for the CP (uncached) and host. We need to flush whenever data
- * crosses these boundaries.
- */
+struct tu_discard_rectangle_state
+{
+ uint32_t count;
+ VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
+};
-enum tu_cmd_access_mask {
- TU_ACCESS_UCHE_READ = 1 << 0,
- TU_ACCESS_UCHE_WRITE = 1 << 1,
- TU_ACCESS_CCU_COLOR_READ = 1 << 2,
- TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
- TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
- TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,
-
- /* Experiments have shown that while it's safe to avoid flushing the CCU
- * after each blit/renderpass, it's not safe to assume that subsequent
- * lookups with a different attachment state will hit unflushed cache
- * entries. That is, the CCU needs to be flushed and possibly invalidated
- * when accessing memory with a different attachment state. Writing to an
- * attachment under the following conditions after clearing using the
- * normal 2d engine path is known to have issues:
- *
- * - It isn't the 0'th layer.
- * - There are more than one attachment, and this isn't the 0'th attachment
- * (this seems to also depend on the cpp of the attachments).
- *
- * Our best guess is that the layer/MRT state is used when computing
- * the location of a cache entry in CCU, to avoid conflicts. We assume that
- * any access in a renderpass after or before an access by a transfer needs
- * a flush/invalidate, and use the _INCOHERENT variants to represent access
- * by a renderpass.
+struct tu_dynamic_state
+{
+ /**
+ * Bitmask of (1 << VK_DYNAMIC_STATE_*).
+ * Defines the set of saved dynamic state.
*/
- TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
- TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
- TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
- TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,
+ uint32_t mask;
- /* Accesses which bypasses any cache. e.g. writes via the host,
- * CP_EVENT_WRITE::BLIT, and the CP are SYSMEM_WRITE.
- */
- TU_ACCESS_SYSMEM_READ = 1 << 10,
- TU_ACCESS_SYSMEM_WRITE = 1 << 11,
+ struct tu_viewport_state viewport;
- /* Memory writes from the CP start in-order with draws and event writes,
- * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
- */
- TU_ACCESS_CP_WRITE = 1 << 12,
-
- TU_ACCESS_READ =
- TU_ACCESS_UCHE_READ |
- TU_ACCESS_CCU_COLOR_READ |
- TU_ACCESS_CCU_DEPTH_READ |
- TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
- TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
- TU_ACCESS_SYSMEM_READ,
-
- TU_ACCESS_WRITE =
- TU_ACCESS_UCHE_WRITE |
- TU_ACCESS_CCU_COLOR_WRITE |
- TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
- TU_ACCESS_CCU_DEPTH_WRITE |
- TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
- TU_ACCESS_SYSMEM_WRITE |
- TU_ACCESS_CP_WRITE,
-
- TU_ACCESS_ALL =
- TU_ACCESS_READ |
- TU_ACCESS_WRITE,
-};
+ struct tu_scissor_state scissor;
-/* Starting with a6xx, the pipeline is split into several "clusters" (really
- * pipeline stages). Each stage has its own pair of register banks and can
- * switch them independently, so that earlier stages can run ahead of later
- * ones. e.g. the FS of draw N and the VS of draw N + 1 can be executing at
- * the same time.
- *
- * As a result of this, we need to insert a WFI when an earlier stage depends
- * on the result of a later stage. CP_DRAW_* and CP_BLIT will wait for any
- * pending WFI's to complete before starting, and usually before reading
- * indirect params even, so a WFI also acts as a full "pipeline stall".
- *
- * Note, the names of the stages come from CLUSTER_* in devcoredump. We
- * include all the stages for completeness, even ones which do not read/write
- * anything.
- */
+ float line_width;
-enum tu_stage {
- /* This doesn't correspond to a cluster, but we need it for tracking
- * indirect draw parameter reads etc.
- */
- TU_STAGE_CP,
+ struct
+ {
+ float bias;
+ float clamp;
+ float slope;
+ } depth_bias;
- /* - Fetch index buffer
- * - Fetch vertex attributes, dispatch VS
- */
- TU_STAGE_FE,
+ float blend_constants[4];
- /* Execute all geometry stages (VS thru GS) */
- TU_STAGE_SP_VS,
+ struct
+ {
+ float min;
+ float max;
+ } depth_bounds;
- /* Write to VPC, do primitive assembly. */
- TU_STAGE_PC_VS,
+ struct
+ {
+ uint32_t front;
+ uint32_t back;
+ } stencil_compare_mask;
- /* Rasterization. RB_DEPTH_BUFFER_BASE only exists in CLUSTER_PS according
- * to devcoredump so presumably this stage stalls for TU_STAGE_PS when
- * early depth testing is enabled before dispatching fragments? However
- * GRAS reads and writes LRZ directly.
- */
- TU_STAGE_GRAS,
+ struct
+ {
+ uint32_t front;
+ uint32_t back;
+ } stencil_write_mask;
- /* Execute FS */
- TU_STAGE_SP_PS,
+ struct
+ {
+ uint32_t front;
+ uint32_t back;
+ } stencil_reference;
- /* - Fragment tests
- * - Write color/depth
- * - Streamout writes (???)
- * - Varying interpolation (???)
- */
- TU_STAGE_PS,
+ struct tu_discard_rectangle_state discard_rectangle;
};
-enum tu_cmd_flush_bits {
- TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
- TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
- TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
- TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
- TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
- TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
- TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
- TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
- TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,
-
- TU_CMD_FLAG_ALL_FLUSH =
- TU_CMD_FLAG_CCU_FLUSH_DEPTH |
- TU_CMD_FLAG_CCU_FLUSH_COLOR |
- TU_CMD_FLAG_CACHE_FLUSH |
- /* Treat the CP as a sort of "cache" which may need to be "flushed" via
- * waiting for writes to land with WAIT_FOR_MEM_WRITES.
- */
- TU_CMD_FLAG_WAIT_MEM_WRITES,
-
- TU_CMD_FLAG_ALL_INVALIDATE =
- TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
- TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
- TU_CMD_FLAG_CACHE_INVALIDATE |
- /* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when a
- * a command that needs CP_WAIT_FOR_ME is executed. This means we may
- * insert an extra WAIT_FOR_ME before an indirect command requiring it
- * in case there was another command before the current command buffer
- * that it needs to wait for.
- */
- TU_CMD_FLAG_WAIT_FOR_ME,
-};
+extern const struct tu_dynamic_state default_dynamic_state;
-/* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
- * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
- * which part of the gmem is used by the CCU. Here we keep track of what the
- * state of the CCU.
- */
-enum tu_cmd_ccu_state {
- TU_CMD_CCU_SYSMEM,
- TU_CMD_CCU_GMEM,
- TU_CMD_CCU_UNKNOWN,
-};
+const char *
+tu_get_debug_option_name(int id);
-struct tu_cache_state {
- /* Caches which must be made available (flushed) eventually if there are
- * any users outside that cache domain, and caches which must be
- * invalidated eventually if there are any reads.
- */
- enum tu_cmd_flush_bits pending_flush_bits;
- /* Pending flushes */
- enum tu_cmd_flush_bits flush_bits;
-};
+const char *
+tu_get_perftest_option_name(int id);
-enum tu_lrz_force_disable_mask {
- TU_LRZ_FORCE_DISABLE_LRZ = 1 << 0,
- TU_LRZ_FORCE_DISABLE_WRITE = 1 << 1,
+/**
+ * Attachment state when recording a renderpass instance.
+ *
+ * The clear value is valid only if there exists a pending clear.
+ */
+struct tu_attachment_state
+{
+ VkImageAspectFlags pending_clear_aspects;
+ uint32_t cleared_views;
+ VkClearValue clear_value;
+ VkImageLayout current_layout;
};
-enum tu_lrz_direction {
- TU_LRZ_UNKNOWN,
- /* Depth func less/less-than: */
- TU_LRZ_LESS,
- /* Depth func greater/greater-than: */
- TU_LRZ_GREATER,
+struct tu_descriptor_state
+{
+ struct tu_descriptor_set *sets[MAX_SETS];
+ uint32_t dirty;
+ uint32_t valid;
+ struct tu_push_descriptor_set push_set;
+ bool push_dirty;
+ uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
};
-struct tu_lrz_pipeline
+struct tu_tile
{
- uint32_t force_disable_mask;
- bool fs_has_kill;
- bool force_late_z;
- bool early_fragment_tests;
+ uint8_t pipe;
+ uint8_t slot;
+ VkOffset2D begin;
+ VkOffset2D end;
};
-struct tu_lrz_state
+struct tu_tiling_config
{
- /* Depth/Stencil image currently on use to do LRZ */
- struct tu_image *image;
- bool valid : 1;
- enum tu_lrz_direction prev_direction;
+ VkRect2D render_area;
+ uint32_t buffer_cpp[MAX_RTS + 2];
+ uint32_t buffer_count;
+
+ /* position and size of the first tile */
+ VkRect2D tile0;
+ /* number of tiles */
+ VkExtent2D tile_count;
+
+ uint32_t gmem_offsets[MAX_RTS + 2];
+
+ /* size of the first VSC pipe */
+ VkExtent2D pipe0;
+ /* number of VSC pipes */
+ VkExtent2D pipe_count;
+
+ /* pipe register values */
+ uint32_t pipe_config[MAX_VSC_PIPES];
+ uint32_t pipe_sizes[MAX_VSC_PIPES];
};
-struct tu_vs_params {
- uint32_t vertex_offset;
- uint32_t first_instance;
+enum tu_cmd_dirty_bits
+{
+ TU_CMD_DIRTY_PIPELINE = 1 << 0,
+ TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 1,
+
+ TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16,
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17,
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18,
+ TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19,
};
struct tu_cmd_state
@@ -1138,119 +823,48 @@ struct tu_cmd_state
uint32_t dirty;
struct tu_pipeline *pipeline;
- struct tu_pipeline *compute_pipeline;
- /* Vertex buffers, viewports, and scissors
- * the states for these can be updated partially, so we need to save these
- * to be able to emit a complete draw state
- */
- struct {
- uint64_t base;
- uint32_t size;
- uint32_t stride;
- } vb[MAX_VBS];
- VkViewport viewport[MAX_VIEWPORTS];
- VkRect2D scissor[MAX_SCISSORS];
- uint32_t max_viewport, max_scissor;
-
- /* for dynamic states that can't be emitted directly */
- uint32_t dynamic_stencil_mask;
- uint32_t dynamic_stencil_wrmask;
- uint32_t dynamic_stencil_ref;
-
- uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
- uint32_t pc_raster_cntl, vpc_unknown_9107;
- enum pc_di_primtype primtype;
- bool primitive_restart_enable;
-
- /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
- struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
- struct tu_draw_state vertex_buffers;
- struct tu_draw_state shader_const[2];
- struct tu_draw_state desc_sets;
-
- struct tu_draw_state vs_params;
+ /* Vertex buffers */
+ struct
+ {
+ struct tu_buffer *buffers[MAX_VBS];
+ VkDeviceSize offsets[MAX_VBS];
+ } vb;
+
+ struct tu_dynamic_state dynamic;
/* Index buffer */
- uint64_t index_va;
+ struct tu_buffer *index_buffer;
+ uint64_t index_offset;
+ uint32_t index_type;
uint32_t max_index_count;
- uint8_t index_size;
-
- /* because streamout base has to be 32-byte aligned
- * there is an extra offset to deal with when it is
- * unaligned
- */
- uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];
-
- /* Renderpasses are tricky, because we may need to flush differently if
- * using sysmem vs. gmem and therefore we have to delay any flushing that
- * happens before a renderpass. So we have to have two copies of the flush
- * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
- * and one for outside a renderpass.
- */
- struct tu_cache_state cache;
- struct tu_cache_state renderpass_cache;
-
- enum tu_cmd_ccu_state ccu_state;
+ uint64_t index_va;
const struct tu_render_pass *pass;
const struct tu_subpass *subpass;
const struct tu_framebuffer *framebuffer;
- VkRect2D render_area;
-
- const struct tu_image_view **attachments;
+ struct tu_attachment_state *attachments;
- bool xfb_used;
- bool has_tess;
- bool tessfactor_addr_set;
- bool has_subpass_predication;
- bool predication_active;
- bool disable_gmem;
- enum a5xx_line_mode line_mode;
- bool z_negative_one_to_one;
+ struct tu_tiling_config tiling_config;
- uint32_t drawcall_count;
-
- /* A calculated "draw cost" value for renderpass, which tries to
- * estimate the bandwidth-per-sample of all the draws according
- * to:
- *
- * foreach_draw (...) {
- * cost += num_frag_outputs;
- * if (blend_enabled)
- * cost += num_blend_enabled;
- * if (depth_test_enabled)
- * cost++;
- * if (depth_write_enabled)
- * cost++;
- * }
- *
- * The idea is that each sample-passed minimally does one write
- * per MRT. If blend is enabled, the hw will additionally do
- * a framebuffer read per sample-passed (for each MRT with blend
- * enabled). If depth-test is enabled, the hw will additionally
- * a depth buffer read. If depth-write is enable, the hw will
- * additionally do a depth buffer write.
- *
- * This does ignore depth buffer traffic for samples which do not
- * pass do to depth-test fail, and some other details. But it is
- * just intended to be a rough estimate that is easy to calculate.
- */
- uint32_t total_drawcalls_cost;
-
- struct tu_lrz_state lrz;
-
- struct tu_draw_state lrz_and_depth_plane_state;
-
- struct tu_vs_params last_vs_params;
+ struct tu_cs_entry tile_load_ib;
+ struct tu_cs_entry tile_store_ib;
};
struct tu_cmd_pool
{
- struct vk_command_pool vk;
-
+ VkAllocationCallbacks alloc;
struct list_head cmd_buffers;
struct list_head free_cmd_buffers;
+ uint32_t queue_family_index;
+};
+
+struct tu_cmd_buffer_upload
+{
+ uint8_t *map;
+ unsigned offset;
+ uint64_t size;
+ struct list_head list;
};
enum tu_cmd_buffer_status
@@ -1262,116 +876,165 @@ enum tu_cmd_buffer_status
TU_CMD_BUFFER_STATUS_PENDING,
};
+struct tu_bo_list
+{
+ uint32_t count;
+ uint32_t capacity;
+ struct drm_msm_gem_submit_bo *bo_infos;
+};
+
+#define TU_BO_LIST_FAILED (~0)
+
+void
+tu_bo_list_init(struct tu_bo_list *list);
+void
+tu_bo_list_destroy(struct tu_bo_list *list);
+void
+tu_bo_list_reset(struct tu_bo_list *list);
+uint32_t
+tu_bo_list_add(struct tu_bo_list *list,
+ const struct tu_bo *bo,
+ uint32_t flags);
+VkResult
+tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other);
+
struct tu_cmd_buffer
{
- struct vk_command_buffer vk;
+ VK_LOADER_DATA _loader_data;
struct tu_device *device;
struct tu_cmd_pool *pool;
struct list_head pool_link;
- struct u_trace trace;
- struct u_trace_iterator trace_renderpass_start;
- struct u_trace_iterator trace_renderpass_end;
-
- struct list_head renderpass_autotune_results;
- struct tu_autotune_results_buffer* autotune_buffer;
-
VkCommandBufferUsageFlags usage_flags;
+ VkCommandBufferLevel level;
enum tu_cmd_buffer_status status;
struct tu_cmd_state state;
+ struct tu_vertex_binding vertex_bindings[MAX_VBS];
uint32_t queue_family_index;
- uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
+ uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
VkShaderStageFlags push_constant_stages;
struct tu_descriptor_set meta_push_descriptors;
- struct tu_descriptor_state descriptors[MAX_BIND_POINTS];
+ struct tu_descriptor_state descriptors[VK_PIPELINE_BIND_POINT_RANGE_SIZE];
+
+ struct tu_cmd_buffer_upload upload;
VkResult record_result;
+ struct tu_bo_list bo_list;
struct tu_cs cs;
struct tu_cs draw_cs;
- struct tu_cs tile_store_cs;
- struct tu_cs draw_epilogue_cs;
- struct tu_cs sub_cs;
+ struct tu_cs tile_cs;
- uint32_t vsc_draw_strm_pitch;
- uint32_t vsc_prim_strm_pitch;
-};
+ uint16_t marker_reg;
+ uint32_t marker_seqno;
-/* Temporary struct for tracking a register state to be written, used by
- * a6xx-pack.h and tu_cs_emit_regs()
- */
-struct tu_reg_value {
- uint32_t reg;
- uint64_t value;
- bool is_address;
- struct tu_bo *bo;
- bool bo_write;
- uint32_t bo_offset;
- uint32_t bo_shift;
+ struct tu_bo scratch_bo;
+ uint32_t scratch_seqno;
+
+ bool wait_for_idle;
};
+void
+tu6_emit_event_write(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ enum vgt_event_type event,
+ bool need_seqno);
+
+bool
+tu_get_memory_fd(struct tu_device *device,
+ struct tu_device_memory *memory,
+ int *pFD);
-void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
- struct tu_cs *cs);
+/*
+ * Takes x,y,z as exact numbers of invocations, instead of blocks.
+ *
+ * Limitations: Can't call normal dispatch functions without binding or
+ * rebinding
+ * the compute pipeline.
+ */
+void
+tu_unaligned_dispatch(struct tu_cmd_buffer *cmd_buffer,
+ uint32_t x,
+ uint32_t y,
+ uint32_t z);
+
+struct tu_event
+{
+ uint64_t *map;
+};
-void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
- struct tu_cs *cs,
- enum tu_cmd_ccu_state ccu_state);
+struct tu_shader_module;
+#define TU_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
+#define TU_HASH_SHADER_SISCHED (1 << 1)
+#define TU_HASH_SHADER_UNSAFE_MATH (1 << 2)
void
-tu6_emit_event_write(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- enum vgt_event_type event);
+tu_hash_shaders(unsigned char *hash,
+ const VkPipelineShaderStageCreateInfo **stages,
+ const struct tu_pipeline_layout *layout,
+ const struct tu_pipeline_key *key,
+ uint32_t flags);
+
+static inline gl_shader_stage
+vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
+{
+ assert(__builtin_popcount(vk_stage) == 1);
+ return ffs(vk_stage) - 1;
+}
-static inline struct tu_descriptor_state *
-tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+static inline VkShaderStageFlagBits
+mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
{
- return &cmd_buffer->descriptors[bind_point];
+ return (1 << mesa_stage);
}
-struct tu_event
+#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
+
+#define tu_foreach_stage(stage, stage_bits) \
+ for (gl_shader_stage stage, \
+ __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \
+ stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
+
+struct tu_shader_module
{
- struct vk_object_base base;
- struct tu_bo *bo;
+ unsigned char sha1[20];
+
+ uint32_t code_size;
+ const uint32_t *code[0];
};
-struct tu_push_constant_range
+struct tu_shader_compile_options
{
- uint32_t lo;
- uint32_t count;
+ struct ir3_shader_key key;
+
+ bool optimize;
+ bool include_binning_pass;
};
struct tu_shader
{
- struct ir3_shader *ir3_shader;
+ struct ir3_shader ir3_shader;
- struct tu_push_constant_range push_consts;
- uint8_t active_desc_sets;
- bool multi_pos_output;
-};
+ /* This may be true for vertex shaders. When true, variants[1] is the
+ * binning variant and binning_binary is non-NULL.
+ */
+ bool has_binning_pass;
-bool
-tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
- struct tu_device *dev);
+ void *binary;
+ void *binning_binary;
-nir_shader *
-tu_spirv_to_nir(struct tu_device *dev,
- void *mem_ctx,
- const VkPipelineShaderStageCreateInfo *stage_info,
- gl_shader_stage stage);
+ struct ir3_shader_variant variants[0];
+};
struct tu_shader *
tu_shader_create(struct tu_device *dev,
- nir_shader *nir,
+ gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
- unsigned multiview_mask,
- struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc);
void
@@ -1379,78 +1042,50 @@ tu_shader_destroy(struct tu_device *dev,
struct tu_shader *shader,
const VkAllocationCallbacks *alloc);
-struct tu_program_descriptor_linkage
-{
- struct ir3_const_state const_state;
-
- uint32_t constlen;
-
- struct tu_push_constant_range push_consts;
-};
-
-struct tu_pipeline_executable {
- gl_shader_stage stage;
-
- struct ir3_info stats;
- bool is_binning;
+void
+tu_shader_compile_options_init(
+ struct tu_shader_compile_options *options,
+ const VkGraphicsPipelineCreateInfo *pipeline_info);
- char *nir_from_spirv;
- char *nir_final;
- char *disasm;
-};
+VkResult
+tu_shader_compile(struct tu_device *dev,
+ struct tu_shader *shader,
+ const struct tu_shader *next_stage,
+ const struct tu_shader_compile_options *options,
+ const VkAllocationCallbacks *alloc);
struct tu_pipeline
{
- struct vk_object_base base;
-
struct tu_cs cs;
- struct tu_suballoc_bo bo;
- /* Separate BO for private memory since it should GPU writable */
- struct tu_bo *pvtmem_bo;
+ struct tu_dynamic_state dynamic_state;
+
+ struct tu_pipeline_layout *layout;
bool need_indirect_descriptor_sets;
VkShaderStageFlags active_stages;
- uint32_t active_desc_sets;
-
- /* mask of enabled dynamic states
- * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
- */
- uint32_t dynamic_state_mask;
- struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
-
- /* for dynamic states which use the same register: */
- uint32_t gras_su_cntl, gras_su_cntl_mask;
- uint32_t rb_depth_cntl, rb_depth_cntl_mask;
- uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
- uint32_t pc_raster_cntl, pc_raster_cntl_mask;
- uint32_t vpc_unknown_9107, vpc_unknown_9107_mask;
- uint32_t stencil_wrmask;
-
- bool rb_depth_cntl_disable;
-
- enum a5xx_line_mode line_mode;
-
- /* draw states for the pipeline */
- struct tu_draw_state load_state, rast_state, blend_state;
- struct tu_draw_state prim_order_state_sysmem, prim_order_state_gmem;
-
- /* for vertex buffers state */
- uint32_t num_vbs;
struct
{
- struct tu_draw_state config_state;
- struct tu_draw_state state;
- struct tu_draw_state binning_state;
-
- struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
+ struct tu_bo binary_bo;
+ struct tu_cs_entry state_ib;
+ struct tu_cs_entry binning_state_ib;
} program;
struct
{
- struct tu_draw_state state;
- struct tu_draw_state binning_state;
+ uint8_t bindings[MAX_VERTEX_ATTRIBS];
+ uint16_t strides[MAX_VERTEX_ATTRIBS];
+ uint16_t offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t count;
+
+ uint8_t binning_bindings[MAX_VERTEX_ATTRIBS];
+ uint16_t binning_strides[MAX_VERTEX_ATTRIBS];
+ uint16_t binning_offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t binning_count;
+
+ struct tu_cs_entry state_ib;
+ struct tu_cs_entry binning_state_ib;
} vi;
struct
@@ -1461,47 +1096,36 @@ struct tu_pipeline
struct
{
- uint32_t patch_type;
- uint32_t param_stride;
- bool upper_left_domain_origin;
- } tess;
+ struct tu_cs_entry state_ib;
+ } vp;
struct
{
- uint32_t local_size[3];
- uint32_t subgroup_size;
- } compute;
-
- bool provoking_vertex_last;
-
- struct tu_lrz_pipeline lrz;
+ uint32_t gras_su_cntl;
+ struct tu_cs_entry state_ib;
+ } rast;
- /* In other words - framebuffer fetch support */
- bool raster_order_attachment_access;
- bool subpass_feedback_loop_ds;
-
- bool z_negative_one_to_one;
-
- /* Base drawcall cost for sysmem vs gmem autotuner */
- uint8_t drawcall_base_cost;
+ struct
+ {
+ struct tu_cs_entry state_ib;
+ } ds;
- void *executables_mem_ctx;
- /* tu_pipeline_executable */
- struct util_dynarray executables;
+ struct
+ {
+ struct tu_cs_entry state_ib;
+ } blend;
};
void
-tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport,
- bool z_negative_one_to_one);
+tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport);
void
-tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);
+tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor);
void
-tu6_clear_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_image* image, const VkClearValue *value);
-
-void
-tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
+tu6_emit_gras_su_cntl(struct tu_cs *cs,
+ uint32_t gras_su_cntl,
+ float line_width);
void
tu6_emit_depth_bias(struct tu_cs *cs,
@@ -1509,143 +1133,106 @@ tu6_emit_depth_bias(struct tu_cs *cs,
float clamp,
float slope_factor);
-void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples,
- enum a5xx_line_mode line_mode);
-
-void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
-
-void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
-
-void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
-
-void tu6_apply_depth_bounds_workaround(struct tu_device *device,
- uint32_t *rb_depth_cntl);
-
-struct tu_pvtmem_config {
- uint64_t iova;
- uint32_t per_fiber_size;
- uint32_t per_sp_size;
- bool per_wave;
-};
-
-void
-tu6_emit_xs_config(struct tu_cs *cs,
- gl_shader_stage stage,
- const struct ir3_shader_variant *xs);
-
-void
-tu6_emit_xs(struct tu_cs *cs,
- gl_shader_stage stage,
- const struct ir3_shader_variant *xs,
- const struct tu_pvtmem_config *pvtmem,
- uint64_t binary_iova);
-
-void
-tu6_emit_vpc(struct tu_cs *cs,
- const struct ir3_shader_variant *vs,
- const struct ir3_shader_variant *hs,
- const struct ir3_shader_variant *ds,
- const struct ir3_shader_variant *gs,
- const struct ir3_shader_variant *fs,
- uint32_t patch_control_points);
-
void
-tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
-
-struct tu_image_view;
+tu6_emit_stencil_compare_mask(struct tu_cs *cs,
+ uint32_t front,
+ uint32_t back);
void
-tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- const struct tu_image_view *src,
- const struct tu_image_view *dst,
- uint32_t layer_mask,
- uint32_t layers,
- const VkRect2D *rect);
+tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back);
void
-tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- const VkRenderPassBeginInfo *info);
+tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back);
void
-tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- const VkRenderPassBeginInfo *info);
+tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
-void
-tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- bool force_load);
+struct tu_userdata_info *
+tu_lookup_user_sgpr(struct tu_pipeline *pipeline,
+ gl_shader_stage stage,
+ int idx);
-/* expose this function to be able to emit load without checking LOAD_OP */
-void
-tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
+struct tu_shader_variant *
+tu_get_shader(struct tu_pipeline *pipeline, gl_shader_stage stage);
-/* note: gmem store can also resolve */
-void
-tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t a,
- uint32_t gmem_a);
-
-enum pipe_format tu_vk_format_to_pipe_format(VkFormat vk_format);
+struct tu_graphics_pipeline_create_info
+{
+ bool use_rectlist;
+ bool db_depth_clear;
+ bool db_stencil_clear;
+ bool db_depth_disable_expclear;
+ bool db_stencil_disable_expclear;
+ bool db_flush_depth_inplace;
+ bool db_flush_stencil_inplace;
+ bool db_resummarize;
+ uint32_t custom_blend_mode;
+};
struct tu_native_format
{
- enum a6xx_format fmt : 8;
- enum a3xx_color_swap swap : 8;
- enum a6xx_tile_mode tile_mode : 8;
+ int vtx; /* VFMTn_xxx or -1 */
+ int tex; /* TFMTn_xxx or -1 */
+ int rb; /* RBn_xxx or -1 */
+ int swap; /* enum a3xx_color_swap */
+ bool present; /* internal only; always true to external users */
};
-enum pipe_format tu_vk_format_to_pipe_format(VkFormat vk_format);
-bool tu6_format_vtx_supported(VkFormat format);
-struct tu_native_format tu6_format_vtx(VkFormat format);
-bool tu6_format_color_supported(enum pipe_format format);
-struct tu_native_format tu6_format_color(enum pipe_format format, enum a6xx_tile_mode tile_mode);
-bool tu6_format_texture_supported(enum pipe_format format);
-struct tu_native_format tu6_format_texture(enum pipe_format format, enum a6xx_tile_mode tile_mode);
+const struct tu_native_format *
+tu6_get_native_format(VkFormat format);
+
+int
+tu_pack_clear_value(const VkClearValue *val,
+ VkFormat format,
+ uint32_t buf[4]);
+enum a6xx_2d_ifmt tu6_rb_fmt_to_ifmt(enum a6xx_color_fmt fmt);
-static inline enum a6xx_format
-tu6_base_format(enum pipe_format format)
+struct tu_image_level
{
- /* note: tu6_format_color doesn't care about tiling for .fmt field */
- return tu6_format_color(format, TILE6_LINEAR).fmt;
-}
+ VkDeviceSize offset;
+ VkDeviceSize size;
+ uint32_t pitch;
+};
struct tu_image
{
- struct vk_object_base base;
-
+ VkImageType type;
/* The original VkFormat provided by the client. This may not match any
* of the actual surface formats.
*/
VkFormat vk_format;
+ VkImageAspectFlags aspects;
+ VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ VkImageTiling tiling; /** VkImageCreateInfo::tiling */
+ VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
+ VkExtent3D extent;
uint32_t level_count;
uint32_t layer_count;
- struct fdl_layout layout[3];
- uint32_t total_size;
+ VkDeviceSize size;
+ uint32_t alignment;
+
+ /* memory layout */
+ VkDeviceSize layer_size;
+ struct tu_image_level levels[15];
+ unsigned tile_mode;
+
+ unsigned queue_family_mask;
+ bool exclusive;
+ bool shareable;
-#ifdef ANDROID
/* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
VkDeviceMemory owned_memory;
-#endif
/* Set when bound */
- struct tu_bo *bo;
- uint64_t iova;
-
- uint32_t lrz_height;
- uint32_t lrz_pitch;
- uint32_t lrz_offset;
-
- bool shareable;
+ const struct tu_bo *bo;
+ VkDeviceSize bo_offset;
};
+unsigned
+tu_image_queue_family_mask(const struct tu_image *image,
+ uint32_t family,
+ uint32_t queue_family);
+
static inline uint32_t
tu_get_layerCount(const struct tu_image *image,
const VkImageSubresourceRange *range)
@@ -1664,108 +1251,99 @@ tu_get_levelCount(const struct tu_image *image,
: range->levelCount;
}
-enum pipe_format tu6_plane_format(VkFormat format, uint32_t plane);
-
-uint32_t tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask);
-
-enum pipe_format tu_format_for_aspect(enum pipe_format format,
- VkImageAspectFlags aspect_mask);
-
struct tu_image_view
{
- struct vk_object_base base;
-
struct tu_image *image; /**< VkImageViewCreateInfo::image */
- struct fdl6_view view;
+ VkImageViewType type;
+ VkImageAspectFlags aspect_mask;
+ VkFormat vk_format;
+ uint32_t base_layer;
+ uint32_t layer_count;
+ uint32_t base_mip;
+ uint32_t level_count;
+ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
- /* for d32s8 separate depth */
- uint64_t depth_base_addr;
- uint32_t depth_layer_size;
- uint32_t depth_PITCH;
+ uint32_t descriptor[16];
- /* for d32s8 separate stencil */
- uint64_t stencil_base_addr;
- uint32_t stencil_layer_size;
- uint32_t stencil_PITCH;
+ /* Descriptor for use as a storage image as opposed to a sampled image.
+ * This has a few differences for cube maps (e.g. type).
+ */
+ uint32_t storage_descriptor[16];
};
-struct tu_sampler_ycbcr_conversion {
- struct vk_object_base base;
-
- VkFormat format;
- VkSamplerYcbcrModelConversion ycbcr_model;
- VkSamplerYcbcrRange ycbcr_range;
- VkComponentMapping components;
- VkChromaLocation chroma_offsets[2];
- VkFilter chroma_filter;
+struct tu_sampler
+{
};
-struct tu_sampler {
- struct vk_object_base base;
-
- uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
- struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
+struct tu_image_create_info
+{
+ const VkImageCreateInfo *vk_info;
+ bool scanout;
+ bool no_metadata_planes;
};
-void
-tu_cs_image_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer);
-
-void
-tu_cs_image_ref_2d(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, bool src);
-
-void
-tu_cs_image_flag_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer);
-
-void
-tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
-
-void
-tu_cs_image_depth_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
-
-#define tu_image_view_stencil(iview, x) \
- ((iview->view.x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT))
-
-#define tu_image_view_depth(iview, x) \
- ((iview->view.x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_32_FLOAT))
-
VkResult
-tu_gralloc_info(struct tu_device *device,
- const VkNativeBufferANDROID *gralloc_info,
- int *dma_buf,
- uint64_t *modifier);
+tu_image_create(VkDevice _device,
+ const struct tu_image_create_info *info,
+ const VkAllocationCallbacks *alloc,
+ VkImage *pImage);
VkResult
-tu_import_memory_from_gralloc_handle(VkDevice device_h,
- int dma_buf,
- const VkAllocationCallbacks *alloc,
- VkImage image_h);
+tu_image_from_gralloc(VkDevice device_h,
+ const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc,
+ VkImage *out_image_h);
void
-tu_image_view_init(struct tu_image_view *iview,
- const VkImageViewCreateInfo *pCreateInfo,
- bool limited_z24s8);
-
-bool
-tiling_possible(VkFormat format);
-
-bool
-ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage, VkImageUsageFlags stencil_usage,
- const struct fd_dev_info *info, VkSampleCountFlagBits samples);
+tu_image_view_init(struct tu_image_view *view,
+ struct tu_device *device,
+ const VkImageViewCreateInfo *pCreateInfo);
struct tu_buffer_view
{
- struct vk_object_base base;
-
- uint32_t descriptor[A6XX_TEX_CONST_DWORDS];
-
- struct tu_buffer *buffer;
+ VkFormat vk_format;
+ uint64_t range; /**< VkBufferViewCreateInfo::range */
+ uint32_t state[4];
};
void
tu_buffer_view_init(struct tu_buffer_view *view,
struct tu_device *device,
const VkBufferViewCreateInfo *pCreateInfo);
+static inline struct VkExtent3D
+tu_sanitize_image_extent(const VkImageType imageType,
+ const struct VkExtent3D imageExtent)
+{
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkExtent3D) { imageExtent.width, 1, 1 };
+ case VK_IMAGE_TYPE_2D:
+ return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
+ case VK_IMAGE_TYPE_3D:
+ return imageExtent;
+ default:
+ unreachable("invalid image type");
+ }
+}
+
+static inline struct VkOffset3D
+tu_sanitize_image_offset(const VkImageType imageType,
+ const struct VkOffset3D imageOffset)
+{
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkOffset3D) { imageOffset.x, 0, 0 };
+ case VK_IMAGE_TYPE_2D:
+ return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
+ case VK_IMAGE_TYPE_3D:
+ return imageOffset;
+ default:
+ unreachable("invalid image type");
+ }
+}
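+/* Illustrative sketch (not part of the original header): the sanitize helpers
+ * force the unused dimensions of a copy/blit region to their identity values
+ * so later code can treat every image as 3D, e.g.:
+ *
+ *    VkExtent3D e = tu_sanitize_image_extent(VK_IMAGE_TYPE_1D,
+ *                                            (VkExtent3D) { 64, 13, 7 });
+ *    // e == { 64, 1, 1 }
+ *    VkOffset3D o = tu_sanitize_image_offset(VK_IMAGE_TYPE_2D,
+ *                                            (VkOffset3D) { 8, 3, 2 });
+ *    // o == { 8, 3, 0 }
+ */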
+
struct tu_attachment_info
{
struct tu_image_view *attachment;
@@ -1773,146 +1351,100 @@ struct tu_attachment_info
struct tu_framebuffer
{
- struct vk_object_base base;
-
uint32_t width;
uint32_t height;
uint32_t layers;
- /* size of the first tile */
- VkExtent2D tile0;
- /* number of tiles */
- VkExtent2D tile_count;
-
- /* size of the first VSC pipe */
- VkExtent2D pipe0;
- /* number of VSC pipes */
- VkExtent2D pipe_count;
-
- /* pipe register values */
- uint32_t pipe_config[MAX_VSC_PIPES];
- uint32_t pipe_sizes[MAX_VSC_PIPES];
-
uint32_t attachment_count;
struct tu_attachment_info attachments[0];
};
-void
-tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
- const struct tu_device *device,
- const struct tu_render_pass *pass);
-
-struct tu_subpass_barrier {
+struct tu_subpass_barrier
+{
VkPipelineStageFlags src_stage_mask;
- VkPipelineStageFlags dst_stage_mask;
VkAccessFlags src_access_mask;
VkAccessFlags dst_access_mask;
- bool incoherent_ccu_color, incoherent_ccu_depth;
};
+void
+tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
+ const struct tu_subpass_barrier *barrier);
+
struct tu_subpass_attachment
{
uint32_t attachment;
-
- /* For input attachments, true if it needs to be patched to refer to GMEM
- * in GMEM mode. This is false if it hasn't already been written as an
- * attachment.
- */
- bool patch_input_gmem;
+ VkImageLayout layout;
};
struct tu_subpass
{
uint32_t input_count;
uint32_t color_count;
- uint32_t resolve_count;
- bool resolve_depth_stencil;
-
- bool feedback_loop_color;
- bool feedback_loop_ds;
-
- /* True if we must invalidate UCHE thanks to a feedback loop. */
- bool feedback_invalidate;
-
- /* In other words - framebuffer fetch support */
- bool raster_order_attachment_access;
-
struct tu_subpass_attachment *input_attachments;
struct tu_subpass_attachment *color_attachments;
struct tu_subpass_attachment *resolve_attachments;
struct tu_subpass_attachment depth_stencil_attachment;
- VkSampleCountFlagBits samples;
-
- uint32_t srgb_cntl;
- uint32_t multiview_mask;
+ /** Subpass has at least one resolve attachment */
+ bool has_resolve;
struct tu_subpass_barrier start_barrier;
+
+ uint32_t view_mask;
+ VkSampleCountFlagBits max_sample_count;
};
struct tu_render_pass_attachment
{
VkFormat format;
uint32_t samples;
- uint32_t cpp;
- VkImageAspectFlags clear_mask;
- uint32_t clear_views;
- bool load;
- bool store;
- int32_t gmem_offset;
- /* for D32S8 separate stencil: */
- bool load_stencil;
- bool store_stencil;
- int32_t gmem_offset_stencil;
+ VkAttachmentLoadOp load_op;
+ VkAttachmentLoadOp stencil_load_op;
+ VkImageLayout initial_layout;
+ VkImageLayout final_layout;
+ uint32_t view_mask;
};
struct tu_render_pass
{
- struct vk_object_base base;
-
uint32_t attachment_count;
uint32_t subpass_count;
- uint32_t gmem_pixels;
- uint32_t tile_align_w;
struct tu_subpass_attachment *subpass_attachments;
struct tu_render_pass_attachment *attachments;
struct tu_subpass_barrier end_barrier;
struct tu_subpass subpasses[0];
};
-#define PERF_CNTRS_REG 4
-
-struct tu_perf_query_data
-{
- uint32_t gid; /* group-id */
- uint32_t cid; /* countable-id within the group */
- uint32_t cntr_reg; /* counter register within the group */
- uint32_t pass; /* pass index that countables can be requested */
- uint32_t app_idx; /* index provided by apps */
-};
+VkResult
+tu_device_init_meta(struct tu_device *device);
+void
+tu_device_finish_meta(struct tu_device *device);
struct tu_query_pool
{
- struct vk_object_base base;
-
- VkQueryType type;
uint32_t stride;
+ uint32_t availability_offset;
uint64_t size;
- uint32_t pipeline_statistics;
- struct tu_bo *bo;
+ char *ptr;
+ VkQueryType type;
+ uint32_t pipeline_stats_mask;
+};
- /* For performance query */
- const struct fd_perfcntr_group *perf_group;
- uint32_t perf_group_count;
- uint32_t counter_index_count;
- struct tu_perf_query_data perf_query_data[0];
+struct tu_semaphore
+{
+ uint32_t syncobj;
+ uint32_t temp_syncobj;
};
-uint32_t
-tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);
+void
+tu_set_descriptor_set(struct tu_cmd_buffer *cmd_buffer,
+ VkPipelineBindPoint bind_point,
+ struct tu_descriptor_set *set,
+ unsigned idx);
void
-tu_update_descriptor_sets(const struct tu_device *device,
+tu_update_descriptor_sets(struct tu_device *device,
+ struct tu_cmd_buffer *cmd_buffer,
VkDescriptorSet overrideSet,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
@@ -1921,24 +1453,25 @@ tu_update_descriptor_sets(const struct tu_device *device,
void
tu_update_descriptor_set_with_template(
- const struct tu_device *device,
+ struct tu_device *device,
+ struct tu_cmd_buffer *cmd_buffer,
struct tu_descriptor_set *set,
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const void *pData);
-VkResult
-tu_physical_device_init(struct tu_physical_device *device,
- struct tu_instance *instance);
-VkResult
-tu_enumerate_devices(struct tu_instance *instance);
+void
+tu_meta_push_descriptor_set(struct tu_cmd_buffer *cmd_buffer,
+ VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout,
+ uint32_t set,
+ uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites);
int
-tu_device_get_gpu_timestamp(struct tu_device *dev,
- uint64_t *ts);
+tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id);
int
-tu_device_get_suspend_count(struct tu_device *dev,
- uint64_t *suspend_count);
+tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size);
int
tu_drm_submitqueue_new(const struct tu_device *dev,
@@ -1948,116 +1481,76 @@ tu_drm_submitqueue_new(const struct tu_device *dev,
void
tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id);
+uint32_t
+tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags);
+uint32_t
+tu_gem_import_dmabuf(const struct tu_device *dev,
+ int prime_fd,
+ uint64_t size);
int
-tu_signal_syncs(struct tu_device *device, struct vk_sync *sync1, struct vk_sync *sync2);
-
-int
-tu_syncobj_to_fd(struct tu_device *device, struct vk_sync *sync);
-
-VkResult
-tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit);
-
+tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle);
void
-tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
- void *ts_from, uint32_t from_offset,
- void *ts_to, uint32_t to_offset,
- uint32_t count);
-
-
-VkResult
-tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
- struct u_trace **trace_copy);
-
-/* If we copy trace and timestamps we will have to free them. */
-struct tu_u_trace_cmd_data
-{
- struct tu_cs *timestamp_copy_cs;
- struct u_trace *trace;
-};
-
-/* Data necessary to retrieve timestamps and clean all
- * associated resources afterwards.
- */
-struct tu_u_trace_submission_data
-{
- uint32_t submission_id;
- /* We have to know when timestamps are available,
- * this sync object indicates it.
- */
- struct tu_u_trace_syncobj *syncobj;
-
- uint32_t cmd_buffer_count;
- uint32_t last_buffer_with_tracepoints;
- struct tu_u_trace_cmd_data *cmd_trace_data;
-};
-
-VkResult
-tu_u_trace_submission_data_create(
- struct tu_device *device,
- struct tu_cmd_buffer **cmd_buffers,
- uint32_t cmd_buffer_count,
- struct tu_u_trace_submission_data **submission_data);
-
-void
-tu_u_trace_submission_data_finish(
- struct tu_device *device,
- struct tu_u_trace_submission_data *submission_data);
+tu_gem_close(const struct tu_device *dev, uint32_t gem_handle);
+uint64_t
+tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle);
+uint64_t
+tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle);
+
+#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
+ \
+ static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __tu_type *) _handle; \
+ } \
+ \
+ static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
+ { \
+ return (__VkType) _obj; \
+ }
+
+#define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \
+ \
+ static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __tu_type *) (uintptr_t) _handle; \
+ } \
+ \
+ static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
+ { \
+ return (__VkType)(uintptr_t) _obj; \
+ }
#define TU_FROM_HANDLE(__tu_type, __name, __handle) \
- VK_FROM_HANDLE(__tu_type, __name, __handle)
-
-VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
- VK_OBJECT_TYPE_COMMAND_BUFFER)
-VK_DEFINE_HANDLE_CASTS(tu_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
-VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
- VK_OBJECT_TYPE_INSTANCE)
-VK_DEFINE_HANDLE_CASTS(tu_physical_device, vk.base, VkPhysicalDevice,
- VK_OBJECT_TYPE_PHYSICAL_DEVICE)
-VK_DEFINE_HANDLE_CASTS(tu_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
-
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, vk.base, VkCommandPool,
- VK_OBJECT_TYPE_COMMAND_POOL)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, base, VkBuffer,
- VK_OBJECT_TYPE_BUFFER)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, base, VkBufferView,
- VK_OBJECT_TYPE_BUFFER_VIEW)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, base, VkDescriptorPool,
- VK_OBJECT_TYPE_DESCRIPTOR_POOL)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, base, VkDescriptorSet,
- VK_OBJECT_TYPE_DESCRIPTOR_SET)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout, base,
- VkDescriptorSetLayout,
- VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template, base,
- VkDescriptorUpdateTemplate,
- VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, base, VkDeviceMemory,
- VK_OBJECT_TYPE_DEVICE_MEMORY)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, base, VkFramebuffer,
- VK_OBJECT_TYPE_FRAMEBUFFER)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, base, VkImageView,
- VK_OBJECT_TYPE_IMAGE_VIEW);
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, base, VkPipelineCache,
- VK_OBJECT_TYPE_PIPELINE_CACHE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, base, VkPipeline,
- VK_OBJECT_TYPE_PIPELINE)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, base, VkPipelineLayout,
- VK_OBJECT_TYPE_PIPELINE_LAYOUT)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, base, VkQueryPool,
- VK_OBJECT_TYPE_QUERY_POOL)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, base, VkRenderPass,
- VK_OBJECT_TYPE_RENDER_PASS)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, base, VkSampler,
- VK_OBJECT_TYPE_SAMPLER)
-VK_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, base, VkSamplerYcbcrConversion,
- VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
-
-/* for TU_FROM_HANDLE with both VkFence and VkSemaphore: */
-#define tu_syncobj_from_handle(x) ((struct tu_syncobj*) (uintptr_t) (x))
-
-void
-update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask);
+ struct __tu_type *__name = __tu_type##_from_handle(__handle)
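+/* Illustrative sketch (not part of the original header): entrypoints use
+ * TU_FROM_HANDLE to recover driver structs from dispatchable and
+ * non-dispatchable Vulkan handles, e.g. in a hypothetical entrypoint:
+ *
+ *    void tu_ExampleEntrypoint(VkDevice _device, VkImage _image)
+ *    {
+ *       TU_FROM_HANDLE(tu_device, device, _device);
+ *       TU_FROM_HANDLE(tu_image, image, _image);
+ *       // device is a struct tu_device *, image a struct tu_image *
+ *    }
+ */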
+
+TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer)
+TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice)
+TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance)
+TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice)
+TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue)
+
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout,
+ VkDescriptorSetLayout)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template,
+ VkDescriptorUpdateTemplate)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_fence, VkFence)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView);
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_shader_module, VkShaderModule)
+TU_DEFINE_NONDISP_HANDLE_CASTS(tu_semaphore, VkSemaphore)
#endif /* TU_PRIVATE_H */