/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_threaded_context.h"
#include "util/u_cpu_detect.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"

/* 0 = disabled, 1 = assertions, 2 = printfs */
#define TC_DEBUG 0

/* With TC_DEBUG < 1, tc_assert(x) expands to nothing, so the asserted
 * expression is not evaluated at all.
 */
#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

/* With TC_DEBUG < 2, tc_printf(...) expands to nothing and tc_asprintf /
 * tc_strcmp expand to the constant 0.
 */
#if TC_DEBUG >= 2
#define tc_printf printf
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif

/* Magic value stored in batches and call headers; checked by tc_assert. */
#define TC_SENTINEL 0x5ca1ab1e

/* One TC_CALL_<name> enumerator per CALL(name) entry listed in
 * u_threaded_context_calls.h, followed by TC_NUM_CALLS as the entry count.
 */
enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

/* Signature of the function that replays one recorded call. */
typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload);

/* Dispatch table indexed by enum tc_call_id (see tc_batch_execute).
 * NOTE(review): only a tentative definition is visible here; the
 * initializer is presumably further down in the file.
 */
static const tc_execute execute_func[TC_NUM_CALLS];

/* Debug check of one batch: sentinel intact and slot count within
 * TC_CALLS_PER_BATCH. Does nothing when tc_assert is compiled out.
 */
static void
tc_batch_check(struct tc_batch *batch)
{
   tc_assert(batch->sentinel == TC_SENTINEL);
   tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
}

/* Debug check of every batch slot of the context, including that each
 * batch points at tc->pipe.
 */
static void
tc_debug_check(struct threaded_context *tc)
{
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      tc_batch_check(&tc->batch_slots[i]);
      tc_assert(tc->batch_slots[i].pipe == tc->pipe);
   }
}

/* Replays every call recorded in a batch and then marks the batch empty.
 *
 * job is the struct tc_batch to execute. thread_index is not used by the
 * body. The function is handed to util_queue_add_job() by tc_batch_flush()
 * and is also called directly by _tc_sync().
 */
static void
tc_batch_execute(void *job, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->pipe;
   struct tc_call *last = &batch->call[batch->num_total_call_slots];

   tc_batch_check(batch);

   /* Calls are variable-sized: each header says how many slots it spans. */
   for (struct tc_call *iter = batch->call; iter != last;
        iter += iter->num_call_slots) {
      tc_assert(iter->sentinel == TC_SENTINEL);
      execute_func[iter->call_id](pipe, &iter->payload);
   }

   tc_batch_check(batch);
   batch->num_total_call_slots = 0;
}

/* Submits the batch currently being filled (tc->next) to tc->queue with
 * tc_batch_execute as the job function, remembers it as tc->last, and
 * advances tc->next to the following batch slot (wrapping at
 * TC_MAX_BATCHES).
 */
static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_call_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
}

/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space there.
 * All other higher-level "add" functions use it.
*/
/* The number of slots reserved is the call header (everything before the
 * payload member of struct tc_call) plus payload_size bytes, rounded up to
 * whole struct tc_call units. Only the header fields (sentinel, call_id,
 * num_call_slots) are written here; the returned payload is left for the
 * caller to fill in.
 */
static union tc_payload *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned payload_size)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   unsigned total_size = offsetof(struct tc_call, payload) + payload_size;
   unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call));

   tc_debug_check(tc);

   /* Not enough room left: submit this batch and continue in the next one. */
   if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_call_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call *call = &next->call[next->num_total_call_slots];
   next->num_total_call_slots += num_call_slots;

   call->sentinel = TC_SENTINEL;
   call->call_id = id;
   call->num_call_slots = num_call_slots;

   tc_debug_check(tc);
   return &call->payload;
}

/* Adds a call whose payload is exactly one "struct type" and returns the
 * payload cast to a pointer to that struct.
 */
#define tc_add_struct_typed_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type)))

/* Adds a call whose payload is one "struct type" followed by num_slots
 * elements of that struct's trailing slot array.
 */
#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    sizeof(struct type) + \
                                    sizeof(((struct type*)NULL)->slot[0]) * \
                                    (num_slots)))

/* Adds a call with payload_size 0. Callers in this file then store a single
 * member of union tc_payload (query or handle) through the returned pointer.
 * NOTE(review): this presumably relies on struct tc_call itself being large
 * enough to hold the union; confirm against the struct declaration.
 */
static union tc_payload *
tc_add_small_call(struct threaded_context *tc, enum tc_call_id id)
{
   return tc_add_sized_call(tc, id, 0);
}

/* Makes sure every call recorded so far has been executed: waits for the
 * most recently submitted batch (tc->last) if its fence is not signalled,
 * then executes the batch still being filled (tc->next) directly on the
 * calling thread. tc->num_syncs is incremented if either step did any work.
 *
 * info and func are only used for the debug print, which is compiled out
 * unless TC_DEBUG >= 2. Use the tc_sync / tc_sync_msg wrappers.
 */
static void
_tc_sync(struct threaded_context *tc, const char *info, const char *func)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];
   bool synced = false;

   tc_debug_check(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   tc_debug_check(tc);

   /* .. and execute unflushed calls directly.
*/
   if (next->num_total_call_slots) {
      p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
      tc_batch_execute(next, 0);
      synced = true;
   }

   if (synced) {
      p_atomic_inc(&tc->num_syncs);

      /* Debug print only; with TC_DEBUG < 2 tc_strcmp() is the constant 0
       * and tc_printf() expands to nothing.
       */
      if (tc_strcmp(func, "tc_destroy") != 0)
         tc_printf("sync %s %s\n", func, info);
   }

   tc_debug_check(tc);
}

/* Convenience wrappers that pass the calling function's name to _tc_sync. */
#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)

/* Stores a reference to src in *dst, ignoring whatever *dst held before:
 * *dst is overwritten with NULL first. This makes it usable on payload
 * fields, which tc_add_sized_call() does not initialize.
 */
static void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = NULL;
   pipe_resource_reference(dst, src);
}

/* Initializes the threaded_resource part of res: "latest" points at the
 * resource itself, the valid buffer range is initialized, the base range
 * pointer refers to the resource's own range, and the shared / user-pointer
 * flags are cleared.
 */
void
threaded_resource_init(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   tres->latest = &tres->b;
   util_range_init(&tres->valid_buffer_range);
   tres->base_valid_buffer_range = &tres->valid_buffer_range;
   tres->is_shared = false;
   tres->is_user_ptr = false;
}

/* Counterpart of threaded_resource_init(): releases "latest" if it refers
 * to a different resource than res itself and destroys the valid range.
 */
void
threaded_resource_deinit(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   if (tres->latest != &tres->b)
      pipe_resource_reference(&tres->latest, NULL);
   util_range_destroy(&tres->valid_buffer_range);
}

/* Returns pipe unchanged when it is NULL or has no priv pointer; otherwise
 * synchronizes the threaded context and returns pipe->priv.
 * NOTE(review): priv is presumably the wrapped driver context; confirm
 * where priv is assigned.
 */
struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}


/********************************************************************
 * simple functions
 */

/* Generates the pair of functions for a context entry point that takes a
 * single parameter:
 *   tc_call_<func>() calls pipe-><func>() with the payload reinterpreted as
 *                    "type" (deref2 is "*" to pass it by value, empty to
 *                    pass a pointer);
 *   tc_<func>()      records the call and copies the parameter into the
 *                    payload (deref is "*" when the parameter is a pointer).
 * m_payload is not used by the expansion.
 */
#define TC_FUNC1(func, m_payload, qualifier, type, deref, deref2) \
   static void \
   tc_call_##func(struct pipe_context *pipe, union tc_payload *payload) \
   { \
      pipe->func(pipe, deref2((type*)payload)); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \
      *p = deref(param); \
   }

TC_FUNC1(set_active_query_state, flags, , boolean, , *)

TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
TC_FUNC1(set_stencil_ref, stencil_ref, const, struct
pipe_stencil_ref, *, )
TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, )
TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *)
TC_FUNC1(set_min_samples, min_samples, , unsigned, , *)
TC_FUNC1(set_polygon_stipple, polygon_stipple, const, struct pipe_poly_stipple, *, )

TC_FUNC1(texture_barrier, flags, , unsigned, , *)
TC_FUNC1(memory_barrier, flags, , unsigned, , *)


/********************************************************************
 * queries
 */

/* Calls the wrapped context's create_query() immediately; nothing is
 * recorded and no synchronization is done.
 */
static struct pipe_query *
tc_create_query(struct pipe_context *_pipe, unsigned query_type,
                unsigned index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_query(pipe, query_type, index);
}

/* Calls the wrapped context's create_batch_query() immediately; nothing is
 * recorded and no synchronization is done.
 */
static struct pipe_query *
tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
                      unsigned *query_types)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_batch_query(pipe, num_queries, query_types);
}

/* Replays a recorded destroy_query. */
static void
tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
{
   pipe->destroy_query(pipe, payload->query);
}

/* Unlinks the query from the unflushed list if it is linked (non-NULL
 * head_unflushed.next) and records a destroy_query call.
 */
static void
tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);

   if (tq->head_unflushed.next)
      LIST_DEL(&tq->head_unflushed);

   tc_add_small_call(tc, TC_CALL_destroy_query)->query = query;
}

/* Replays a recorded begin_query; the driver's return value is dropped. */
static void
tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
{
   pipe->begin_query(pipe, payload->query);
}

/* Records a begin_query call and always reports success. */
static boolean
tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   union tc_payload *payload = tc_add_small_call(tc, TC_CALL_begin_query);

   payload->query = query;
   return true; /* we don't care about the return value for this call */
}

/* Replays a recorded end_query; the driver's return value is dropped. */
static void
tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
{
pipe->end_query(pipe, payload->query);
}

/* Records an end_query call, marks the query as not flushed, and links it
 * into tc->unflushed_queries unless it is already linked. Always reports
 * success.
 */
static bool
tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   union tc_payload *payload = tc_add_small_call(tc, TC_CALL_end_query);

   payload->query = query;

   tq->flushed = false;
   if (!tq->head_unflushed.next)
      LIST_ADD(&tq->head_unflushed, &tc->unflushed_queries);

   return true; /* we don't care about the return value for this call */
}

/* Fetches a query result from the wrapped context on the calling thread.
 * If the query is not marked flushed, the threaded context is synchronized
 * first. On success the query is marked flushed and removed from the
 * unflushed list. Returns what the driver's get_query_result() returned.
 */
static boolean
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, boolean wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;

   if (!tq->flushed)
      tc_sync_msg(tc, wait ? "wait" : "nowait");

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (success) {
      tq->flushed = true;
      if (tq->head_unflushed.next)
         LIST_DEL(&tq->head_unflushed);
   }
   return success;
}

/* Payload of a recorded get_query_result_resource call. "resource" holds a
 * reference that is released after the call has been replayed.
 */
struct tc_query_result_resource {
   struct pipe_query *query;
   boolean wait;
   enum pipe_query_value_type result_type;
   int index;
   struct pipe_resource *resource;
   unsigned offset;
};

/* Replays get_query_result_resource and drops the resource reference. */
static void
tc_call_get_query_result_resource(struct pipe_context *pipe,
                                  union tc_payload *payload)
{
   struct tc_query_result_resource *p = (struct tc_query_result_resource *)payload;

   pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
                                   p->index, p->resource, p->offset);
   pipe_resource_reference(&p->resource, NULL);
}

/* Records a get_query_result_resource call, taking a reference on
 * resource for the lifetime of the recorded call.
 */
static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query, boolean wait,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_query_result_resource *p =
      tc_add_struct_typed_call(tc, TC_CALL_get_query_result_resource,
                               tc_query_result_resource);

   p->query = query;
   p->wait = wait;
   p->result_type = result_type;
p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   p->offset = offset;
}

/* Payload of a recorded render_condition call. */
struct tc_render_condition {
   struct pipe_query *query;
   bool condition;
   unsigned mode;
};

/* Replays a recorded render_condition call. */
static void
tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_render_condition *p = (struct tc_render_condition *)payload;
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
}

/* Records a render_condition call with the given query, condition and mode. */
static void
tc_render_condition(struct pipe_context *_pipe,
                    struct pipe_query *query, boolean condition,
                    enum pipe_render_cond_flag mode)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_render_condition *p =
      tc_add_struct_typed_call(tc, TC_CALL_render_condition, tc_render_condition);

   p->query = query;
   p->condition = condition;
   p->mode = mode;
}


/********************************************************************
 * constant (immutable) states
 */

/* Generates tc_create_<name>_state(), which calls the wrapped context's
 * create function immediately (nothing is recorded). sname selects the
 * pipe_<sname>_state template type.
 */
#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

/* Bind and delete are recorded calls carrying the CSO pointer by value. */
#define TC_CSO_BIND(name) TC_FUNC1(bind_##name##_state, cso, , void *, , *)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, cso, , void *, , *)

/* Create + bind + delete for one state object type. */
#define TC_CSO_WHOLE2(name, sname) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO_WHOLE2(name, name)

TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_WHOLE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_WHOLE2(fs, shader)
TC_CSO_WHOLE2(vs, shader)
TC_CSO_WHOLE2(gs, shader)
TC_CSO_WHOLE2(tcs, shader)
TC_CSO_WHOLE2(tes, shader)
/* Samplers and vertex elements get hand-written functions for the parts
 * not instantiated here (see tc_bind_sampler_states and
 * tc_create_vertex_elements_state).
 */
TC_CSO_CREATE(sampler, sampler)
TC_CSO_DELETE(sampler)
TC_CSO_BIND(vertex_elements)
TC_CSO_DELETE(vertex_elements)

/* Calls the wrapped context's create_vertex_elements_state() immediately;
 * nothing is recorded.
 */
static void *
tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
                                const struct pipe_vertex_element *elems)
{
   struct pipe_context *pipe =
threaded_context(_pipe)->pipe;

   return pipe->create_vertex_elements_state(pipe, count, elems);
}

/* Payload of a recorded bind_sampler_states call; "count" sampler state
 * pointers follow the header in slot[].
 */
struct tc_sampler_states {
   ubyte shader, start, count;
   void *slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded bind_sampler_states call. */
static void
tc_call_bind_sampler_states(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_sampler_states *p = (struct tc_sampler_states *)payload;
   pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
}

/* Records a bind_sampler_states call, copying the "count" state pointers
 * into the payload. Does nothing when count is 0.
 */
static void
tc_bind_sampler_states(struct pipe_context *_pipe,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count, void **states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_states *p =
      tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);

   p->shader = shader;
   p->start = start;
   p->count = count;
   memcpy(p->slot, states, count * sizeof(states[0]));
}


/********************************************************************
 * immediate states
 */

/* Replays a recorded set_framebuffer_state call, then releases the surface
 * references held by the payload (the first nr_cbufs color buffers and the
 * depth/stencil buffer).
 */
static void
tc_call_set_framebuffer_state(struct pipe_context *pipe, union tc_payload *payload)
{
   struct pipe_framebuffer_state *p = (struct pipe_framebuffer_state *)payload;

   pipe->set_framebuffer_state(pipe, p);

   unsigned nr_cbufs = p->nr_cbufs;
   for (unsigned i = 0; i < nr_cbufs; i++)
      pipe_surface_reference(&p->cbufs[i], NULL);
   pipe_surface_reference(&p->zsbuf, NULL);
}

/* Records a set_framebuffer_state call. The scalar fields are copied and a
 * reference is taken on each of the first nr_cbufs color surfaces and on
 * zsbuf. Payload entries cbufs[nr_cbufs..] are left unwritten.
 */
static void
tc_set_framebuffer_state(struct pipe_context *_pipe,
                         const struct pipe_framebuffer_state *fb)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_framebuffer_state *p =
      tc_add_struct_typed_call(tc, TC_CALL_set_framebuffer_state,
                               pipe_framebuffer_state);
   unsigned nr_cbufs = fb->nr_cbufs;

   p->width = fb->width;
   p->height = fb->height;
   p->samples = fb->samples;
   p->layers = fb->layers;
   p->nr_cbufs = nr_cbufs;

   for (unsigned i = 0; i < nr_cbufs; i++) {
      /* The payload is uninitialized, so clear the slot before referencing. */
      p->cbufs[i] = NULL;
      pipe_surface_reference(&p->cbufs[i], fb->cbufs[i]);
   }
   p->zsbuf = NULL;
   pipe_surface_reference(&p->zsbuf, fb->zsbuf);
}

/* Replays a recorded set_tess_state call. The payload is six floats: four
 * outer levels followed by two inner levels.
 */
static void
tc_call_set_tess_state(struct pipe_context *pipe, union tc_payload *payload)
{
   float *p = (float*)payload;
   pipe->set_tess_state(pipe, p, p + 4);
}

/* Records a set_tess_state call. The four outer and two inner default
 * levels are copied back to back into a six-float payload.
 */
static void
tc_set_tess_state(struct pipe_context *_pipe,
                  const float default_outer_level[4],
                  const float default_inner_level[2])
{
   struct threaded_context *tc = threaded_context(_pipe);
   float *p = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state,
                                        sizeof(float) * 6);

   memcpy(p, default_outer_level, 4 * sizeof(float));
   memcpy(p + 4, default_inner_level, 2 * sizeof(float));
}

/* Payload of a recorded set_constant_buffer call. cb.buffer holds a
 * reference that is released after the call has been replayed.
 */
struct tc_constant_buffer {
   ubyte shader, index;
   struct pipe_constant_buffer cb;
};

/* Replays a recorded set_constant_buffer call and drops the buffer
 * reference held by the payload.
 */
static void
tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_constant_buffer *p = (struct tc_constant_buffer *)payload;

   pipe->set_constant_buffer(pipe,
                             p->shader,
                             p->index,
                             &p->cb);
   pipe_resource_reference(&p->cb.buffer, NULL);
}

/* Records a set_constant_buffer call.
 *
 * - cb with a user_buffer: the data is uploaded through
 *   tc->base.const_uploader first and the recorded descriptor refers to the
 *   uploaded buffer and offset, with user_buffer cleared.
 * - cb without a user_buffer: the descriptor is copied and a reference is
 *   taken on cb->buffer.
 * - cb == NULL: a zeroed descriptor is recorded.
 */
static void
tc_set_constant_buffer(struct pipe_context *_pipe,
                       enum pipe_shader_type shader, uint index,
                       const struct pipe_constant_buffer *cb)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_resource *buffer = NULL;
   unsigned offset;

   /* This must be done before adding set_constant_buffer, because it could
    * generate e.g. transfer_unmap and flush partially-uninitialized
    * set_constant_buffer to the driver if it was done afterwards.
*/
   if (cb && cb->user_buffer) {
      u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
                    cb->user_buffer, &offset, &buffer);
   }

   struct tc_constant_buffer *p =
      tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer,
                               tc_constant_buffer);
   p->shader = shader;
   p->index = index;

   if (cb) {
      if (cb->user_buffer) {
         /* Hand the uploaded buffer (and its reference) to the payload. */
         p->cb.buffer_size = cb->buffer_size;
         p->cb.user_buffer = NULL;
         p->cb.buffer_offset = offset;
         p->cb.buffer = buffer;
      } else {
         /* Take the reference first; the memcpy then rewrites p->cb.buffer
          * with the same pointer value along with the other fields.
          */
         tc_set_resource_reference(&p->cb.buffer,
                                   cb->buffer);
         memcpy(&p->cb, cb, sizeof(*cb));
      }
   } else {
      memset(&p->cb, 0, sizeof(*cb));
   }
}

/* Payload of a recorded set_scissor_states call; "count" scissor states
 * follow the header in slot[].
 */
struct tc_scissors {
   ubyte start, count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_scissor_states call. */
static void
tc_call_set_scissor_states(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_scissors *p = (struct tc_scissors *)payload;
   pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
}

/* Records a set_scissor_states call, copying "count" states into the
 * payload.
 * NOTE(review): unlike tc_set_viewport_states() there is no early return
 * for count == 0 here.
 */
static void
tc_set_scissor_states(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      const struct pipe_scissor_state *states)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_scissors *p =
      tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot, states, count * sizeof(states[0]));
}

/* Payload of a recorded set_viewport_states call; "count" viewport states
 * follow the header in slot[].
 */
struct tc_viewports {
   ubyte start, count;
   struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_viewport_states call. */
static void
tc_call_set_viewport_states(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_viewports *p = (struct tc_viewports *)payload;
   pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
}

/* Records a set_viewport_states call, copying "count" states into the
 * payload. Does nothing when count is 0.
 */
static void
tc_set_viewport_states(struct pipe_context *_pipe,
                       unsigned start, unsigned count,
                       const struct pipe_viewport_state *states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_viewports *p =
      tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot,
states, count * sizeof(states[0]));
}

/* Payload of a recorded set_window_rectangles call; "count" rectangles
 * follow the header in slot[].
 */
struct tc_window_rects {
   bool include;
   ubyte count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_window_rectangles call. */
static void
tc_call_set_window_rectangles(struct pipe_context *pipe,
                              union tc_payload *payload)
{
   struct tc_window_rects *p = (struct tc_window_rects *)payload;
   pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
}

/* Records a set_window_rectangles call, copying "count" rectangles into
 * the payload.
 */
static void
tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
                         unsigned count,
                         const struct pipe_scissor_state *rects)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_window_rects *p =
      tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);

   p->include = include;
   p->count = count;
   memcpy(p->slot, rects, count * sizeof(rects[0]));
}

/* Payload of a recorded set_sampler_views call; "count" view pointers
 * follow the header in slot[]. Each non-NULL entry holds a reference.
 */
struct tc_sampler_views {
   ubyte shader, start, count;
   struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_sampler_views call and then releases the view
 * references held by the payload.
 */
static void
tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_sampler_views *p = (struct tc_sampler_views *)payload;
   unsigned count = p->count;

   pipe->set_sampler_views(pipe, p->shader, p->start, p->count, p->slot);
   for (unsigned i = 0; i < count; i++)
      pipe_sampler_view_reference(&p->slot[i], NULL);
}

/* Records a set_sampler_views call. With a non-NULL views array a
 * reference is taken on every entry; with views == NULL all "count" payload
 * slots are zeroed. Does nothing when count is 0.
 */
static void
tc_set_sampler_views(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     struct pipe_sampler_view **views)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_views *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views, count);

   p->shader = shader;
   p->start = start;
   p->count = count;

   if (views) {
      for (unsigned i = 0; i < count; i++) {
         /* The payload is uninitialized, so clear before referencing. */
         p->slot[i] = NULL;
         pipe_sampler_view_reference(&p->slot[i], views[i]);
      }
   } else {
      memset(p->slot, 0, count * sizeof(views[0]));
   }
}

/* Payload of a recorded set_shader_images call. When "unbind" is set no
 * slots were allocated and the call is replayed with a NULL array.
 */
struct tc_shader_images {
   ubyte shader, start, count;
   bool unbind;
   struct pipe_image_view slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_shader_images call; unless it is an unbind, the
 * resource references held by the payload are released afterwards.
 */
static
void
tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_shader_images *p = (struct tc_shader_images *)payload;
   unsigned count = p->count;

   /* Unbind: there are no slots and therefore no references to release. */
   if (p->unbind) {
      pipe->set_shader_images(pipe, p->shader, p->start, p->count, NULL);
      return;
   }

   pipe->set_shader_images(pipe, p->shader, p->start, p->count, p->slot);

   for (unsigned i = 0; i < count; i++)
      pipe_resource_reference(&p->slot[i].resource, NULL);
}

/* Records a set_shader_images call. Does nothing when count is 0.
 *
 * With images == NULL an unbind is recorded and no slots are allocated.
 * Otherwise a reference is taken on every image resource, the image views
 * are copied into the payload, and for writable images backed by a
 * PIPE_BUFFER the bound byte range is added to the buffer's
 * valid_buffer_range.
 */
static void
tc_set_shader_images(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     const struct pipe_image_view *images)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_images *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
                             images ? count : 0);

   p->shader = shader;
   p->start = start;
   p->count = count;
   p->unbind = images == NULL;

   if (images) {
      for (unsigned i = 0; i < count; i++) {
         tc_set_resource_reference(&p->slot[i].resource, images[i].resource);

         if (images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
             images[i].resource &&
             images[i].resource->target == PIPE_BUFFER) {
            struct threaded_resource *tres =
               threaded_resource(images[i].resource);

            util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
                           images[i].u.buf.offset + images[i].u.buf.size);
         }
      }
      /* Copies all fields; the resource pointers written above are
       * overwritten with the same values.
       */
      memcpy(p->slot, images, count * sizeof(images[0]));
   }
}

/* Payload of a recorded set_shader_buffers call. When "unbind" is set no
 * slots were allocated and the call is replayed with a NULL array.
 */
struct tc_shader_buffers {
   ubyte shader, start, count;
   bool unbind;
   struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_shader_buffers call; unless it is an unbind, the
 * buffer references held by the payload are released afterwards.
 */
static void
tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_shader_buffers *p = (struct tc_shader_buffers *)payload;
   unsigned count = p->count;

   if (p->unbind) {
      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL);
      return;
   }

   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot);

   for (unsigned i = 0; i < count; i++)
      pipe_resource_reference(&p->slot[i].buffer, NULL);
}

/* Records a set_shader_buffers call. Does nothing when count is 0.
 *
 * With buffers == NULL an unbind is recorded and no slots are allocated.
 * Otherwise each descriptor is copied with a reference on its buffer, and
 * for every non-NULL buffer the bound byte range is added to that buffer's
 * valid_buffer_range.
 */
static void
tc_set_shader_buffers(struct
pipe_context *_pipe, enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      const struct pipe_shader_buffer *buffers)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_buffers *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
                             buffers ? count : 0);

   p->shader = shader;
   p->start = start;
   p->count = count;
   p->unbind = buffers == NULL;

   if (buffers) {
      for (unsigned i = 0; i < count; i++) {
         struct pipe_shader_buffer *dst = &p->slot[i];
         const struct pipe_shader_buffer *src = buffers + i;

         tc_set_resource_reference(&dst->buffer, src->buffer);
         dst->buffer_offset = src->buffer_offset;
         dst->buffer_size = src->buffer_size;

         if (src->buffer) {
            struct threaded_resource *tres = threaded_resource(src->buffer);

            util_range_add(&tres->valid_buffer_range, src->buffer_offset,
                           src->buffer_offset + src->buffer_size);
         }
      }
   }
}

/* Payload of a recorded set_vertex_buffers call. When "unbind" is set no
 * slots were allocated and the call is replayed with a NULL array.
 */
struct tc_vertex_buffers {
   ubyte start, count;
   bool unbind;
   struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
};

/* Replays a recorded set_vertex_buffers call; unless it is an unbind, the
 * buffer references held by the payload are released afterwards.
 */
static void
tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)payload;
   unsigned count = p->count;

   if (p->unbind) {
      pipe->set_vertex_buffers(pipe, p->start, count, NULL);
      return;
   }

   /* Debug-only check; the loop body is empty when tc_assert is compiled
    * out.
    */
   for (unsigned i = 0; i < count; i++)
      tc_assert(!p->slot[i].is_user_buffer);

   pipe->set_vertex_buffers(pipe, p->start, count, p->slot);
   for (unsigned i = 0; i < count; i++)
      pipe_resource_reference(&p->slot[i].buffer.resource, NULL);
}

/* Records a set_vertex_buffers call. Does nothing when count is 0.
 *
 * With a non-NULL buffers array, stride, offset and a referenced resource
 * are copied per slot and is_user_buffer is forced to false (user buffers
 * are only rejected by a debug assertion). With buffers == NULL an unbind
 * is recorded with no slots.
 */
static void
tc_set_vertex_buffers(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      const struct pipe_vertex_buffer *buffers)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (!count)
      return;

   if (buffers) {
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
      p->start = start;
      p->count = count;
      p->unbind = false;

      for (unsigned i = 0; i < count; i++) {
         struct pipe_vertex_buffer *dst =
&p->slot[i];
         const struct pipe_vertex_buffer *src = buffers + i;

         tc_assert(!src->is_user_buffer);
         dst->stride = src->stride;
         dst->is_user_buffer = false;
         tc_set_resource_reference(&dst->buffer.resource,
                                   src->buffer.resource);
         dst->buffer_offset = src->buffer_offset;
      }
   } else {
      /* Unbind: record only the range, with no slot storage. */
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
      p->start = start;
      p->count = count;
      p->unbind = true;
   }
}

/* Payload of a recorded set_stream_output_targets call. Only the first
 * "count" entries of targets[] and offsets[] are written; each target
 * entry holds a reference.
 */
struct tc_stream_outputs {
   unsigned count;
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   unsigned offsets[PIPE_MAX_SO_BUFFERS];
};

/* Replays a recorded set_stream_output_targets call and then releases the
 * target references held by the payload.
 */
static void
tc_call_set_stream_output_targets(struct pipe_context *pipe, union tc_payload *payload)
{
   struct tc_stream_outputs *p = (struct tc_stream_outputs *)payload;
   unsigned count = p->count;

   pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
   for (unsigned i = 0; i < count; i++)
      pipe_so_target_reference(&p->targets[i], NULL);
}

/* Records a set_stream_output_targets call, taking a reference on each of
 * the "count" targets and copying "count" offsets.
 * NOTE(review): offsets is passed to memcpy even when count is 0; confirm
 * that callers never pass a NULL offsets pointer in that case.
 */
static void
tc_set_stream_output_targets(struct pipe_context *_pipe,
                             unsigned count,
                             struct pipe_stream_output_target **tgs,
                             const unsigned *offsets)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_stream_outputs *p =
      tc_add_struct_typed_call(tc, TC_CALL_set_stream_output_targets,
                               tc_stream_outputs);

   for (unsigned i = 0; i < count; i++) {
      /* The payload is uninitialized, so clear before referencing. */
      p->targets[i] = NULL;
      pipe_so_target_reference(&p->targets[i], tgs[i]);
   }
   p->count = count;
   memcpy(p->offsets, offsets, count * sizeof(unsigned));
}

/* Not recorded: synchronizes the threaded context and then calls the
 * wrapped context's set_compute_resources() directly.
 */
static void
tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
                         unsigned count, struct pipe_surface **resources)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc);
   pipe->set_compute_resources(pipe, start, count, resources);
}

/* Not recorded: synchronizes the threaded context and then calls the
 * wrapped context's set_global_binding() directly.
 */
static void
tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
                      unsigned count, struct pipe_resource **resources,
                      uint32_t **handles)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

tc_sync(tc);
   pipe->set_global_binding(pipe, first, count, resources, handles);
}


/********************************************************************
 * views
 */

/* Creates a surface through the wrapped context immediately (no recording,
 * no sync) and, on success, sets the surface's context pointer to the
 * threaded context _pipe.
 */
static struct pipe_surface *
tc_create_surface(struct pipe_context *_pipe,
                  struct pipe_resource *resource,
                  const struct pipe_surface *surf_tmpl)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
   struct pipe_surface *view =
         pipe->create_surface(pipe, resource, surf_tmpl);

   if (view)
      view->context = _pipe;
   return view;
}

/* Destroys a surface through the wrapped context immediately. */
static void
tc_surface_destroy(struct pipe_context *_pipe,
                   struct pipe_surface *surf)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

   pipe->surface_destroy(pipe, surf);
}

/* Creates a sampler view through the wrapped context immediately (no
 * recording, no sync) and, on success, sets the view's context pointer to
 * the threaded context _pipe.
 */
static struct pipe_sampler_view *
tc_create_sampler_view(struct pipe_context *_pipe,
                       struct pipe_resource *resource,
                       const struct pipe_sampler_view *templ)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
   struct pipe_sampler_view *view =
         pipe->create_sampler_view(pipe, resource, templ);

   if (view)
      view->context = _pipe;
   return view;
}

/* Destroys a sampler view through the wrapped context immediately. */
static void
tc_sampler_view_destroy(struct pipe_context *_pipe,
                        struct pipe_sampler_view *view)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

   pipe->sampler_view_destroy(pipe, view);
}

/* Creates a stream output target. Unlike the other view constructors this
 * synchronizes the threaded context first. The target's byte range is
 * added to the resource's valid_buffer_range, and on success the target's
 * context pointer is set to the threaded context _pipe.
 */
static struct pipe_stream_output_target *
tc_create_stream_output_target(struct pipe_context *_pipe,
                               struct pipe_resource *res,
                               unsigned buffer_offset,
                               unsigned buffer_size)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
   struct threaded_resource *tres = threaded_resource(res);
   struct pipe_stream_output_target *view;

   tc_sync(threaded_context(_pipe));
   util_range_add(&tres->valid_buffer_range, buffer_offset,
                  buffer_offset + buffer_size);

   view = pipe->create_stream_output_target(pipe, res, buffer_offset,
                                            buffer_size);
   if (view)
      view->context = _pipe;
   return view;
}

/* Destroys a stream output target through the wrapped context immediately. */
static void
tc_stream_output_target_destroy(struct pipe_context *_pipe,
                                struct pipe_stream_output_target *target)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

pipe->stream_output_target_destroy(pipe, target);
}


/********************************************************************
 * bindless
 */

/* Not recorded: synchronizes the threaded context, then creates the
 * texture handle through the wrapped context and returns it.
 */
static uint64_t
tc_create_texture_handle(struct pipe_context *_pipe,
                         struct pipe_sampler_view *view,
                         const struct pipe_sampler_state *state)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc);
   return pipe->create_texture_handle(pipe, view, state);
}

/* Replays a recorded delete_texture_handle call. */
static void
tc_call_delete_texture_handle(struct pipe_context *pipe,
                              union tc_payload *payload)
{
   pipe->delete_texture_handle(pipe, payload->handle);
}

/* Records a delete_texture_handle call for the given handle. */
static void
tc_delete_texture_handle(struct pipe_context *_pipe, uint64_t handle)
{
   struct threaded_context *tc = threaded_context(_pipe);
   union tc_payload *payload =
      tc_add_small_call(tc, TC_CALL_delete_texture_handle);

   payload->handle = handle;
}

/* Payload of a recorded make_texture_handle_resident call. */
struct tc_make_texture_handle_resident
{
   uint64_t handle;
   bool resident;
};

/* Replays a recorded make_texture_handle_resident call. */
static void
tc_call_make_texture_handle_resident(struct pipe_context *pipe,
                                     union tc_payload *payload)
{
   struct tc_make_texture_handle_resident *p =
      (struct tc_make_texture_handle_resident *)payload;

   pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
}

/* Records a make_texture_handle_resident call. */
static void
tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
                                bool resident)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_make_texture_handle_resident *p =
      tc_add_struct_typed_call(tc, TC_CALL_make_texture_handle_resident,
                               tc_make_texture_handle_resident);

   p->handle = handle;
   p->resident = resident;
}

/* Not recorded: synchronizes the threaded context, then creates the image
 * handle through the wrapped context and returns it.
 */
static uint64_t
tc_create_image_handle(struct pipe_context *_pipe,
                       const struct pipe_image_view *image)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc);
   return pipe->create_image_handle(pipe, image);
}

/* Replays a recorded delete_image_handle call. */
static void
tc_call_delete_image_handle(struct pipe_context *pipe,
                            union tc_payload *payload)
{
   pipe->delete_image_handle(pipe, payload->handle);
}

/* Records a delete_image_handle call for the given handle. */
static void
tc_delete_image_handle(struct
pipe_context *_pipe, uint64_t handle)
{
   struct threaded_context *tc = threaded_context(_pipe);
   union tc_payload *payload =
      tc_add_small_call(tc, TC_CALL_delete_image_handle);

   payload->handle = handle;
}

/* Payload of a recorded make_image_handle_resident call. */
struct tc_make_image_handle_resident
{
   uint64_t handle;
   unsigned access;
   bool resident;
};

/* Replays a recorded make_image_handle_resident call. */
static void
tc_call_make_image_handle_resident(struct pipe_context *pipe,
                                   union tc_payload *payload)
{
   struct tc_make_image_handle_resident *p =
      (struct tc_make_image_handle_resident *)payload;

   pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
}

/* Records a make_image_handle_resident call. */
static void
tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
                              unsigned access, bool resident)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_make_image_handle_resident *p =
      tc_add_struct_typed_call(tc, TC_CALL_make_image_handle_resident,
                               tc_make_image_handle_resident);

   p->handle = handle;
   p->access = access;
   p->resident = resident;
}


/********************************************************************
 * transfer
 */

/* Payload of a recorded buffer storage replacement. dst and src both hold
 * references; func is the callback that performs the replacement.
 */
struct tc_replace_buffer_storage {
   struct pipe_resource *dst;
   struct pipe_resource *src;
   tc_replace_buffer_storage_func func;
};

/* Replays a recorded storage replacement by invoking the stored callback,
 * then releases both resource references.
 */
static void
tc_call_replace_buffer_storage(struct pipe_context *pipe,
                               union tc_payload *payload)
{
   struct tc_replace_buffer_storage *p =
      (struct tc_replace_buffer_storage *)payload;

   p->func(pipe, p->dst, p->src);
   pipe_resource_reference(&p->dst, NULL);
   pipe_resource_reference(&p->src, NULL);
}

/* Invalidates tbuf by giving it freshly allocated storage.
 *
 * A new resource is created from tbuf's template and becomes tbuf->latest,
 * tbuf's valid range is emptied, and a call that invokes
 * tc->replace_buffer_storage(dst = tbuf, src = new buffer) is recorded.
 *
 * Returns false, changing nothing, if the buffer is shared, a user pointer
 * or sparse, or if the allocation fails; true otherwise.
 */
static bool
tc_invalidate_buffer(struct threaded_context *tc,
                     struct threaded_resource *tbuf)
{
   /* We can't check if the buffer is idle, so we invalidate it
    * unconditionally. */
   struct pipe_screen *screen = tc->base.screen;
   struct pipe_resource *new_buf;

   /* Shared, pinned, and sparse buffers can't be reallocated. */
   if (tbuf->is_shared ||
       tbuf->is_user_ptr ||
       tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
      return false;

   /* Allocate a new one.
*/ new_buf = screen->resource_create(screen, &tbuf->b); if (!new_buf) return false; /* Replace the "latest" pointer. */ if (tbuf->latest != &tbuf->b) pipe_resource_reference(&tbuf->latest, NULL); tbuf->latest = new_buf; util_range_set_empty(&tbuf->valid_buffer_range); /* The valid range should point to the original buffer. */ threaded_resource(new_buf)->base_valid_buffer_range = &tbuf->valid_buffer_range; /* Enqueue storage replacement of the original buffer. */ struct tc_replace_buffer_storage *p = tc_add_struct_typed_call(tc, TC_CALL_replace_buffer_storage, tc_replace_buffer_storage); p->func = tc->replace_buffer_storage; tc_set_resource_reference(&p->dst, &tbuf->b); tc_set_resource_reference(&p->src, new_buf); return true; } static unsigned tc_improve_map_buffer_flags(struct threaded_context *tc, struct threaded_resource *tres, unsigned usage, unsigned offset, unsigned size) { /* Sparse buffers can't be mapped directly and can't be reallocated * (fully invalidated). That may just be a radeonsi limitation, but * the threaded context must obey it with radeonsi. */ if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) { /* We can use DISCARD_RANGE instead of full discard. This is the only * fast path for sparse buffers that doesn't need thread synchronization. */ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) usage |= PIPE_TRANSFER_DISCARD_RANGE; /* Allow DISCARD_WHOLE_RESOURCE and infering UNSYNCHRONIZED in drivers. * The threaded context doesn't do unsychronized mappings and invalida- * tions of sparse buffers, therefore a correct driver behavior won't * result in an incorrect behavior with the threaded context. */ return usage; } /* Handle CPU reads trivially. */ if (usage & PIPE_TRANSFER_READ) { /* Driver aren't allowed to do buffer invalidations. 
*/ return (usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) | TC_TRANSFER_MAP_NO_INVALIDATE | TC_TRANSFER_MAP_IGNORE_VALID_RANGE; } /* See if the buffer range being mapped has never been initialized, * in which case it can be mapped unsynchronized. */ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && !tres->is_shared && !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) usage |= PIPE_TRANSFER_UNSYNCHRONIZED; if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { /* If discarding the entire range, discard the whole resource instead. */ if (usage & PIPE_TRANSFER_DISCARD_RANGE && offset == 0 && size == tres->b.width0) usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; /* Discard the whole resource if needed. */ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { if (tc_invalidate_buffer(tc, tres)) usage |= PIPE_TRANSFER_UNSYNCHRONIZED; else usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */ } } /* We won't need this flag anymore. */ /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */ usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; /* GL_AMD_pinned_memory and persistent mappings can't use staging * buffers. */ if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT) || tres->is_user_ptr) usage &= ~PIPE_TRANSFER_DISCARD_RANGE; /* Unsychronized buffer mappings don't have to synchronize the thread. */ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */ /* Never invalidate inside the driver and never infer "unsynchronized". 
*/ return usage | TC_TRANSFER_MAP_NO_INVALIDATE | TC_TRANSFER_MAP_IGNORE_VALID_RANGE; } static void * tc_transfer_map(struct pipe_context *_pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **transfer) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_resource *tres = threaded_resource(resource); struct pipe_context *pipe = tc->pipe; if (resource->target == PIPE_BUFFER) { usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); /* Do a staging transfer within the threaded context. The driver should * only get resource_copy_region. */ if (usage & PIPE_TRANSFER_DISCARD_RANGE) { struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers); uint8_t *map; ttrans->staging = NULL; u_upload_alloc(tc->base.stream_uploader, 0, box->width + (box->x % tc->map_buffer_alignment), 64, &ttrans->offset, &ttrans->staging, (void**)&map); if (!map) { slab_free(&tc->pool_transfers, ttrans); return NULL; } tc_set_resource_reference(&ttrans->b.resource, resource); ttrans->b.level = 0; ttrans->b.usage = usage; ttrans->b.box = *box; ttrans->b.stride = 0; ttrans->b.layer_stride = 0; *transfer = &ttrans->b; return map + (box->x % tc->map_buffer_alignment); } } /* Unsychronized buffer mappings don't have to synchronize the thread. */ if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" : usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" : usage & PIPE_TRANSFER_READ ? " read" : " ??"); return pipe->transfer_map(pipe, tres->latest ? 
tres->latest : resource, level, usage, box, transfer); } struct tc_transfer_flush_region { struct pipe_transfer *transfer; struct pipe_box box; }; static void tc_call_transfer_flush_region(struct pipe_context *pipe, union tc_payload *payload) { struct tc_transfer_flush_region *p = (struct tc_transfer_flush_region *)payload; pipe->transfer_flush_region(pipe, p->transfer, &p->box); } struct tc_resource_copy_region { struct pipe_resource *dst; unsigned dst_level; unsigned dstx, dsty, dstz; struct pipe_resource *src; unsigned src_level; struct pipe_box src_box; }; static void tc_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box); static void tc_buffer_do_flush_region(struct threaded_context *tc, struct threaded_transfer *ttrans, const struct pipe_box *box) { struct threaded_resource *tres = threaded_resource(ttrans->b.resource); if (ttrans->staging) { struct pipe_box src_box; u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment, box->width, &src_box); /* Copy the staging buffer into the original one. 
*/ tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0, ttrans->staging, 0, &src_box); } util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width); } static void tc_transfer_flush_region(struct pipe_context *_pipe, struct pipe_transfer *transfer, const struct pipe_box *rel_box) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_transfer *ttrans = threaded_transfer(transfer); struct threaded_resource *tres = threaded_resource(transfer->resource); unsigned required_usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_FLUSH_EXPLICIT; if (tres->b.target == PIPE_BUFFER) { if ((transfer->usage & required_usage) == required_usage) { struct pipe_box box; u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box); tc_buffer_do_flush_region(tc, ttrans, &box); } /* Staging transfers don't send the call to the driver. */ if (ttrans->staging) return; } struct tc_transfer_flush_region *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region); p->transfer = transfer; p->box = *rel_box; } static void tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload) { pipe->transfer_unmap(pipe, payload->transfer); } static void tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_transfer *ttrans = threaded_transfer(transfer); struct threaded_resource *tres = threaded_resource(transfer->resource); if (tres->b.target == PIPE_BUFFER) { if (transfer->usage & PIPE_TRANSFER_WRITE && !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) tc_buffer_do_flush_region(tc, ttrans, &transfer->box); /* Staging transfers don't send the call to the driver. 
*/ if (ttrans->staging) { pipe_resource_reference(&ttrans->staging, NULL); pipe_resource_reference(&ttrans->b.resource, NULL); slab_free(&tc->pool_transfers, ttrans); return; } } tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer; } struct tc_buffer_subdata { struct pipe_resource *resource; unsigned usage, offset, size; char slot[0]; /* more will be allocated if needed */ }; static void tc_call_buffer_subdata(struct pipe_context *pipe, union tc_payload *payload) { struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)payload; pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size, p->slot); pipe_resource_reference(&p->resource, NULL); } static void tc_buffer_subdata(struct pipe_context *_pipe, struct pipe_resource *resource, unsigned usage, unsigned offset, unsigned size, const void *data) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_resource *tres = threaded_resource(resource); if (!size) return; usage |= PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE; usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size); /* Unsychronized and big transfers should use transfer_map. Also handle * full invalidations, because drivers aren't allowed to do them. */ if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) || size > TC_MAX_SUBDATA_BYTES) { struct pipe_transfer *transfer; struct pipe_box box; uint8_t *map = NULL; u_box_1d(offset, size, &box); map = tc_transfer_map(_pipe, resource, 0, usage, &box, &transfer); if (map) { memcpy(map, data, size); tc_transfer_unmap(_pipe, transfer); } return; } util_range_add(&tres->valid_buffer_range, offset, offset + size); /* The upload is small. Enqueue it. 
*/ struct tc_buffer_subdata *p = tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size); tc_set_resource_reference(&p->resource, resource); p->usage = usage; p->offset = offset; p->size = size; memcpy(p->slot, data, size); } struct tc_texture_subdata { struct pipe_resource *resource; unsigned level, usage, stride, layer_stride; struct pipe_box box; char slot[0]; /* more will be allocated if needed */ }; static void tc_call_texture_subdata(struct pipe_context *pipe, union tc_payload *payload) { struct tc_texture_subdata *p = (struct tc_texture_subdata *)payload; pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box, p->slot, p->stride, p->layer_stride); pipe_resource_reference(&p->resource, NULL); } static void tc_texture_subdata(struct pipe_context *_pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, const void *data, unsigned stride, unsigned layer_stride) { struct threaded_context *tc = threaded_context(_pipe); unsigned size; assert(box->height >= 1); assert(box->depth >= 1); size = (box->depth - 1) * layer_stride + (box->height - 1) * stride + box->width * util_format_get_blocksize(resource->format); if (!size) return; /* Small uploads can be enqueued, big uploads must sync. 
*/ if (size <= TC_MAX_SUBDATA_BYTES) { struct tc_texture_subdata *p = tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size); tc_set_resource_reference(&p->resource, resource); p->level = level; p->usage = usage; p->box = *box; p->stride = stride; p->layer_stride = layer_stride; memcpy(p->slot, data, size); } else { struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->texture_subdata(pipe, resource, level, usage, box, data, stride, layer_stride); } } /******************************************************************** * miscellaneous */ #define TC_FUNC_SYNC_RET0(ret_type, func) \ static ret_type \ tc_##func(struct pipe_context *_pipe) \ { \ struct threaded_context *tc = threaded_context(_pipe); \ struct pipe_context *pipe = tc->pipe; \ tc_sync(tc); \ return pipe->func(pipe); \ } TC_FUNC_SYNC_RET0(enum pipe_reset_status, get_device_reset_status) TC_FUNC_SYNC_RET0(uint64_t, get_timestamp) static void tc_get_sample_position(struct pipe_context *_pipe, unsigned sample_count, unsigned sample_index, float *out_value) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->get_sample_position(pipe, sample_count, sample_index, out_value); } static void tc_set_device_reset_callback(struct pipe_context *_pipe, const struct pipe_device_reset_callback *cb) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->set_device_reset_callback(pipe, cb); } struct tc_string_marker { int len; char slot[0]; /* more will be allocated if needed */ }; static void tc_call_emit_string_marker(struct pipe_context *pipe, union tc_payload *payload) { struct tc_string_marker *p = (struct tc_string_marker *)payload; pipe->emit_string_marker(pipe, p->slot, p->len); } static void tc_emit_string_marker(struct pipe_context *_pipe, const char *string, int len) { struct threaded_context *tc = threaded_context(_pipe); if (len <= TC_MAX_STRING_MARKER_BYTES) { 
struct tc_string_marker *p = tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len); memcpy(p->slot, string, len); p->len = len; } else { struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->emit_string_marker(pipe, string, len); } } static void tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream, unsigned flags) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->dump_debug_state(pipe, stream, flags); } static void tc_set_debug_callback(struct pipe_context *_pipe, const struct pipe_debug_callback *cb) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; /* Drop all synchronous debug callbacks. Drivers are expected to be OK * with this. shader-db will use an environment variable to disable * the threaded context. */ if (cb && cb->debug_message && !cb->async) return; tc_sync(tc); pipe->set_debug_callback(pipe, cb); } static void tc_create_fence_fd(struct pipe_context *_pipe, struct pipe_fence_handle **fence, int fd) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->create_fence_fd(pipe, fence, fd); } static void tc_fence_server_sync(struct pipe_context *_pipe, struct pipe_fence_handle *fence) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->fence_server_sync(pipe, fence); } static struct pipe_video_codec * tc_create_video_codec(struct pipe_context *_pipe, const struct pipe_video_codec *templ) { unreachable("Threaded context should not be enabled for video APIs"); return NULL; } static struct pipe_video_buffer * tc_create_video_buffer(struct pipe_context *_pipe, const struct pipe_video_buffer *templ) { unreachable("Threaded context should not be enabled for video APIs"); return NULL; } /******************************************************************** * draw, launch, clear, blit, copy, flush */ 
static void tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, unsigned flags) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; struct threaded_query *tq, *tmp; LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) { tq->flushed = true; LIST_DEL(&tq->head_unflushed); } /* TODO: deferred flushes? */ tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" : flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal"); pipe->flush(pipe, fence, flags); } /* This is actually variable-sized, because indirect isn't allocated if it's * not needed. */ struct tc_full_draw_info { struct pipe_draw_info draw; struct pipe_draw_indirect_info indirect; }; static void tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload) { struct tc_full_draw_info *info = (struct tc_full_draw_info*)payload; pipe->draw_vbo(pipe, &info->draw); pipe_so_target_reference(&info->draw.count_from_stream_output, NULL); if (info->draw.index_size) pipe_resource_reference(&info->draw.index.resource, NULL); if (info->draw.indirect) { pipe_resource_reference(&info->indirect.buffer, NULL); pipe_resource_reference(&info->indirect.indirect_draw_count, NULL); } } static struct tc_full_draw_info * tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect) { return (struct tc_full_draw_info*) tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo, indirect ? 
sizeof(struct tc_full_draw_info) : sizeof(struct pipe_draw_info)); } static void tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_draw_indirect_info *indirect = info->indirect; unsigned index_size = info->index_size; bool has_user_indices = info->has_user_indices; if (index_size && has_user_indices) { unsigned size = info->count * index_size; struct pipe_resource *buffer = NULL; unsigned offset; tc_assert(!indirect); /* This must be done before adding draw_vbo, because it could generate * e.g. transfer_unmap and flush partially-uninitialized draw_vbo * to the driver if it was done afterwards. */ u_upload_data(tc->base.stream_uploader, 0, size, 4, info->index.user, &offset, &buffer); if (unlikely(!buffer)) return; struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false); p->draw.count_from_stream_output = NULL; pipe_so_target_reference(&p->draw.count_from_stream_output, info->count_from_stream_output); memcpy(&p->draw, info, sizeof(*info)); p->draw.has_user_indices = false; p->draw.index.resource = buffer; p->draw.start = offset / index_size; } else { /* Non-indexed call or indexed with a real index buffer. 
*/ struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL); p->draw.count_from_stream_output = NULL; pipe_so_target_reference(&p->draw.count_from_stream_output, info->count_from_stream_output); if (index_size) { tc_set_resource_reference(&p->draw.index.resource, info->index.resource); } memcpy(&p->draw, info, sizeof(*info)); if (indirect) { tc_set_resource_reference(&p->draw.indirect->buffer, indirect->buffer); tc_set_resource_reference(&p->indirect.indirect_draw_count, indirect->indirect_draw_count); memcpy(&p->indirect, indirect, sizeof(*indirect)); p->draw.indirect = &p->indirect; } } } static void tc_call_launch_grid(struct pipe_context *pipe, union tc_payload *payload) { struct pipe_grid_info *p = (struct pipe_grid_info *)payload; pipe->launch_grid(pipe, p); pipe_resource_reference(&p->indirect, NULL); } static void tc_launch_grid(struct pipe_context *_pipe, const struct pipe_grid_info *info) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_grid_info *p = tc_add_struct_typed_call(tc, TC_CALL_launch_grid, pipe_grid_info); assert(info->input == NULL); tc_set_resource_reference(&p->indirect, info->indirect); memcpy(p, info, sizeof(*info)); } static void tc_call_resource_copy_region(struct pipe_context *pipe, union tc_payload *payload) { struct tc_resource_copy_region *p = (struct tc_resource_copy_region *)payload; pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty, p->dstz, p->src, p->src_level, &p->src_box); pipe_resource_reference(&p->dst, NULL); pipe_resource_reference(&p->src, NULL); } static void tc_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_resource *tdst = threaded_resource(dst); struct tc_resource_copy_region *p = tc_add_struct_typed_call(tc, 
TC_CALL_resource_copy_region, tc_resource_copy_region); tc_set_resource_reference(&p->dst, dst); p->dst_level = dst_level; p->dstx = dstx; p->dsty = dsty; p->dstz = dstz; tc_set_resource_reference(&p->src, src); p->src_level = src_level; p->src_box = *src_box; if (dst->target == PIPE_BUFFER) util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width); } static void tc_call_blit(struct pipe_context *pipe, union tc_payload *payload) { struct pipe_blit_info *blit = (struct pipe_blit_info*)payload; pipe->blit(pipe, blit); pipe_resource_reference(&blit->dst.resource, NULL); pipe_resource_reference(&blit->src.resource, NULL); } static void tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_blit_info *blit = tc_add_struct_typed_call(tc, TC_CALL_blit, pipe_blit_info); tc_set_resource_reference(&blit->dst.resource, info->dst.resource); tc_set_resource_reference(&blit->src.resource, info->src.resource); memcpy(blit, info, sizeof(*info)); } struct tc_generate_mipmap { struct pipe_resource *res; enum pipe_format format; unsigned base_level; unsigned last_level; unsigned first_layer; unsigned last_layer; }; static void tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload) { struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload; bool result = pipe->generate_mipmap(pipe, p->res, p->format, p->base_level, p->last_level, p->first_layer, p->last_layer); assert(result); pipe_resource_reference(&p->res, NULL); } static boolean tc_generate_mipmap(struct pipe_context *_pipe, struct pipe_resource *res, enum pipe_format format, unsigned base_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; struct pipe_screen *screen = pipe->screen; unsigned bind = PIPE_BIND_SAMPLER_VIEW; if (util_format_is_depth_or_stencil(format)) bind = 
PIPE_BIND_DEPTH_STENCIL; else bind = PIPE_BIND_RENDER_TARGET; if (!screen->is_format_supported(screen, format, res->target, res->nr_samples, bind)) return false; struct tc_generate_mipmap *p = tc_add_struct_typed_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap); tc_set_resource_reference(&p->res, res); p->format = format; p->base_level = base_level; p->last_level = last_level; p->first_layer = first_layer; p->last_layer = last_layer; return true; } static void tc_call_flush_resource(struct pipe_context *pipe, union tc_payload *payload) { pipe->flush_resource(pipe, payload->resource); pipe_resource_reference(&payload->resource, NULL); } static void tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource) { struct threaded_context *tc = threaded_context(_pipe); union tc_payload *payload = tc_add_small_call(tc, TC_CALL_flush_resource); tc_set_resource_reference(&payload->resource, resource); } static void tc_call_invalidate_resource(struct pipe_context *pipe, union tc_payload *payload) { pipe->invalidate_resource(pipe, payload->resource); pipe_resource_reference(&payload->resource, NULL); } static void tc_invalidate_resource(struct pipe_context *_pipe, struct pipe_resource *resource) { struct threaded_context *tc = threaded_context(_pipe); if (resource->target == PIPE_BUFFER) { tc_invalidate_buffer(tc, threaded_resource(resource)); return; } union tc_payload *payload = tc_add_small_call(tc, TC_CALL_invalidate_resource); tc_set_resource_reference(&payload->resource, resource); } struct tc_clear { unsigned buffers; union pipe_color_union color; double depth; unsigned stencil; }; static void tc_call_clear(struct pipe_context *pipe, union tc_payload *payload) { struct tc_clear *p = (struct tc_clear *)payload; pipe->clear(pipe, p->buffers, &p->color, p->depth, p->stencil); } static void tc_clear(struct pipe_context *_pipe, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct threaded_context *tc = 
threaded_context(_pipe); struct tc_clear *p = tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear); p->buffers = buffers; p->color = *color; p->depth = depth; p->stencil = stencil; } static void tc_clear_render_target(struct pipe_context *_pipe, struct pipe_surface *dst, const union pipe_color_union *color, unsigned dstx, unsigned dsty, unsigned width, unsigned height, bool render_condition_enabled) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height, render_condition_enabled); } static void tc_clear_depth_stencil(struct pipe_context *_pipe, struct pipe_surface *dst, unsigned clear_flags, double depth, unsigned stencil, unsigned dstx, unsigned dsty, unsigned width, unsigned height, bool render_condition_enabled) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; tc_sync(tc); pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil, dstx, dsty, width, height, render_condition_enabled); } struct tc_clear_buffer { struct pipe_resource *res; unsigned offset; unsigned size; char clear_value[16]; int clear_value_size; }; static void tc_call_clear_buffer(struct pipe_context *pipe, union tc_payload *payload) { struct tc_clear_buffer *p = (struct tc_clear_buffer *)payload; pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value, p->clear_value_size); pipe_resource_reference(&p->res, NULL); } static void tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res, unsigned offset, unsigned size, const void *clear_value, int clear_value_size) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_resource *tres = threaded_resource(res); struct tc_clear_buffer *p = tc_add_struct_typed_call(tc, TC_CALL_clear_buffer, tc_clear_buffer); tc_set_resource_reference(&p->res, res); p->offset = offset; p->size = size; memcpy(p->clear_value, clear_value, clear_value_size); 
p->clear_value_size = clear_value_size; util_range_add(&tres->valid_buffer_range, offset, offset + size); } struct tc_clear_texture { struct pipe_resource *res; unsigned level; struct pipe_box box; char data[16]; }; static void tc_call_clear_texture(struct pipe_context *pipe, union tc_payload *payload) { struct tc_clear_texture *p = (struct tc_clear_texture *)payload; pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data); pipe_resource_reference(&p->res, NULL); } static void tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res, unsigned level, const struct pipe_box *box, const void *data) { struct threaded_context *tc = threaded_context(_pipe); struct tc_clear_texture *p = tc_add_struct_typed_call(tc, TC_CALL_clear_texture, tc_clear_texture); tc_set_resource_reference(&p->res, res); p->level = level; p->box = *box; memcpy(p->data, data, util_format_get_blocksize(res->format)); } struct tc_resource_commit { struct pipe_resource *res; unsigned level; struct pipe_box box; bool commit; }; static void tc_call_resource_commit(struct pipe_context *pipe, union tc_payload *payload) { struct tc_resource_commit *p = (struct tc_resource_commit *)payload; pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit); pipe_resource_reference(&p->res, NULL); } static bool tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res, unsigned level, struct pipe_box *box, bool commit) { struct threaded_context *tc = threaded_context(_pipe); struct tc_resource_commit *p = tc_add_struct_typed_call(tc, TC_CALL_resource_commit, tc_resource_commit); tc_set_resource_reference(&p->res, res); p->level = level; p->box = *box; p->commit = commit; return true; /* we don't care about the return value for this call */ } /******************************************************************** * create & destroy */ static void tc_destroy(struct pipe_context *_pipe) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; 
tc_sync(tc); if (util_queue_is_initialized(&tc->queue)) { util_queue_destroy(&tc->queue); for (unsigned i = 0; i < TC_MAX_BATCHES; i++) util_queue_fence_destroy(&tc->batch_slots[i].fence); } if (tc->base.const_uploader && tc->base.stream_uploader != tc->base.const_uploader) u_upload_destroy(tc->base.const_uploader); if (tc->base.stream_uploader) u_upload_destroy(tc->base.stream_uploader); slab_destroy_child(&tc->pool_transfers); pipe->destroy(pipe); os_free_aligned(tc); } static const tc_execute execute_func[TC_NUM_CALLS] = { #define CALL(name) tc_call_##name, #include "u_threaded_context_calls.h" #undef CALL }; /** * Wrap an existing pipe_context into a threaded_context. * * \param pipe pipe_context to wrap * \param parent_transfer_pool parent slab pool set up for creating pipe_- * transfer objects; the driver should have one * in pipe_screen. * \param replace_buffer callback for replacing a pipe_resource's storage * with another pipe_resource's storage. * \param out if successful, the threaded_context will be returned here in * addition to the return value if "out" != NULL */ struct pipe_context * threaded_context_create(struct pipe_context *pipe, struct slab_parent_pool *parent_transfer_pool, tc_replace_buffer_storage_func replace_buffer, struct threaded_context **out) { struct threaded_context *tc; STATIC_ASSERT(sizeof(union tc_payload) <= 8); STATIC_ASSERT(sizeof(struct tc_call) <= 16); if (!pipe) return NULL; util_cpu_detect(); if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1)) return pipe; tc = os_malloc_aligned(sizeof(struct threaded_context), 16); if (!tc) { pipe->destroy(pipe); return NULL; } memset(tc, 0, sizeof(*tc)); assert((uintptr_t)tc % 16 == 0); /* These should be static asserts, but they don't work with MSVC */ assert(offsetof(struct threaded_context, batch_slots) % 16 == 0); assert(offsetof(struct threaded_context, batch_slots[0].call) % 16 == 0); assert(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0); 
assert(offsetof(struct threaded_context, batch_slots[1].call) % 16 == 0); /* The driver context isn't wrapped, so set its "priv" to NULL. */ pipe->priv = NULL; tc->pipe = pipe; tc->replace_buffer_storage = replace_buffer; tc->map_buffer_alignment = pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT); tc->base.priv = pipe; /* priv points to the wrapped driver context */ tc->base.screen = pipe->screen; tc->base.destroy = tc_destroy; tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader); if (pipe->stream_uploader == pipe->const_uploader) tc->base.const_uploader = tc->base.stream_uploader; else tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader); if (!tc->base.stream_uploader || !tc->base.const_uploader) goto fail; /* The queue size is the number of batches "waiting". Batches are removed * from the queue before being executed, so keep one tc_batch slot for that * execution. Also, keep one unused slot for an unflushed batch. */ if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0)) goto fail; for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { tc->batch_slots[i].sentinel = TC_SENTINEL; tc->batch_slots[i].pipe = pipe; util_queue_fence_init(&tc->batch_slots[i].fence); } LIST_INITHEAD(&tc->unflushed_queries); slab_create_child(&tc->pool_transfers, parent_transfer_pool); #define CTX_INIT(_member) \ tc->base._member = tc->pipe->_member ? 
tc_##_member : NULL CTX_INIT(flush); CTX_INIT(draw_vbo); CTX_INIT(launch_grid); CTX_INIT(resource_copy_region); CTX_INIT(blit); CTX_INIT(clear); CTX_INIT(clear_render_target); CTX_INIT(clear_depth_stencil); CTX_INIT(clear_buffer); CTX_INIT(clear_texture); CTX_INIT(flush_resource); CTX_INIT(generate_mipmap); CTX_INIT(render_condition); CTX_INIT(create_query); CTX_INIT(create_batch_query); CTX_INIT(destroy_query); CTX_INIT(begin_query); CTX_INIT(end_query); CTX_INIT(get_query_result); CTX_INIT(get_query_result_resource); CTX_INIT(set_active_query_state); CTX_INIT(create_blend_state); CTX_INIT(bind_blend_state); CTX_INIT(delete_blend_state); CTX_INIT(create_sampler_state); CTX_INIT(bind_sampler_states); CTX_INIT(delete_sampler_state); CTX_INIT(create_rasterizer_state); CTX_INIT(bind_rasterizer_state); CTX_INIT(delete_rasterizer_state); CTX_INIT(create_depth_stencil_alpha_state); CTX_INIT(bind_depth_stencil_alpha_state); CTX_INIT(delete_depth_stencil_alpha_state); CTX_INIT(create_fs_state); CTX_INIT(bind_fs_state); CTX_INIT(delete_fs_state); CTX_INIT(create_vs_state); CTX_INIT(bind_vs_state); CTX_INIT(delete_vs_state); CTX_INIT(create_gs_state); CTX_INIT(bind_gs_state); CTX_INIT(delete_gs_state); CTX_INIT(create_tcs_state); CTX_INIT(bind_tcs_state); CTX_INIT(delete_tcs_state); CTX_INIT(create_tes_state); CTX_INIT(bind_tes_state); CTX_INIT(delete_tes_state); CTX_INIT(create_compute_state); CTX_INIT(bind_compute_state); CTX_INIT(delete_compute_state); CTX_INIT(create_vertex_elements_state); CTX_INIT(bind_vertex_elements_state); CTX_INIT(delete_vertex_elements_state); CTX_INIT(set_blend_color); CTX_INIT(set_stencil_ref); CTX_INIT(set_sample_mask); CTX_INIT(set_min_samples); CTX_INIT(set_clip_state); CTX_INIT(set_constant_buffer); CTX_INIT(set_framebuffer_state); CTX_INIT(set_polygon_stipple); CTX_INIT(set_scissor_states); CTX_INIT(set_viewport_states); CTX_INIT(set_window_rectangles); CTX_INIT(set_sampler_views); CTX_INIT(set_tess_state); CTX_INIT(set_shader_buffers); 
CTX_INIT(set_shader_images); CTX_INIT(set_vertex_buffers); CTX_INIT(create_stream_output_target); CTX_INIT(stream_output_target_destroy); CTX_INIT(set_stream_output_targets); CTX_INIT(create_sampler_view); CTX_INIT(sampler_view_destroy); CTX_INIT(create_surface); CTX_INIT(surface_destroy); CTX_INIT(transfer_map); CTX_INIT(transfer_flush_region); CTX_INIT(transfer_unmap); CTX_INIT(buffer_subdata); CTX_INIT(texture_subdata); CTX_INIT(texture_barrier); CTX_INIT(memory_barrier); CTX_INIT(resource_commit); CTX_INIT(create_video_codec); CTX_INIT(create_video_buffer); CTX_INIT(set_compute_resources); CTX_INIT(set_global_binding); CTX_INIT(get_sample_position); CTX_INIT(invalidate_resource); CTX_INIT(get_device_reset_status); CTX_INIT(set_device_reset_callback); CTX_INIT(dump_debug_state); CTX_INIT(emit_string_marker); CTX_INIT(set_debug_callback); CTX_INIT(create_fence_fd); CTX_INIT(fence_server_sync); CTX_INIT(get_timestamp); CTX_INIT(create_texture_handle); CTX_INIT(delete_texture_handle); CTX_INIT(make_texture_handle_resident); CTX_INIT(create_image_handle); CTX_INIT(delete_image_handle); CTX_INIT(make_image_handle_resident); #undef CTX_INIT if (out) *out = tc; return &tc->base; fail: tc_destroy(&tc->base); return NULL; }