summary refs log tree commit diff
path: root/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/util/u_vbuf.c')
-rw-r--r-- lib/mesa/src/gallium/auxiliary/util/u_vbuf.c 118
1 files changed, 100 insertions, 18 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c
index 57e2d98f0..5e4f4f4b9 100644
--- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c
+++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c
@@ -112,6 +112,7 @@ struct u_vbuf_elements {
* its vertex data must be translated to native_format[i]. */
enum pipe_format native_format[PIPE_MAX_ATTRIBS];
unsigned native_format_size[PIPE_MAX_ATTRIBS];
+ unsigned component_size[PIPE_MAX_ATTRIBS];
/* Which buffers are used by the vertex element state. */
uint32_t used_vb_mask;
@@ -127,6 +128,7 @@ struct u_vbuf_elements {
/* Which buffer has at least one vertex element referencing it
* compatible. */
uint32_t compatible_vb_mask_any;
+ uint32_t vb_align_mask[2]; //which buffers require 2/4 byte alignments
/* Which buffer has all vertex elements referencing it compatible. */
uint32_t compatible_vb_mask_all;
@@ -163,6 +165,8 @@ struct u_vbuf {
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
uint32_t enabled_vb_mask;
+ uint32_t unaligned_vb_mask[2]; //16/32bit
+
/* Vertex buffers for the driver.
* There are usually no user buffers. */
struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -303,6 +307,11 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
caps->velem_src_offset_unaligned =
!screen->get_param(screen,
PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
+ caps->attrib_component_unaligned =
+ !screen->get_param(screen,
+ PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY);
+ assert(caps->attrib_component_unaligned ||
+ (caps->velem_src_offset_unaligned && caps->buffer_stride_unaligned && caps->buffer_offset_unaligned));
caps->user_vertex_buffers =
screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
caps->max_vertex_buffers =
@@ -330,6 +339,7 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
if (!caps->buffer_offset_unaligned ||
!caps->buffer_stride_unaligned ||
+ !caps->attrib_component_unaligned ||
!caps->velem_src_offset_unaligned)
caps->fallback_always = true;
@@ -669,13 +679,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw,
int start_vertex, unsigned num_vertices,
- int min_index, boolean unroll_indices)
+ int min_index, boolean unroll_indices,
+ uint32_t misaligned)
{
unsigned mask[VB_NUM] = {0};
struct translate_key key[VB_NUM];
unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
unsigned i, type;
- const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
+ const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask) &
mgr->ve->used_vb_mask;
const int start[VB_NUM] = {
@@ -727,6 +738,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
return FALSE;
}
+ unsigned min_alignment[VB_NUM] = {0};
/* Initialize the translate keys. */
for (i = 0; i < mgr->ve->count; i++) {
struct translate_key *k;
@@ -765,15 +777,25 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
te->input_offset = mgr->ve->ve[i].src_offset;
te->output_format = output_format;
te->output_offset = k->output_stride;
+ unsigned adjustment = 0;
+ if (!mgr->caps.attrib_component_unaligned &&
+ te->output_offset % mgr->ve->component_size[i] != 0) {
+ unsigned aligned = align(te->output_offset, mgr->ve->component_size[i]);
+ adjustment = aligned - te->output_offset;
+ te->output_offset = aligned;
+ }
- k->output_stride += mgr->ve->native_format_size[i];
+ k->output_stride += mgr->ve->native_format_size[i] + adjustment;
k->nr_elements++;
+ min_alignment[type] = MAX2(min_alignment[type], mgr->ve->component_size[i]);
}
/* Translate buffers. */
for (type = 0; type < VB_NUM; type++) {
if (key[type].nr_elements) {
enum pipe_error err;
+ if (!mgr->caps.attrib_component_unaligned)
+ key[type].output_stride = align(key[type].output_stride, min_alignment[type]);
err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
mask[type], mgr->fallback_vbs[type],
start[type], num[type], min_index,
@@ -881,13 +903,27 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
ve->native_format_size[i] =
util_format_get_blocksize(ve->native_format[i]);
+ const struct util_format_description *desc = util_format_description(format);
+ bool is_packed = false;
+ for (unsigned c = 0; c < desc->nr_channels; c++)
+ is_packed |= desc->channel[c].size != desc->channel[0].size || desc->channel[c].size % 8 != 0;
+ unsigned component_size = is_packed ?
+ ve->native_format_size[i] : (ve->native_format_size[i] / desc->nr_channels);
+ ve->component_size[i] = component_size;
+
if (ve->ve[i].src_format != format ||
(!mgr->caps.velem_src_offset_unaligned &&
- ve->ve[i].src_offset % 4 != 0)) {
+ ve->ve[i].src_offset % 4 != 0) ||
+ (!mgr->caps.attrib_component_unaligned &&
+ ve->ve[i].src_offset % component_size != 0)) {
ve->incompatible_elem_mask |= 1 << i;
ve->incompatible_vb_mask_any |= vb_index_bit;
} else {
ve->compatible_vb_mask_any |= vb_index_bit;
+ if (component_size == 2)
+ ve->vb_align_mask[0] |= vb_index_bit;
+ else if (component_size == 4)
+ ve->vb_align_mask[1] |= vb_index_bit;
}
}
@@ -951,21 +987,25 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
uint32_t incompatible_vb_mask = 0;
/* which buffers have a non-zero stride */
uint32_t nonzero_stride_vb_mask = 0;
- const uint32_t mask =
+ /* which buffers are unaligned to 2/4 bytes */
+ uint32_t unaligned_vb_mask[2] = {0};
+ uint32_t mask =
~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
- /* Zero out the bits we are going to rewrite completely. */
- mgr->user_vb_mask &= mask;
- mgr->incompatible_vb_mask &= mask;
- mgr->nonzero_stride_vb_mask &= mask;
- mgr->enabled_vb_mask &= mask;
-
if (!bufs) {
struct pipe_context *pipe = mgr->pipe;
/* Unbind. */
unsigned total_count = count + unbind_num_trailing_slots;
mgr->dirty_real_vb_mask &= mask;
+ /* Zero out the bits we are going to rewrite completely. */
+ mgr->user_vb_mask &= mask;
+ mgr->incompatible_vb_mask &= mask;
+ mgr->nonzero_stride_vb_mask &= mask;
+ mgr->enabled_vb_mask &= mask;
+ mgr->unaligned_vb_mask[0] &= mask;
+ mgr->unaligned_vb_mask[1] &= mask;
+
for (i = 0; i < total_count; i++) {
unsigned dst_index = start_slot + i;
@@ -990,6 +1030,21 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
continue;
}
+ bool not_user = !vb->is_user_buffer && vb->is_user_buffer == orig_vb->is_user_buffer;
+ /* struct isn't tightly packed: do not use memcmp */
+ if (not_user && orig_vb->stride == vb->stride &&
+ orig_vb->buffer_offset == vb->buffer_offset && orig_vb->buffer.resource == vb->buffer.resource) {
+ mask |= BITFIELD_BIT(dst_index);
+ if (take_ownership) {
+ pipe_vertex_buffer_unreference(orig_vb);
+ /* the pointer was unset in the line above, so copy it back */
+ orig_vb->buffer.resource = vb->buffer.resource;
+ }
+ if (mask == UINT32_MAX)
+ return;
+ continue;
+ }
+
if (take_ownership) {
pipe_vertex_buffer_unreference(orig_vb);
memcpy(orig_vb, vb, sizeof(*vb));
@@ -1012,6 +1067,13 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
continue;
}
+ if (!mgr->caps.attrib_component_unaligned) {
+ if (vb->buffer_offset % 2 != 0 || vb->stride % 2 != 0)
+ unaligned_vb_mask[0] |= BITFIELD_BIT(dst_index);
+ if (vb->buffer_offset % 4 != 0 || vb->stride % 4 != 0)
+ unaligned_vb_mask[1] |= BITFIELD_BIT(dst_index);
+ }
+
if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
user_vb_mask |= 1 << dst_index;
real_vb->buffer_offset = vb->buffer_offset;
@@ -1031,10 +1093,21 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
}
+
+ /* Zero out the bits we are going to rewrite completely. */
+ mgr->user_vb_mask &= mask;
+ mgr->incompatible_vb_mask &= mask;
+ mgr->nonzero_stride_vb_mask &= mask;
+ mgr->enabled_vb_mask &= mask;
+ mgr->unaligned_vb_mask[0] &= mask;
+ mgr->unaligned_vb_mask[1] &= mask;
+
mgr->user_vb_mask |= user_vb_mask;
mgr->incompatible_vb_mask |= incompatible_vb_mask;
mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
mgr->enabled_vb_mask |= enabled_vb_mask;
+ mgr->unaligned_vb_mask[0] |= unaligned_vb_mask[0];
+ mgr->unaligned_vb_mask[1] |= unaligned_vb_mask[1];
/* All changed buffers are marked as dirty, even the NULL ones,
* which will cause the NULL buffers to be unbound in the driver later. */
@@ -1184,7 +1257,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
return PIPE_OK;
}
-static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
+static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misaligned)
{
/* See if there are any per-vertex attribs which will be uploaded or
* translated. Use bitmasks to get the info instead of looping over vertex
@@ -1192,12 +1265,13 @@ static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
return (mgr->ve->used_vb_mask &
((mgr->user_vb_mask |
mgr->incompatible_vb_mask |
+ misaligned |
mgr->ve->incompatible_vb_mask_any) &
mgr->ve->noninstance_vb_mask_any &
mgr->nonzero_stride_vb_mask)) != 0;
}
-static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
+static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned)
{
/* Return true if there are hw buffers which don't need to be translated.
*
@@ -1206,6 +1280,7 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
return (mgr->ve->used_vb_mask &
(~mgr->user_vb_mask &
~mgr->incompatible_vb_mask &
+ ~misaligned &
mgr->ve->compatible_vb_mask_all &
mgr->ve->noninstance_vb_mask_any &
mgr->nonzero_stride_vb_mask)) != 0;
@@ -1391,12 +1466,19 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
boolean unroll_indices = FALSE;
const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
- const uint32_t incompatible_vb_mask =
- mgr->incompatible_vb_mask & used_vb_mask;
struct pipe_draw_info new_info;
struct pipe_draw_start_count_bias new_draw;
unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;
+ uint32_t misaligned = 0;
+ if (!mgr->caps.attrib_component_unaligned) {
+ for (unsigned i = 0; i < ARRAY_SIZE(mgr->unaligned_vb_mask); i++) {
+ misaligned |= mgr->ve->vb_align_mask[i] & mgr->unaligned_vb_mask[i];
+ }
+ }
+ const uint32_t incompatible_vb_mask =
+ (mgr->incompatible_vb_mask | misaligned) & used_vb_mask;
+
/* Normal draw. No fallback and no user buffers. */
if (!incompatible_vb_mask &&
!mgr->ve->incompatible_elem_mask &&
@@ -1584,7 +1666,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
if (new_info.index_size) {
/* See if anything needs to be done for per-vertex attribs. */
- if (u_vbuf_need_minmax_index(mgr)) {
+ if (u_vbuf_need_minmax_index(mgr, misaligned)) {
unsigned max_index;
if (new_info.index_bounds_valid) {
@@ -1607,7 +1689,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
if (!indirect &&
!new_info.primitive_restart &&
util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
- !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
+ !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) {
unroll_indices = TRUE;
user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
mgr->ve->noninstance_vb_mask_any);
@@ -1630,7 +1712,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
mgr->ve->incompatible_elem_mask) {
if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
start_vertex, num_vertices,
- min_index, unroll_indices)) {
+ min_index, unroll_indices, misaligned)) {
debug_warn_once("u_vbuf_translate_begin() failed");
goto cleanup;
}