/*
 * Copyright © 2013-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4_surface_builder.h"

using namespace brw;

namespace {
   namespace array_utils {
      /**
       * Copy one out of every \p src_stride logical components of the
       * argument into one out of every \p dst_stride logical components of
       * the result.
       */
      src_reg
      emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
                  unsigned dst_stride, unsigned src_stride)
      {
         if (src_stride == 1 && dst_stride == 1) {
            return src;
         } else {
            const dst_reg dst = bld.vgrf(src.type,
                                         DIV_ROUND_UP(size * dst_stride, 4));

            for (unsigned i = 0; i < size; ++i)
               bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
                                 1 << (i * dst_stride % 4)),
                       swizzle(offset(src, 8, i * src_stride / 4),
                               brw_swizzle_for_mask(1 << (i * src_stride % 4))));

            return src_reg(dst);
         }
      }

      /**
       * Convert a VEC4 into an array of registers with the layout expected
       * by the recipient shared unit.  If \p has_simd4x2 is true the
       * argument is left unmodified in SIMD4x2 form, otherwise it will be
       * rearranged into a SIMD8 vector.
       */
      src_reg
      emit_insert(const vec4_builder &bld, const src_reg &src,
                  unsigned n, bool has_simd4x2)
      {
         if (src.file == BAD_FILE || n == 0) {
            return src_reg();
         } else {
            /* Pad unused components with zeroes. */
            const unsigned mask = (1 << n) - 1;
            const dst_reg tmp = bld.vgrf(src.type);

            bld.MOV(writemask(tmp, mask), src);
            if (n < 4)
               bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));

            return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
         }
      }

      /**
       * Convert an array of registers back into a VEC4 according to the
       * layout expected from some shared unit.  If \p has_simd4x2 is true
       * the argument is left unmodified in SIMD4x2 form, otherwise it will
       * be rearranged from SIMD8 form.
       */
      src_reg
      emit_extract(const vec4_builder &bld, const src_reg src,
                   unsigned n, bool has_simd4x2)
      {
         if (src.file == BAD_FILE || n == 0) {
            return src_reg();
         } else {
            return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
         }
      }
   }
}

namespace brw {
   namespace surface_access {
      namespace {
         using namespace array_utils;

         /**
          * Generate a send opcode for a surface message and return the
          * result.
          */
         src_reg
         emit_send(const vec4_builder &bld, enum opcode op,
                   const src_reg &header,
                   const src_reg &addr, unsigned addr_sz,
                   const src_reg &src, unsigned src_sz,
                   const src_reg &surface,
                   unsigned arg, unsigned ret_sz,
                   brw_predicate pred = BRW_PREDICATE_NONE)
         {
            /* Calculate the total number of components of the payload.
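             * The payload is laid out as the optional one-register header,
             * followed by the address components and then the source data
             * components, which is what the sum below reflects.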
             */
            const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
            const unsigned sz = header_sz + addr_sz + src_sz;

            /* Construct the payload. */
            const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
            unsigned n = 0;

            if (header_sz)
               bld.exec_all().MOV(offset(payload, 8, n++),
                                  retype(header, BRW_REGISTER_TYPE_UD));

            for (unsigned i = 0; i < addr_sz; i++)
               bld.MOV(offset(payload, 8, n++),
                       offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));

            for (unsigned i = 0; i < src_sz; i++)
               bld.MOV(offset(payload, 8, n++),
                       offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));

            /* Reduce the dynamically uniform surface index to a single
             * scalar.
             */
            const src_reg usurface = bld.emit_uniformize(surface);

            /* Emit the message send instruction. */
            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
            vec4_instruction *inst =
               bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
            inst->mlen = sz;
            inst->size_written = ret_sz * REG_SIZE;
            inst->header_size = header_sz;
            inst->predicate = pred;

            return src_reg(dst);
         }
      }

      /**
       * Emit an untyped surface read opcode.  \p dims determines the number
       * of components of the address and \p size the number of components
       * of the returned value.
       */
      src_reg
      emit_untyped_read(const vec4_builder &bld,
                        const src_reg &surface, const src_reg &addr,
                        unsigned dims, unsigned size,
                        brw_predicate pred)
      {
         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
                          emit_insert(bld, addr, dims, true), 1,
                          src_reg(), 0,
                          surface, size, 1, pred);
      }

      /**
       * Emit an untyped surface write opcode.  \p dims determines the
       * number of components of the address and \p size the number of
       * components of the argument.
       */
      void
      emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
                         const src_reg &addr, const src_reg &src,
                         unsigned dims, unsigned size,
                         brw_predicate pred)
      {
         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
                                   bld.shader->devinfo->is_haswell);
         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
                   emit_insert(bld, addr, dims, has_simd4x2),
                   has_simd4x2 ? 1 : dims,
                   emit_insert(bld, src, size, has_simd4x2),
                   has_simd4x2 ? 1 : size,
                   surface, size, 0, pred);
      }

      /**
       * Emit an untyped surface atomic opcode.  \p dims determines the
       * number of components of the address and \p rsize the number of
       * components of the returned value (either zero or one).
       */
      src_reg
      emit_untyped_atomic(const vec4_builder &bld,
                          const src_reg &surface, const src_reg &addr,
                          const src_reg &src0, const src_reg &src1,
                          unsigned dims, unsigned rsize, unsigned op,
                          brw_predicate pred)
      {
         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
                                   bld.shader->devinfo->is_haswell);

         /* Zip the components of both sources; they are represented as the
          * X and Y components of the same vector.
          */
         const unsigned size = (src0.file != BAD_FILE) +
                               (src1.file != BAD_FILE);
         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);

         if (size >= 1) {
            bld.MOV(writemask(srcs, WRITEMASK_X),
                    swizzle(src0, BRW_SWIZZLE_XXXX));
         }
         if (size >= 2) {
            bld.MOV(writemask(srcs, WRITEMASK_Y),
                    swizzle(src1, BRW_SWIZZLE_XXXX));
         }

         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
                          emit_insert(bld, addr, dims, has_simd4x2),
                          has_simd4x2 ? 1 : dims,
                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
                          has_simd4x2 && size ? 1 : size,
                          surface, op, rsize, pred);
      }

      namespace {
         /**
          * Initialize the header present in typed surface messages.
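          * The header occupies a single register and is cleared to zero
          * with an exec_all() MOV; only Ivybridge requires any non-zero
          * contents, as explained in the body below.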
          */
         src_reg
         emit_typed_message_header(const vec4_builder &bld)
         {
            const vec4_builder ubld = bld.exec_all();
            const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);

            ubld.MOV(dst, brw_imm_d(0));

            if (bld.shader->devinfo->gen == 7 &&
                !bld.shader->devinfo->is_haswell) {
               /* The sample mask is used on IVB for the SIMD8 messages that
                * have no SIMD4x2 variant.  We only use the two X channels
                * in that case, so mask everything else out.
                */
               ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
            }

            return src_reg(dst);
         }
      }

      /**
       * Emit a typed surface read opcode.  \p dims determines the number of
       * components of the address and \p size the number of components of
       * the returned value.
       */
      src_reg
      emit_typed_read(const vec4_builder &bld, const src_reg &surface,
                      const src_reg &addr, unsigned dims, unsigned size)
      {
         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
                                   bld.shader->devinfo->is_haswell);
         const src_reg tmp =
            emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
                      emit_typed_message_header(bld),
                      emit_insert(bld, addr, dims, has_simd4x2),
                      has_simd4x2 ? 1 : dims,
                      src_reg(), 0,
                      surface, size,
                      has_simd4x2 ? 1 : size);

         return emit_extract(bld, tmp, size, has_simd4x2);
      }

      /**
       * Emit a typed surface write opcode.  \p dims determines the number
       * of components of the address and \p size the number of components
       * of the argument.
       */
      void
      emit_typed_write(const vec4_builder &bld, const src_reg &surface,
                       const src_reg &addr, const src_reg &src,
                       unsigned dims, unsigned size)
      {
         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
                                   bld.shader->devinfo->is_haswell);
         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
                   emit_typed_message_header(bld),
                   emit_insert(bld, addr, dims, has_simd4x2),
                   has_simd4x2 ? 1 : dims,
                   emit_insert(bld, src, size, has_simd4x2),
                   has_simd4x2 ? 1 : size,
                   surface, size, 0);
      }

      /**
       * Emit a typed surface atomic opcode.  \p dims determines the number
       * of components of the address and \p rsize the number of components
       * of the returned value (either zero or one).
       */
      src_reg
      emit_typed_atomic(const vec4_builder &bld,
                        const src_reg &surface, const src_reg &addr,
                        const src_reg &src0, const src_reg &src1,
                        unsigned dims, unsigned rsize, unsigned op,
                        brw_predicate pred)
      {
         const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
                                   bld.shader->devinfo->is_haswell);

         /* Zip the components of both sources; they are represented as the
          * X and Y components of the same vector.
          */
         const unsigned size = (src0.file != BAD_FILE) +
                               (src1.file != BAD_FILE);
         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);

         if (size >= 1)
            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
         if (size >= 2)
            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);

         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
                          emit_typed_message_header(bld),
                          emit_insert(bld, addr, dims, has_simd4x2),
                          has_simd4x2 ? 1 : dims,
                          emit_insert(bld, src_reg(srcs), size, has_simd4x2),
                          has_simd4x2 ? 1 : size,
                          surface, op, rsize, pred);
      }
   }
}
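
/* A minimal, hypothetical usage sketch (not part of this file): a visitor
 * that holds a suitably positioned vec4_builder `bld`, a surface index
 * `surface` and an address vector `addr` could read four components from an
 * untyped surface roughly like this, with `dims` being the number of address
 * components:
 *
 *    const src_reg value =
 *       surface_access::emit_untyped_read(bld, surface, addr, dims, 4,
 *                                         BRW_PREDICATE_NONE);
 *
 * The names `bld`, `surface`, `addr` and `dims` are assumptions about the
 * caller; the snippet only illustrates the interface defined above.
 */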