summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
blob: 1ce8e5baec46f8fa54f17d1aa3cbe7c3e630c0d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H

#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"

/* Smaller submits means the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_DWORDS (20 * 1024)

struct amdgpu_ctx {
   struct amdgpu_winsys *ws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   uint64_t *user_fence_cpu_address_base;
   int refcount;
   unsigned initial_num_total_rejected_cs;
   unsigned num_rejected_cs;
};

struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   union {
      struct {
         uint32_t priority_usage;
      } real;
      struct {
         uint32_t real_idx; /* index of underlying real BO */
      } slab;
   } u;
   enum radeon_bo_usage usage;
};

enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

struct amdgpu_ib {
   struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */

   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer        *big_ib_buffer;
   uint8_t                 *ib_mapped;
   unsigned                used_ib_space;

   /* The maximum seen size from cs_check_space. If the driver does
    * cs_check_space and flush, the newly allocated IB should have at least
    * this size.
    */
   unsigned                max_check_space_size;

   unsigned                max_ib_size;
   uint32_t                *ptr_ib_size;
   bool                    ptr_ib_size_inside_ib;
   enum ib_type            ib_type;
};

struct amdgpu_fence_list {
   struct pipe_fence_handle    **list;
   unsigned                    num;
   unsigned                    max;
};

struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
   uint32_t                    *ib_main_addr; /* the beginning of IB before chaining */

   /* Buffers. */
   unsigned                    max_real_buffers;
   unsigned                    num_real_buffers;
   struct amdgpu_cs_buffer     *real_buffers;

   unsigned                    num_slab_buffers;
   unsigned                    max_slab_buffers;
   struct amdgpu_cs_buffer     *slab_buffers;

   unsigned                    num_sparse_buffers;
   unsigned                    max_sparse_buffers;
   struct amdgpu_cs_buffer     *sparse_buffers;

   int16_t                     *buffer_indices_hashlist;

   struct amdgpu_winsys_bo     *last_added_bo;
   unsigned                    last_added_bo_index;
   unsigned                    last_added_bo_usage;
   uint32_t                    last_added_bo_priority_usage;

   struct amdgpu_fence_list    fence_dependencies;
   struct amdgpu_fence_list    syncobj_dependencies;
   struct amdgpu_fence_list    syncobj_to_signal;

   struct pipe_fence_handle    *fence;

   /* the error returned from cs_flush for non-async submissions */
   int                         error_code;

   /* TMZ: will this command be submitted using the TMZ flag */
   bool secure;
};

#define BUFFER_HASHLIST_SIZE 4096

struct amdgpu_cs {
   struct amdgpu_ib main; /* must be first because this is inherited */
   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;
   enum ring_type ring_type;
   struct drm_amdgpu_cs_chunk_fence fence_chunk;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver. */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;
   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS being currently-owned by the other thread. */
   struct amdgpu_cs_context *cst;
   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo
    * isn't part of any buffer lists or the index where the bo could be found.
    * Since 1) hash collisions of 2 different bo can happen and 2) we use a
    * single hashlist for the 3 buffer list, this is only a hint.
    * amdgpu_lookup_buffer uses this hint to speed up buffers look up.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool stop_exec_on_failure;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer *preamble_ib_bo;
};

struct amdgpu_fence {
   struct pipe_reference reference;
   /* If ctx == NULL, this fence is syncobj-based. */
   uint32_t syncobj;

   struct amdgpu_winsys *ws;
   struct amdgpu_ctx *ctx;  /* submission context */
   struct amdgpu_cs_fence fence;
   uint64_t *user_fence_cpu_address;

   /* If the fence has been submitted. This is unsignalled for deferred fences
    * (cs->next_fence) and while an IB is still being submitted in the submit
    * thread. */
   struct util_queue_fence submitted;

   volatile int signalled;              /* bool (int for atomicity) */
};

static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
{
   return fence->ctx == NULL;
}

static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
{
   if (p_atomic_dec_zero(&ctx->refcount)) {
      amdgpu_cs_ctx_free(ctx->ctx);
      amdgpu_bo_free(ctx->user_fence_bo);
      FREE(ctx);
   }
}

static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference)) {
      struct amdgpu_fence *fence = *adst;

      if (amdgpu_fence_is_syncobj(fence))
         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
      else
         amdgpu_ctx_unref(fence->ctx);

      util_queue_fence_destroy(&fence->submitted);
      FREE(fence);
   }
   *adst = asrc;
}

int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);

static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(!cs || cs->main.ib_type == IB_MAIN);
   return cs;
}

#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))

static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
}

static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         enum radeon_bo_usage usage)
{
   int index;
   struct amdgpu_cs_buffer *buffer;

   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
   if (index == -1)
      return false;

   buffer = bo->bo ? &cs->csc->real_buffers[index] :
            bo->base.usage & RADEON_FLAG_SPARSE ? &cs->csc->sparse_buffers[index] :
            &cs->csc->slab_buffers[index];

   return (buffer->usage & usage) != 0;
}

bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
                       unsigned num_fences,
                       struct pipe_fence_handle **fences);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);

#endif