/*
 * R600 state definitions: state-description structs, command emission
 * macros and prototypes for the state-setup functions.
 */
#ifndef __R600_STATE_H__
#define __R600_STATE_H__
#include "xf86drm.h"
/* Integer-backed boolean type; used for the flag fields of tex_sampler_t. */
typedef int bool_t;
/*
 * Zero every byte of the object x.
 *
 * x must be an lvalue designating the object itself (not a pointer to it):
 * the macro takes its address and uses sizeof(x) as the byte count.
 * The argument is parenthesized before '&' is applied so that the operator
 * always binds to the whole argument expression.
 * NOTE(review): relies on memset() being declared by the including file.
 */
#define CLEAR(x) memset(&(x), 0, sizeof(x))
/*
 * Sequencer (SQ) / thread handling setup: how priorities, GPRs, threads and
 * stack entries are divided between the ps/vs/gs/es stages.
 */
typedef struct {
    /* per-stage priority */
    int ps_prio, vs_prio, gs_prio, es_prio;
    /* per-stage GPR allotment, plus the temporary pool */
    int num_ps_gprs, num_vs_gprs, num_gs_gprs, num_es_gprs;
    int num_temp_gprs;
    /* per-stage thread count */
    int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
    /* per-stage stack entry count */
    int num_ps_stack_entries, num_vs_stack_entries;
    int num_gs_stack_entries, num_es_stack_entries;
} sq_config_t;
/* Colour buffer / render target description; taken by set_render_target(). */
typedef struct {
    int id;
    int w, h;
    uint64_t base;
    int format, endian;
    int array_mode;                 /* tiling */
    int number_type, read_size, comp_swap;
    int tile_mode;
    int blend_clamp;
    int clear_color;
    int blend_bypass, blend_float32;
    int simple_float, round_mode;
    int tile_compact;
    int source_format;
    struct radeon_bo *bo;           /* buffer object for this surface, if any */
} cb_config_t;
/* Depth buffer description. */
typedef struct {
    int w, h;
    uint64_t base;
    int format, read_size;
    int array_mode;                 /* tiling */
    int tile_surface_en, tile_compact;
    int zrange_precision;
    struct radeon_bo *bo;           /* buffer object for this surface, if any */
} db_config_t;
/* Shader description; fs_setup(), vs_setup() and ps_setup() each take one. */
typedef struct {
    uint64_t shader_addr;
    int num_gprs, stack_size;
    int dx10_clamp;
    int prime_cache_pgm_en, prime_cache_on_draw;
    int fetch_cache_lines;
    int prime_cache_en, prime_cache_on_const;
    int clamp_consts;
    int export_mode;
    int uncached_first_inst;
    struct radeon_bo *bo;           /* buffer object holding the shader, if any */
} shader_config_t;
/* Vertex buffer / vtx resource description; taken by set_vtx_resource(). */
typedef struct {
    int id;
    uint64_t vb_addr;
    uint32_t vtx_num_entries, vtx_size_dw;
    int clamp_x;
    int format;
    int num_format_all, format_comp_all, srf_mode_all;
    int endian;
    int mem_req_size;
    struct radeon_bo *bo;           /* buffer object holding the vertices, if any */
} vtx_resource_t;
/* Texture resource description; taken by set_tex_resource(). */
typedef struct {
    int id;
    int w, h, pitch, depth;
    int dim;
    int tile_mode, tile_type;
    int format;
    uint64_t base, mip_base;
    /* per-component format selection */
    int format_comp_x, format_comp_y, format_comp_z, format_comp_w;
    int num_format_all, srf_mode_all;
    int force_degamma;
    int endian;
    int request_size;
    /* per-component destination select */
    int dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
    int base_level, last_level;
    int base_array, last_array;
    int mpeg_clamp;
    int perf_modulation;
    int interlaced;
    /* buffer objects that go with base and mip_base, if any */
    struct radeon_bo *bo, *mip_bo;
} tex_resource_t;
/*
 * Texture sampler description; taken by set_tex_sampler().
 * Fields marked "unconfirmed" carry the original author's own doubt about
 * their meaning.
 */
typedef struct {
    int id;

    /* Clamping: one mode per axis, plus the border colour selector */
    int clamp_x;
    int clamp_y;
    int clamp_z;
    int border_color;

    /* Filtering */
    int xy_mag_filter;
    int xy_min_filter;
    int z_filter;
    int mip_filter;
    bool_t high_precision_filter;   /* unconfirmed */
    int perf_mip;                   /* unconfirmed; 0-7 */
    int perf_z;                     /* unconfirmed; 3 */

    /* LoD selection */
    int min_lod;                    /* 0-0x3ff */
    int max_lod;                    /* 0-0x3ff */
    int lod_bias;                   /* 0-0xfff (signed?) */
    int lod_bias2;                  /* unconfirmed; 0-0xfff (signed?) */
    bool_t lod_uses_minor_axis;     /* unconfirmed */

    /* Miscellaneous */
    bool_t point_sampling_clamp;    /* unconfirmed */
    bool_t tex_array_override;      /* unconfirmed */
    bool_t mc_coord_truncate;       /* unconfirmed */
    bool_t force_degamma;           /* unconfirmed */
    bool_t fetch_4;                 /* unconfirmed */
    bool_t sample_is_pcf;           /* unconfirmed */
    bool_t type;                    /* unconfirmed */
    int depth_compare;              /* only depth textures? */
    int chroma_key;
} tex_sampler_t;
/* Parameters of one draw command; taken by draw_immd() and draw_auto(). */
typedef struct {
    uint32_t prim_type, vgt_draw_initiator, index_type;
    uint32_t num_instances, num_indices;
} draw_config_t;
/*
 * Command emission primitives.
 *
 * With XF86DRM_MODE defined, the macros test info->cs at run time: when it
 * is set they go through the radeon_cs_* calls, otherwise E32 falls back to
 * appending to the buffer `ib`. They therefore need `info` (and, for
 * BEGIN_BATCH, `pScrn`) to be in scope at the point of use.
 * Without XF86DRM_MODE the batch/reloc macros expand to nothing and E32
 * always appends to `ib`.
 *
 * `ib` must point to an object with `address` and `used` members; `used` is
 * a byte count and is advanced by 4 for every dword appended. No bounds
 * check is made against the size of the buffer.
 */
#if defined(XF86DRM_MODE)
/* Start a batch; n is forwarded unchanged to radeon_ddx_cs_start(). */
#define BEGIN_BATCH(n) \
do { \
    if (info->cs) { \
        radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \
    } \
} while (0)
/* Close the current batch via radeon_cs_end(). */
#define END_BATCH() \
do { \
    if (info->cs) { \
        radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \
    } \
} while (0)
/*
 * Write a relocation for `bo`; rd and wd are passed straight through to
 * radeon_cs_write_reloc() (presumably the read and write domains - confirm
 * against the libdrm header). A non-zero return is only reported via ErrorF.
 */
#define RELOC_BATCH(bo, rd, wd) \
do { \
    if (info->cs) { \
        int _ret; \
        _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0); \
        if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \
    } \
} while (0)
/* Emit one dword: to the command stream if present, else appended to ib. */
#define E32(ib, dword) \
do { \
    if (info->cs) { \
        radeon_cs_write_dword(info->cs, (dword)); \
    } else { \
        uint32_t *ib_head = (pointer)(char*)(ib)->address; \
        ib_head[(ib)->used >> 2] = (dword); \
        (ib)->used += 4; \
    } \
} while (0)
#else
/* No command stream support: batching and relocations are no-ops. */
#define BEGIN_BATCH(n) do {} while (0)
#define END_BATCH() do {} while (0)
/* Same parameter order as the XF86DRM_MODE variant: (bo, rd, wd). */
#define RELOC_BATCH(bo, rd, wd) do {} while (0)
/* Emit one dword by appending it to ib. */
#define E32(ib, dword) \
do { \
    uint32_t *ib_head = (pointer)(char*)(ib)->address; \
    ib_head[(ib)->used >> 2] = (dword); \
    (ib)->used += 4; \
} while (0)
#endif
/*
 * Emit the float `val` as one dword through E32.
 *
 * The value is stored into the float member of a union and read back through
 * the uint32_t member, so the dword carries the float's object representation
 * rather than a numerically converted integer.
 * The temporary has a macro-private name: with a short name such as `a`, a
 * call like EFLOAT(ib, a) would have its argument captured by the temporary.
 */
#define EFLOAT(ib, val) \
do { \
    union { float f; uint32_t d; } _efloat_bits; \
    _efloat_bits.f = (val); \
    E32((ib), _efloat_bits.d); \
} while (0)
/*
 * Emit a type-3 packet header through E32. The dword is RADEON_CP_PACKET3
 * OR-ed with `cmd` shifted left by 8 and with (num - 1), masked to 14 bits,
 * shifted left by 16.
 */
#define PACK3(ib, cmd, num) \
do { \
    E32((ib), \
        RADEON_CP_PACKET3 | \
        ((cmd) << 8) | \
        ((((num) - 1) & 0x3fff) << 16)); \
} while (0)
/*
 * Emit the header for writing `num` consecutive registers starting at the
 * register byte offset `reg`.
 *
 * If `reg` lies inside one of the SET_* windows (config, context, ALU const,
 * resource, sampler, ctl const, loop const, bool const) the header is a
 * PACK3 packet with the matching IT_SET_* opcode and a count argument of
 * num + 1, followed by one dword holding the register's dword index relative
 * to the start of that window. Any other register gets a plain CP_PACKET0
 * header. The register values themselves are emitted by the caller.
 *
 * `reg` and `num` appear several times in the expansion, so pass expressions
 * without side effects. Every use is parenthesized so that compound
 * arguments (e.g. `base | 4` or `cond ? 2 : 1`) keep their meaning.
 */
#define PACK0(ib, reg, num) \
do { \
    if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \
        PACK3((ib), IT_SET_CONFIG_REG, (num) + 1); \
        E32((ib), ((reg) - SET_CONFIG_REG_offset) >> 2); \
    } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \
        PACK3((ib), IT_SET_CONTEXT_REG, (num) + 1); \
        E32((ib), ((reg) - SET_CONTEXT_REG_offset) >> 2); \
    } else if ((reg) >= SET_ALU_CONST_offset && (reg) < SET_ALU_CONST_end) { \
        PACK3((ib), IT_SET_ALU_CONST, (num) + 1); \
        E32((ib), ((reg) - SET_ALU_CONST_offset) >> 2); \
    } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \
        PACK3((ib), IT_SET_RESOURCE, (num) + 1); \
        E32((ib), ((reg) - SET_RESOURCE_offset) >> 2); \
    } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \
        PACK3((ib), IT_SET_SAMPLER, (num) + 1); \
        E32((ib), ((reg) - SET_SAMPLER_offset) >> 2); \
    } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \
        PACK3((ib), IT_SET_CTL_CONST, (num) + 1); \
        E32((ib), ((reg) - SET_CTL_CONST_offset) >> 2); \
    } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \
        PACK3((ib), IT_SET_LOOP_CONST, (num) + 1); \
        E32((ib), ((reg) - SET_LOOP_CONST_offset) >> 2); \
    } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \
        PACK3((ib), IT_SET_BOOL_CONST, (num) + 1); \
        E32((ib), ((reg) - SET_BOOL_CONST_offset) >> 2); \
    } else { \
        E32((ib), CP_PACKET0((reg), (num) - 1)); \
    } \
} while (0)
/*
 * Write one register: a PACK0 header for a single register at `reg`,
 * immediately followed by the dword `val`.
 */
#define EREG(ib, reg, val) \
do { \
    PACK0((ib), (reg), 1);  /* header announcing exactly one register */ \
    E32((ib), (val));       /* the value for that register */ \
} while (0)
/*
 * Flush / discard entry points for a buffer `ib` (presumably the indirect
 * buffer - confirm against the definitions, which are not in this header).
 */
void R600CPFlushIndirect(ScrnInfoPtr pScrn,
                         drmBufPtr ib);
void R600IBDiscard(ScrnInfoPtr pScrn,
                   drmBufPtr ib);
/* Upload, idle-wait and 3D start helpers; definitions are not in this header. */
uint64_t upload(ScrnInfoPtr pScrn, void *shader, int size, int offset);
void wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
void wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
void start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
/* Render target setup and the two cp_* sync helpers. */
void set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf);
void cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib,
                         uint32_t sync_type, uint32_t size, uint64_t mc_addr,
                         struct radeon_bo *bo,
                         uint32_t rdomains, uint32_t wdomain);
void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib,
                        PixmapPtr pPix, xf86CrtcPtr crtc,
                        int start, int stop);
/* Shader setup: each call takes one shader_config_t. */
void fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf);
void vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf);
void ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf);

/* Shader constants. */
void set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib,
                    int offset, int count, float *const_buf);
void set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib,
                     int offset, uint32_t val);

/* Vertex and texture resources, and texture samplers. */
void set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res);
void set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res);
void set_tex_sampler(ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
/*
 * Scissor and clip rectangle setters. Each takes a rectangle as
 * (x1, y1, x2, y2); the vport and clip-rect variants also take an `id`.
 */
void set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib,
                        int x1, int y1, int x2, int y2);
void set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id,
                       int x1, int y1, int x2, int y2);
void set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib,
                         int x1, int y1, int x2, int y2);
void set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib,
                        int x1, int y1, int x2, int y2);
void set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id,
                   int x1, int y1, int x2, int y2);
/* Default state setup and the two draw entry points (both take a draw_config_t). */
void set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
void draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib,
               draw_config_t *draw_conf, uint32_t *indices);
void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib,
               draw_config_t *draw_conf);
/* r600_* helpers that take only the screen; definitions are not in this header. */
Bool r600_vb_get(ScrnInfoPtr pScrn);
void r600_vb_discard(ScrnInfoPtr pScrn);
int r600_cp_start(ScrnInfoPtr pScrn);
void r600_finish_op(ScrnInfoPtr pScrn);
extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index);
extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index);
extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align);
extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv);
extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix);
#endif
/* end of include guard __R600_STATE_H__ */