diff options
Diffstat (limited to 'src')
37 files changed, 2990 insertions, 2085 deletions
diff --git a/src/common.h b/src/common.h index 73b4d05f..6f35d56d 100644 --- a/src/common.h +++ b/src/common.h @@ -412,6 +412,7 @@ intel_host_bridge (void); enum { INTEL_CREATE_PIXMAP_TILING_X = 0x10000000, INTEL_CREATE_PIXMAP_TILING_Y, + INTEL_CREATE_PIXMAP_TILING_NONE, }; #endif /* _INTEL_COMMON_H_ */ diff --git a/src/drmmode_display.c b/src/drmmode_display.c index e0df3961..de40fe9b 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -336,7 +336,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, crtc->y = y; crtc->rotation = rotation; - output_ids = xcalloc(sizeof(uint32_t), xf86_config->num_output); + output_ids = calloc(sizeof(uint32_t), xf86_config->num_output); if (!output_ids) { ret = FALSE; goto done; @@ -729,7 +729,7 @@ static int drmmode_output_lvds_edid(xf86OutputPtr output, * device. This is similar to what we have done in i830_lvds.c */ edid_mon = NULL; - edid_mon = xcalloc(1, sizeof(xf86Monitor)); + edid_mon = calloc(1, sizeof(xf86Monitor)); if (!edid_mon) { xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "Can't allocate memory for edid_mon.\n"); @@ -869,17 +869,17 @@ drmmode_output_destroy(xf86OutputPtr output) drmModeFreePropertyBlob(drmmode_output->edid_blob); for (i = 0; i < drmmode_output->num_props; i++) { drmModeFreeProperty(drmmode_output->props[i].mode_prop); - xfree(drmmode_output->props[i].atoms); + free(drmmode_output->props[i].atoms); } - xfree(drmmode_output->props); + free(drmmode_output->props); drmModeFreeConnector(drmmode_output->mode_output); if (drmmode_output->private_data) { - xfree(drmmode_output->private_data); + free(drmmode_output->private_data); drmmode_output->private_data = NULL; } if (drmmode_output->backlight_iface) drmmode_backlight_set(output, drmmode_output->backlight_active_level); - xfree(drmmode_output); + free(drmmode_output); output->driver_private = NULL; } @@ -970,7 +970,7 @@ drmmode_output_create_resources(xf86OutputPtr output) drmModePropertyPtr drmmode_prop; int i, j, err; - 
drmmode_output->props = xcalloc(mode_output->count_props, sizeof(drmmode_prop_rec)); + drmmode_output->props = calloc(mode_output->count_props, sizeof(drmmode_prop_rec)); if (!drmmode_output->props) return; @@ -995,7 +995,7 @@ drmmode_output_create_resources(xf86OutputPtr output) INT32 range[2]; p->num_atoms = 1; - p->atoms = xcalloc(p->num_atoms, sizeof(Atom)); + p->atoms = calloc(p->num_atoms, sizeof(Atom)); if (!p->atoms) continue; p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE); @@ -1017,7 +1017,7 @@ drmmode_output_create_resources(xf86OutputPtr output) } } else if (drmmode_prop->flags & DRM_MODE_PROP_ENUM) { p->num_atoms = drmmode_prop->count_enums + 1; - p->atoms = xcalloc(p->num_atoms, sizeof(Atom)); + p->atoms = calloc(p->num_atoms, sizeof(Atom)); if (!p->atoms) continue; p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE); @@ -1262,7 +1262,7 @@ drmmode_output_init(ScrnInfoPtr scrn, drmmode_ptr drmmode, int num) return; } - drmmode_output = xcalloc(sizeof(drmmode_output_private_rec), 1); + drmmode_output = calloc(sizeof(drmmode_output_private_rec), 1); if (!drmmode_output) { xf86OutputDestroy(output); drmModeFreeConnector(koutput); @@ -1276,7 +1276,7 @@ drmmode_output_init(ScrnInfoPtr scrn, drmmode_ptr drmmode, int num) */ drmmode_output->private_data = NULL; if (koutput->connector_type == DRM_MODE_CONNECTOR_LVDS) { - drmmode_output->private_data = xcalloc( + drmmode_output->private_data = calloc( sizeof(struct fixed_panel_lvds), 1); if (!drmmode_output->private_data) xf86DrvMsg(scrn->scrnIndex, X_ERROR, diff --git a/src/i810_dga.c b/src/i810_dga.c index 3f530579..52a01b76 100644 --- a/src/i810_dga.c +++ b/src/i810_dga.c @@ -84,10 +84,10 @@ I810DGAInit(ScreenPtr pScreen) while (pMode) { - newmodes = xrealloc(modes, (num + 1) * sizeof(DGAModeRec)); + newmodes = realloc(modes, (num + 1) * sizeof(DGAModeRec)); if (!newmodes) { - xfree(modes); + free(modes); return FALSE; } modes = newmodes; diff --git 
a/src/i810_dri.c b/src/i810_dri.c index e566acf6..c4022423 100644 --- a/src/i810_dri.c +++ b/src/i810_dri.c @@ -172,25 +172,25 @@ I810InitVisualConfigs(ScreenPtr pScreen) numConfigs = 8; pConfigs = - (__GLXvisualConfig *) xcalloc(sizeof(__GLXvisualConfig), + (__GLXvisualConfig *) calloc(sizeof(__GLXvisualConfig), numConfigs); if (!pConfigs) return FALSE; pI810Configs = - (I810ConfigPrivPtr) xcalloc(sizeof(I810ConfigPrivRec), + (I810ConfigPrivPtr) calloc(sizeof(I810ConfigPrivRec), numConfigs); if (!pI810Configs) { - xfree(pConfigs); + free(pConfigs); return FALSE; } pI810ConfigPtrs = - (I810ConfigPrivPtr *) xcalloc(sizeof(I810ConfigPrivPtr), + (I810ConfigPrivPtr *) calloc(sizeof(I810ConfigPrivPtr), numConfigs); if (!pI810ConfigPtrs) { - xfree(pConfigs); - xfree(pI810Configs); + free(pConfigs); + free(pI810Configs); return FALSE; } @@ -338,7 +338,7 @@ I810DRIScreenInit(ScreenPtr pScreen) if (xf86LoaderCheckSymbol("DRICreatePCIBusID")) { pDRIInfo->busIdString = DRICreatePCIBusID(pI810->PciInfo); } else { - pDRIInfo->busIdString = xalloc(64); + pDRIInfo->busIdString = malloc(64); sprintf(pDRIInfo->busIdString, "PCI:%d:%d:%d", ((pI810->PciInfo->domain << 8) | pI810->PciInfo->bus), pI810->PciInfo->dev, pI810->PciInfo->func @@ -370,7 +370,7 @@ I810DRIScreenInit(ScreenPtr pScreen) } pDRIInfo->SAREASize = SAREA_MAX; - if (!(pI810DRI = (I810DRIPtr) xcalloc(sizeof(I810DRIRec), 1))) { + if (!(pI810DRI = (I810DRIPtr) calloc(sizeof(I810DRIRec), 1))) { DRIDestroyInfoRec(pI810->pDRIInfo); pI810->pDRIInfo = NULL; return FALSE; @@ -399,7 +399,7 @@ I810DRIScreenInit(ScreenPtr pScreen) if (!DRIScreenInit(pScreen, pDRIInfo, &pI810->drmSubFD)) { xf86DrvMsg(pScreen->myNum, X_ERROR, "[dri] DRIScreenInit failed. 
Disabling DRI.\n"); - xfree(pDRIInfo->devPrivate); + free(pDRIInfo->devPrivate); pDRIInfo->devPrivate = NULL; DRIDestroyInfoRec(pI810->pDRIInfo); pI810->pDRIInfo = NULL; @@ -1055,16 +1055,16 @@ I810DRICloseScreen(ScreenPtr pScreen) if (pI810->pDRIInfo) { if (pI810->pDRIInfo->devPrivate) { - xfree(pI810->pDRIInfo->devPrivate); + free(pI810->pDRIInfo->devPrivate); pI810->pDRIInfo->devPrivate = NULL; } DRIDestroyInfoRec(pI810->pDRIInfo); pI810->pDRIInfo = NULL; } if (pI810->pVisualConfigs) - xfree(pI810->pVisualConfigs); + free(pI810->pVisualConfigs); if (pI810->pVisualConfigsPriv) - xfree(pI810->pVisualConfigsPriv); + free(pI810->pVisualConfigsPriv); } static Bool @@ -1202,12 +1202,12 @@ I810DRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, if (nbox > 1) { /* Keep ordering in each band, reverse order of bands */ - pboxNew1 = (BoxPtr) xalloc(sizeof(BoxRec) * nbox); + pboxNew1 = (BoxPtr) malloc(sizeof(BoxRec) * nbox); if (!pboxNew1) return; - pptNew1 = (DDXPointPtr) xalloc(sizeof(DDXPointRec) * nbox); + pptNew1 = (DDXPointPtr) malloc(sizeof(DDXPointRec) * nbox); if (!pptNew1) { - xfree(pboxNew1); + free(pboxNew1); return; } pboxBase = pboxNext = pbox + nbox - 1; @@ -1238,16 +1238,16 @@ I810DRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, if (nbox > 1) { /*reverse orderof rects in each band */ - pboxNew2 = (BoxPtr) xalloc(sizeof(BoxRec) * nbox); - pptNew2 = (DDXPointPtr) xalloc(sizeof(DDXPointRec) * nbox); + pboxNew2 = (BoxPtr) malloc(sizeof(BoxRec) * nbox); + pptNew2 = (DDXPointPtr) malloc(sizeof(DDXPointRec) * nbox); if (!pboxNew2 || !pptNew2) { if (pptNew2) - xfree(pptNew2); + free(pptNew2); if (pboxNew2) - xfree(pboxNew2); + free(pboxNew2); if (pboxNew1) { - xfree(pptNew1); - xfree(pboxNew1); + free(pptNew1); + free(pboxNew1); } return; } @@ -1312,12 +1312,12 @@ I810DRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, I810EmitFlush(pScrn); if (pboxNew2) { - xfree(pptNew2); - xfree(pboxNew2); + free(pptNew2); + free(pboxNew2); } if (pboxNew1) { - 
xfree(pptNew1); - xfree(pboxNew1); + free(pptNew1); + free(pboxNew1); } if (pI810->AccelInfoRec) diff --git a/src/i810_driver.c b/src/i810_driver.c index 68e45bb6..345854f2 100644 --- a/src/i810_driver.c +++ b/src/i810_driver.c @@ -364,7 +364,7 @@ I810FreeRec(ScrnInfoPtr pScrn) return; if (!pScrn->driverPrivate) return; - xfree(pScrn->driverPrivate); + free(pScrn->driverPrivate); pScrn->driverPrivate = NULL; } #endif @@ -618,7 +618,7 @@ I810PreInit(ScrnInfoPtr pScrn, int flags) /* Process the options */ xf86CollectOptions(pScrn, NULL); - if (!(pI810->Options = xalloc(sizeof(I810Options)))) + if (!(pI810->Options = malloc(sizeof(I810Options)))) return FALSE; memcpy(pI810->Options, I810Options, sizeof(I810Options)); xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, pI810->Options); @@ -1914,7 +1914,7 @@ I810ScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) pI810 = I810PTR(pScrn); hwp = VGAHWPTR(pScrn); - pI810->LpRing = xcalloc(sizeof(I810RingBuffer),1); + pI810->LpRing = calloc(sizeof(I810RingBuffer),1); if (!pI810->LpRing) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not allocate lpring data structure.\n"); @@ -2326,13 +2326,13 @@ I810CloseScreen(int scrnIndex, ScreenPtr pScreen) vgaHWUnmapMem(pScrn); if (pI810->ScanlineColorExpandBuffers) { - xfree(pI810->ScanlineColorExpandBuffers); + free(pI810->ScanlineColorExpandBuffers); pI810->ScanlineColorExpandBuffers = NULL; } if (infoPtr) { if (infoPtr->ScanlineColorExpandBuffers) - xfree(infoPtr->ScanlineColorExpandBuffers); + free(infoPtr->ScanlineColorExpandBuffers); XAADestroyInfoRec(infoPtr); pI810->AccelInfoRec = NULL; } @@ -2353,7 +2353,7 @@ I810CloseScreen(int scrnIndex, ScreenPtr pScreen) */ xf86GARTCloseScreen(scrnIndex); - xfree(pI810->LpRing); + free(pI810->LpRing); pI810->LpRing = NULL; pScrn->vtSema = FALSE; diff --git a/src/i810_hwmc.c b/src/i810_hwmc.c index 1c3ffc99..724e1bea 100644 --- a/src/i810_hwmc.c +++ b/src/i810_hwmc.c @@ -230,7 +230,7 @@ void I810InitMC(ScreenPtr pScreen) * 
Set *num_priv to the number of 32bit words that make up the size of * of the data that priv will point to. * - * *priv = (long *) xcalloc (elements, sizeof(element)) + * *priv = (long *) calloc (elements, sizeof(element)) * *num_priv = (elements * sizeof(element)) >> 2; * **************************************************************************/ @@ -256,7 +256,7 @@ int I810XvMCCreateContext (ScrnInfoPtr pScrn, XvMCContextPtr pContext, return BadAlloc; } - *priv = xcalloc(1,sizeof(I810XvMCCreateContextRec)); + *priv = calloc(1,sizeof(I810XvMCCreateContextRec)); contextRec = (I810XvMCCreateContextRec *)*priv; if(!*priv) { @@ -268,7 +268,7 @@ int I810XvMCCreateContext (ScrnInfoPtr pScrn, XvMCContextPtr pContext, if(drmCreateContext(pI810->drmSubFD, &(contextRec->drmcontext) ) < 0) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "I810XvMCCreateContext: Unable to create DRMContext!\n"); - xfree(*priv); + free(*priv); return BadAlloc; } @@ -295,7 +295,7 @@ int I810XvMCCreateSurface (ScrnInfoPtr pScrn, XvMCSurfacePtr pSurf, I810Ptr pI810 = I810PTR(pScrn); int i; - *priv = (long *)xcalloc(2,sizeof(long)); + *priv = (long *)calloc(2,sizeof(long)); if(!*priv) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, @@ -341,7 +341,7 @@ int I810XvMCCreateSubpicture (ScrnInfoPtr pScrn, XvMCSubpicturePtr pSubp, I810Ptr pI810 = I810PTR(pScrn); int i; - *priv = (long *)xcalloc(1,sizeof(long)); + *priv = (long *)calloc(1,sizeof(long)); if(!*priv) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, diff --git a/src/i810_video.c b/src/i810_video.c index ee1a2326..85b5b65c 100644 --- a/src/i810_video.c +++ b/src/i810_video.c @@ -174,7 +174,7 @@ void I810InitVideo(ScreenPtr pScreen) adaptors = &newAdaptor; } else { newAdaptors = /* need to free this someplace */ - xalloc((num_adaptors + 1) * sizeof(XF86VideoAdaptorPtr*)); + malloc((num_adaptors + 1) * sizeof(XF86VideoAdaptorPtr*)); if(newAdaptors) { memcpy(newAdaptors, adaptors, num_adaptors * sizeof(XF86VideoAdaptorPtr)); @@ -189,7 +189,7 @@ void I810InitVideo(ScreenPtr 
pScreen) xf86XVScreenInit(pScreen, adaptors, num_adaptors); if(newAdaptors) - xfree(newAdaptors); + free(newAdaptors); } /* *INDENT-OFF* */ @@ -383,7 +383,7 @@ I810SetupImageVideo(ScreenPtr pScreen) XF86VideoAdaptorPtr adapt; I810PortPrivPtr pPriv; - if(!(adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec) + + if(!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) + sizeof(I810PortPrivRec) + sizeof(DevUnion)))) return NULL; @@ -1225,18 +1225,18 @@ I810AllocateSurface( surface->width = w; surface->height = h; - if(!(surface->pitches = xalloc(sizeof(int)))) { + if(!(surface->pitches = malloc(sizeof(int)))) { xf86FreeOffscreenLinear(linear); return BadAlloc; } - if(!(surface->offsets = xalloc(sizeof(int)))) { - xfree(surface->pitches); + if(!(surface->offsets = malloc(sizeof(int)))) { + free(surface->pitches); xf86FreeOffscreenLinear(linear); return BadAlloc; } - if(!(pPriv = xalloc(sizeof(OffscreenPrivRec)))) { - xfree(surface->pitches); - xfree(surface->offsets); + if(!(pPriv = malloc(sizeof(OffscreenPrivRec)))) { + free(surface->pitches); + free(surface->offsets); xf86FreeOffscreenLinear(linear); return BadAlloc; } @@ -1286,9 +1286,9 @@ I810FreeSurface( I810StopSurface(surface); } xf86FreeOffscreenLinear(pPriv->linear); - xfree(surface->pitches); - xfree(surface->offsets); - xfree(surface->devPrivate.ptr); + free(surface->pitches); + free(surface->offsets); + free(surface->devPrivate.ptr); return Success; } @@ -1401,7 +1401,7 @@ I810InitOffscreenImages(ScreenPtr pScreen) XF86OffscreenImagePtr offscreenImages; /* need to free this someplace */ - if(!(offscreenImages = xalloc(sizeof(XF86OffscreenImageRec)))) { + if(!(offscreenImages = malloc(sizeof(XF86OffscreenImageRec)))) { return; } @@ -78,6 +78,15 @@ void i830_uxa_block_handler(ScreenPtr pScreen); Bool i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table, int num_bos); +/* XXX + * The X server gained an *almost* identical implementation in 1.9. 
+ * + * Remove this duplicate code either in 2.16 (when we can depend upon 1.9) + * or the drivers are merged back into the xserver tree, whichever happens + * earlier. + */ + +#ifndef _LIST_H_ /* classic doubly-link circular list */ struct list { struct list *next, *prev; @@ -125,41 +134,77 @@ list_is_empty(struct list *head) { return head->next == head; } +#endif #ifndef container_of #define container_of(ptr, type, member) \ (type *)((char *)(ptr) - (char *) &((type *)0)->member) #endif +#ifndef list_entry #define list_entry(ptr, type, member) \ container_of(ptr, type, member) +#endif +#ifndef list_first_entry #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) +#endif +#ifndef list_foreach #define list_foreach(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) +#endif +/* XXX list.h from xserver-1.9 uses a GCC-ism to avoid having to pass type */ +#ifndef list_foreach_entry #define list_foreach_entry(pos, type, head, member) \ for (pos = list_entry((head)->next, type, member);\ &pos->member != (head); \ pos = list_entry(pos->member.next, type, member)) +#endif struct intel_pixmap { dri_bo *bo; - uint32_t tiling, stride; - uint32_t flush_write_domain; - uint32_t flush_read_domains; - uint32_t batch_write_domain; - uint32_t batch_read_domains; + struct list flush, batch, in_flight; + + uint16_t stride; + uint8_t tiling; + int8_t busy :2; + int8_t batch_write :1; }; -struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap); +#if HAS_DEVPRIVATEKEYREC +extern DevPrivateKeyRec uxa_pixmap_index; +#else +extern int uxa_pixmap_index; +#endif + +static inline struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap) +{ +#if HAS_DEVPRIVATEKEYREC + return dixGetPrivate(&pixmap->devPrivates, &uxa_pixmap_index); +#else + return dixLookupPrivate(&pixmap->devPrivates, &uxa_pixmap_index); +#endif +} + +static inline Bool intel_pixmap_is_busy(struct intel_pixmap *priv) +{ + if (priv->busy == -1) + priv->busy = 
drm_intel_bo_busy(priv->bo); + return priv->busy; +} + +static inline void i830_set_pixmap_intel(PixmapPtr pixmap, struct intel_pixmap *intel) +{ + dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, intel); +} static inline Bool i830_uxa_pixmap_is_dirty(PixmapPtr pixmap) { - return i830_get_pixmap_intel(pixmap)->flush_write_domain != 0; + return !list_is_empty(&i830_get_pixmap_intel(pixmap)->flush); } static inline Bool i830_pixmap_tiled(PixmapPtr pixmap) @@ -266,15 +311,6 @@ struct _i830_memory { Bool lifetime_fixed_offset; }; -typedef struct { - int tail_mask; - i830_memory *mem; - unsigned char *virtual_start; - int head; - int tail; - int space; -} I830RingBuffer; - /* store information about an Ixxx DVO */ /* The i830->i865 use multiple DVOs with multiple i2cs */ /* the i915, i945 have a single sDVO i2c bus - which is different */ @@ -459,22 +495,10 @@ typedef struct intel_screen_private { i830_memory *cursor_mem_classic[2]; /* One big buffer for all cursors for kernels that support this */ i830_memory *cursor_mem_argb[2]; - i830_memory *fake_bufmgr_mem; - - /* Regions allocated either from the above pools, or from agpgart. */ - I830RingBuffer ring; - - /** Number of bytes being emitted in the current BEGIN_LP_RING */ - unsigned int ring_emitting; - /** Number of bytes that have been emitted in the current BEGIN_LP_RING */ - unsigned int ring_used; - /** Offset in the ring for the next DWORD emit */ - uint32_t ring_next; - dri_bufmgr *bufmgr; - uint8_t *batch_ptr; + uint32_t batch_ptr[4096]; /** Byte offset in batch_ptr for the next dword to be emitted. 
*/ unsigned int batch_used; /** Position in batch_ptr at the start of the current BEGIN_BATCH */ @@ -508,8 +532,6 @@ typedef struct intel_screen_private { i830_memory *memory_manager; /**< DRI memory manager aperture */ - Bool have_gem; - Bool need_mi_flush; Bool tiling; @@ -533,6 +555,7 @@ typedef struct intel_screen_private { CloseScreenProcPtr CloseScreen; + void (*vertex_flush) (struct intel_screen_private *intel); void (*batch_flush_notify) (ScrnInfoPtr scrn); uxa_driver_t *uxa_driver; @@ -541,6 +564,7 @@ typedef struct intel_screen_private { int accel_pixmap_offset_alignment; int accel_max_x; int accel_max_y; + int max_bo_size; int max_gtt_map_size; int max_tiling_size; @@ -573,17 +597,17 @@ typedef struct intel_screen_private { float scale_units[2][2]; /** Transform pointers for src/mask, or NULL if identity */ PictTransform *transform[2]; - float dst_coord_adjust; - float src_coord_adjust; - float mask_coord_adjust; PixmapPtr render_source, render_mask, render_dest; PicturePtr render_source_picture, render_mask_picture, render_dest_picture; CARD32 render_source_solid; CARD32 render_mask_solid; + PixmapPtr render_current_dest; Bool render_source_is_solid; Bool render_mask_is_solid; Bool needs_render_state_emit; + Bool needs_render_vertex_emit; + Bool needs_render_ca_pass; /* i830 render accel state */ uint32_t render_dest_format; @@ -599,6 +623,20 @@ typedef struct intel_screen_private { uint32_t dst_format; } i915_render_state; + uint32_t prim_offset; + void (*prim_emit)(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h); + int floats_per_vertex; + int last_floats_per_vertex; + uint32_t vertex_count; + uint32_t vertex_index; + uint32_t vertex_used; + float vertex_ptr[4*1024]; + dri_bo *vertex_bo; + /* 965 render acceleration state */ struct gen4_render_state *gen4_render_state; @@ -606,6 +644,9 @@ typedef struct intel_screen_private { Bool directRenderingOpen; int drmSubFD; +#ifdef notyet + drmEventContext 
event_context; +#endif char *deviceName; /* Broken-out options. */ @@ -631,8 +672,6 @@ typedef struct intel_screen_private { unsigned int SaveGeneration; - OsTimerPtr devicesTimer; - int ddc2; enum backlight_control backlight_control_method; @@ -716,7 +755,6 @@ typedef struct intel_screen_private { enum last_3d last_3d; Bool use_drm_mode; - Bool kernel_exec_fencing; /** Enables logging of debug output related to mode switching. */ Bool debug_modes; @@ -763,7 +801,6 @@ unsigned long intel_get_pixmap_pitch(PixmapPtr pixmap); #include "i830_batchbuffer.h" /* I830 specific functions */ -extern int I830WaitLpRing(ScrnInfoPtr scrn, int n, int timeout_millis); extern void I830SetPIOAccess(intel_screen_private *intel); extern void I830SetMMIOAccess(intel_screen_private *intel); extern void I830InitHWCursor(ScrnInfoPtr scrn); @@ -816,6 +853,8 @@ i830_pipe_a_require_deactivate (ScrnInfoPtr scrn); Bool I830DRI2ScreenInit(ScreenPtr pScreen); void I830DRI2CloseScreen(ScreenPtr pScreen); +void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *user_data); extern Bool drmmode_pre_init(ScrnInfoPtr scrn, int fd, int cpp); extern int drmmode_get_pipe_from_crtc_id(drm_intel_bufmgr * bufmgr, @@ -903,8 +942,11 @@ DisplayModePtr i830_ddc_get_modes(xf86OutputPtr output); void i830_tv_init(ScrnInfoPtr scrn); /* i830_render.c */ -Bool i830_check_composite(int op, PicturePtr sourcec, PicturePtr mask, - PicturePtr dest); +Bool i830_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i830_check_composite_target(PixmapPtr pixmap); +Bool i830_check_composite_texture(ScreenPtr screen, PicturePtr picture); Bool i830_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, PicturePtr dest, PixmapPtr sourcecPixmap, PixmapPtr maskPixmap, PixmapPtr destPixmap); @@ -914,21 +956,27 @@ void i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int 
h); void i830_done_composite(PixmapPtr dest); /* i915_render.c */ -Bool i915_check_composite(int op, PicturePtr sourcec, PicturePtr mask, - PicturePtr dest); +Bool i915_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i915_check_composite_target(PixmapPtr pixmap); +Bool i915_check_composite_texture(ScreenPtr screen, PicturePtr picture); Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, PicturePtr dest, PixmapPtr sourcecPixmap, PixmapPtr maskPixmap, PixmapPtr destPixmap); void i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h); +void i915_vertex_flush(intel_screen_private *intel); void i915_batch_flush_notify(ScrnInfoPtr scrn); void i830_batch_flush_notify(ScrnInfoPtr scrn); /* i965_render.c */ unsigned int gen4_render_state_size(ScrnInfoPtr scrn); void gen4_render_state_init(ScrnInfoPtr scrn); void gen4_render_state_cleanup(ScrnInfoPtr scrn); -Bool i965_check_composite(int op, PicturePtr sourcec, PicturePtr mask, - PicturePtr dest); +Bool i965_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i965_check_composite_texture(ScreenPtr screen, PicturePtr picture); Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, PicturePtr dest, PixmapPtr sourcecPixmap, PixmapPtr maskPixmap, PixmapPtr destPixmap); @@ -949,14 +997,6 @@ void i830_enter_render(ScrnInfoPtr); extern void intel_sync(ScrnInfoPtr scrn); -static inline void -i830_wait_ring_idle(ScrnInfoPtr scrn) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - - I830WaitLpRing(scrn, intel->ring.mem->size - 8, 0); -} - static inline int i830_fb_compression_supported(intel_screen_private *intel) { if (!IS_MOBILE(intel)) diff --git a/src/i830_3d.c b/src/i830_3d.c index e83cb3f7..a92da055 100644 --- a/src/i830_3d.c +++ b/src/i830_3d.c @@ -38,7 +38,7 @@ void 
I830EmitInvarientState(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - ATOMIC_BATCH(58); + assert(intel->in_batch_atomic); OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); @@ -222,6 +222,4 @@ void I830EmitInvarientState(ScrnInfoPtr scrn) AA_LINE_ECAAR_WIDTH_1_0 | AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE); - - ADVANCE_BATCH(); } diff --git a/src/i830_accel.c b/src/i830_accel.c index 0b9195f3..df4f58db 100644 --- a/src/i830_accel.c +++ b/src/i830_accel.c @@ -46,80 +46,11 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "i830_ring.h" #include "i915_drm.h" -unsigned long intel_get_pixmap_offset(PixmapPtr pixmap) -{ - ScreenPtr pScreen = pixmap->drawable.pScreen; - ScrnInfoPtr scrn = xf86Screens[pScreen->myNum]; - intel_screen_private *intel = intel_get_screen_private(scrn); - - return (unsigned long)pixmap->devPrivate.ptr - - (unsigned long)intel->FbBase; -} - unsigned long intel_get_pixmap_pitch(PixmapPtr pixmap) { return (unsigned long)pixmap->devKind; } -int -I830WaitLpRing(ScrnInfoPtr scrn, int n, int timeout_millis) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - I830RingBuffer *ring = &intel->ring; - int iters = 0; - unsigned int start = 0; - unsigned int now = 0; - int last_head = 0; - unsigned int first = 0; - - /* If your system hasn't moved the head pointer in 2 seconds, I'm going to - * call it crashed. 
- */ - if (timeout_millis == 0) - timeout_millis = 2000; - - if (I810_DEBUG & DEBUG_VERBOSE_ACCEL) { - ErrorF("I830WaitLpRing %d\n", n); - first = GetTimeInMillis(); - } - - while (ring->space < n) { - ring->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK; - ring->space = ring->head - (ring->tail + 8); - - if (ring->space < 0) - ring->space += ring->mem->size; - - iters++; - now = GetTimeInMillis(); - if (start == 0 || now < start || ring->head != last_head) { - if (I810_DEBUG & DEBUG_VERBOSE_ACCEL) - if (now > start) - ErrorF("space: %d wanted %d\n", ring->space, n); - start = now; - last_head = ring->head; - } else if (now - start > timeout_millis) { - ErrorF("Error in I830WaitLpRing(), timeout for %d seconds\n", - timeout_millis/1000); - ErrorF("space: %d wanted %d\n", ring->space, n); - intel->uxa_driver = NULL; - FatalError("lockup\n"); - } - - DELAY(10); - } - - if (I810_DEBUG & DEBUG_VERBOSE_ACCEL) { - now = GetTimeInMillis(); - if (now - first) { - ErrorF("Elapsed %u ms\n", now - first); - ErrorF("space: %d wanted %d\n", ring->space, n); - } - } - - return iters; -} - void i830_debug_flush(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); @@ -128,7 +59,7 @@ void i830_debug_flush(ScrnInfoPtr scrn) intel_batch_emit_flush(scrn); if (intel->debug_flush & DEBUG_FLUSH_BATCHES) - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); } /* The following function sets up the supported acceleration. 
Call it diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c index d067d480..2ef89f24 100644 --- a/src/i830_batchbuffer.c +++ b/src/i830_batchbuffer.c @@ -40,72 +40,29 @@ #define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */ -static int -intel_nondrm_exec(dri_bo *bo, unsigned int used, void *priv) +static void intel_end_vertex(intel_screen_private *intel) { - ScrnInfoPtr scrn = priv; - intel_screen_private *intel = intel_get_screen_private(scrn); + if (intel->vertex_bo) { + if (intel->vertex_used) + dri_bo_subdata(intel->vertex_bo, 0, intel->vertex_used*4, intel->vertex_ptr); - BEGIN_LP_RING(4); - OUT_RING(MI_BATCH_BUFFER_START | (2 << 6)); - OUT_RING(bo->offset); - OUT_RING(MI_NOOP); - OUT_RING(MI_NOOP); - ADVANCE_LP_RING(); - - return 0; -} - -static int -intel_nondrm_exec_i830(dri_bo *bo, unsigned int used, void *priv) -{ - ScrnInfoPtr scrn = priv; - intel_screen_private *intel = intel_get_screen_private(scrn); - - BEGIN_LP_RING(4); - OUT_RING(MI_BATCH_BUFFER); - OUT_RING(bo->offset); - OUT_RING(bo->offset + intel->batch_used - 4); - OUT_RING(MI_NOOP); - ADVANCE_LP_RING(); - - return 0; -} - -/** - * Creates a fence value representing a request to be passed. - * - * Stub implementation that should be avoided when DRM functions are available. - */ -static unsigned int -intel_nondrm_emit(void *priv) -{ - static unsigned int fence = 0; - - /* Match DRM in not using half the range. The fake bufmgr relies on this. */ - if (++fence >= 0x8000000) - fence = 1; - - return fence; + dri_bo_unreference(intel->vertex_bo); + intel->vertex_bo = NULL; + } } -/** - * Waits on a fence representing a request to be passed. - * - * Stub implementation that should be avoided when DRM functions are available. 
- */ -static void -intel_nondrm_wait(unsigned int fence, void *priv) +void intel_next_vertex(intel_screen_private *intel) { - ScrnInfoPtr scrn = priv; + intel_end_vertex(intel); - i830_wait_ring_idle(scrn); + intel->vertex_bo = + dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096); + intel->vertex_used = 0; } static void intel_next_batch(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - int ret; /* The 865 has issues with larger-than-page-sized batch buffers. */ if (IS_I865G(intel)) @@ -115,12 +72,7 @@ static void intel_next_batch(ScrnInfoPtr scrn) intel->batch_bo = dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096); - ret = dri_bo_map(intel->batch_bo, 1); - if (ret != 0) - FatalError("Failed to map batchbuffer: %s\n", strerror(-ret)); - intel->batch_used = 0; - intel->batch_ptr = intel->batch_bo->virtual; /* We don't know when another client has executed, so we have * to reinitialize our 3D state per batch. @@ -136,34 +88,54 @@ void intel_batch_init(ScrnInfoPtr scrn) intel->batch_emitting = 0; intel_next_batch(scrn); - - if (!intel->have_gem) { - if (IS_I830(intel) || IS_845G(intel)) { - intel_bufmgr_fake_set_exec_callback(intel->bufmgr, - intel_nondrm_exec_i830, scrn); - } else { - intel_bufmgr_fake_set_exec_callback(intel->bufmgr, - intel_nondrm_exec, scrn); - } - intel_bufmgr_fake_set_fence_callback(intel->bufmgr, - intel_nondrm_emit, intel_nondrm_wait, scrn); - } } void intel_batch_teardown(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - if (intel->batch_ptr != NULL) { - dri_bo_unmap(intel->batch_bo); - intel->batch_ptr = NULL; - + if (intel->batch_bo != NULL) { dri_bo_unreference(intel->batch_bo); intel->batch_bo = NULL; + } + if (intel->last_batch_bo != NULL) { dri_bo_unreference(intel->last_batch_bo); intel->last_batch_bo = NULL; } + + if (intel->vertex_bo) { + dri_bo_unreference(intel->vertex_bo); + intel->vertex_bo = NULL; + } + + while 
(!list_is_empty(&intel->batch_pixmaps)) + list_del(intel->batch_pixmaps.next); + + while (!list_is_empty(&intel->flush_pixmaps)) + list_del(intel->flush_pixmaps.next); + + while (!list_is_empty(&intel->in_flight)) { + struct intel_pixmap *entry; + + entry = list_first_entry(&intel->in_flight, + struct intel_pixmap, + in_flight); + + dri_bo_unreference(entry->bo); + list_del(&entry->in_flight); + free(entry); + } +} + +void intel_batch_do_flush(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + while (!list_is_empty(&intel->flush_pixmaps)) + list_del(intel->flush_pixmaps.next); + + intel->need_mi_flush = FALSE; } void intel_batch_emit_flush(ScrnInfoPtr scrn) @@ -182,55 +154,44 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) OUT_BATCH(MI_FLUSH | flags); ADVANCE_BATCH(); - while (!list_is_empty(&intel->flush_pixmaps)) { - struct intel_pixmap *entry; - - entry = list_first_entry(&intel->flush_pixmaps, - struct intel_pixmap, - flush); - - entry->flush_read_domains = entry->flush_write_domain = 0; - list_del(&entry->flush); - } - - intel->need_mi_flush = FALSE; + intel_batch_do_flush(scrn); } -void intel_batch_submit(ScrnInfoPtr scrn) +void intel_batch_submit(ScrnInfoPtr scrn, int flush) { intel_screen_private *intel = intel_get_screen_private(scrn); int ret; assert (!intel->in_batch_atomic); + if (intel->vertex_flush) + intel->vertex_flush(intel); + intel_end_vertex(intel); + + if (flush) + intel_batch_emit_flush(scrn); + if (intel->batch_used == 0) return; - /* Emit a padding dword if we aren't going to be quad-word aligned. */ - if ((intel->batch_used & 4) == 0) { - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = MI_NOOP; - intel->batch_used += 4; - } - /* Mark the end of the batchbuffer. */ - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = - MI_BATCH_BUFFER_END; - intel->batch_used += 4; + OUT_BATCH(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. 
*/ + if (intel->batch_used & 1) + OUT_BATCH(MI_NOOP); if (DUMP_BATCHBUFFERS) { FILE *file = fopen(DUMP_BATCHBUFFERS, "a"); if (file) { - fwrite (intel->batch_ptr, intel->batch_used, 1, file); + fwrite (intel->batch_ptr, intel->batch_used*4, 1, file); fclose(file); } } - dri_bo_unmap(intel->batch_bo); - intel->batch_ptr = NULL; - - ret = - dri_bo_exec(intel->batch_bo, intel->batch_used, NULL, 0, - 0xffffffff); + ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr); + if (ret == 0) + ret = dri_bo_exec(intel->batch_bo, intel->batch_used*4, + NULL, 0, 0xffffffff); if (ret != 0) { static int once; @@ -250,25 +211,14 @@ void intel_batch_submit(ScrnInfoPtr scrn) struct intel_pixmap, batch); - entry->batch_read_domains = entry->batch_write_domain = 0; + entry->busy = -1; + entry->batch_write = 0; list_del(&entry->batch); } - /* Mark that we need to flush whatever potential rendering we've done in the - * blockhandler. We could set this less often, but it's probably not worth - * the work. - */ - intel->need_mi_flush = !list_is_empty(&intel->flush_pixmaps); - while (!list_is_empty(&intel->flush_pixmaps)) { - struct intel_pixmap *entry; - - entry = list_first_entry(&intel->flush_pixmaps, - struct intel_pixmap, - flush); - - entry->flush_read_domains = entry->flush_write_domain = 0; - list_del(&entry->flush); - } + intel->need_mi_flush |= !list_is_empty(&intel->flush_pixmaps); + while (!list_is_empty(&intel->flush_pixmaps)) + list_del(intel->flush_pixmaps.next); while (!list_is_empty(&intel->in_flight)) { struct intel_pixmap *entry; @@ -279,7 +229,7 @@ void intel_batch_submit(ScrnInfoPtr scrn) dri_bo_unreference(entry->bo); list_del(&entry->in_flight); - xfree(entry); + free(entry); } /* Save a ref to the last batch emitted, which we use for syncing @@ -306,8 +256,8 @@ void intel_batch_wait_last(ScrnInfoPtr scrn) /* Map it CPU write, which guarantees it's done. This is a completely * non performance path, so we don't need anything better. 
*/ - drm_intel_bo_map(intel->last_batch_bo, TRUE); - drm_intel_bo_unmap(intel->last_batch_bo); + drm_intel_gem_bo_map_gtt(intel->last_batch_bo); + drm_intel_gem_bo_unmap_gtt(intel->last_batch_bo); } void intel_sync(ScrnInfoPtr scrn) @@ -320,7 +270,6 @@ void intel_sync(ScrnInfoPtr scrn) if (!scrn->vtSema || !intel->batch_bo || !intel->batch_ptr) return; - intel_batch_emit_flush(scrn); - intel_batch_submit(scrn); + intel_batch_submit(scrn, TRUE); intel_batch_wait_last(scrn); } diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h index 50cb966d..30680855 100644 --- a/src/i830_batchbuffer.h +++ b/src/i830_batchbuffer.h @@ -36,12 +36,18 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. void intel_batch_init(ScrnInfoPtr scrn); void intel_batch_teardown(ScrnInfoPtr scrn); void intel_batch_emit_flush(ScrnInfoPtr scrn); -void intel_batch_submit(ScrnInfoPtr scrn); +void intel_batch_do_flush(ScrnInfoPtr scrn); +void intel_batch_submit(ScrnInfoPtr scrn, int flush); void intel_batch_wait_last(ScrnInfoPtr scrn); static inline int intel_batch_space(intel_screen_private *intel) { - return (intel->batch_bo->size - BATCH_RESERVED) - (intel->batch_used); + return (intel->batch_bo->size - BATCH_RESERVED) - (4*intel->batch_used); +} + +static inline int intel_vertex_space(intel_screen_private *intel) +{ + return intel->vertex_bo ? 
intel->vertex_bo->size - (4*intel->vertex_used) : 0; } static inline void @@ -49,7 +55,7 @@ intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, GLuint { assert(sz < intel->batch_bo->size - 8); if (intel_batch_space(intel) < sz) - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); } static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz) @@ -60,7 +66,7 @@ static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, unsigned int sz) intel_batch_require_space(scrn, intel, sz * 4); intel->in_batch_atomic = TRUE; - intel->batch_atomic_limit = intel->batch_used + sz * 4; + intel->batch_atomic_limit = intel->batch_used + sz; } static inline void intel_batch_end_atomic(ScrnInfoPtr scrn) @@ -74,22 +80,19 @@ static inline void intel_batch_end_atomic(ScrnInfoPtr scrn) static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword) { - assert(intel->batch_ptr != NULL); - assert(intel->batch_emitting); - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = dword; - intel->batch_used += 4; + intel->batch_ptr[intel->batch_used++] = dword; } static inline void intel_batch_align(intel_screen_private *intel, uint32_t align) { uint32_t delta; - assert(intel->batch_ptr != NULL); + align /= 4; assert(align); if ((delta = intel->batch_used & (align - 1))) { delta = align - delta; - memset (intel->batch_ptr + intel->batch_used, 0, delta); + memset (intel->batch_ptr + intel->batch_used, 0, 4*delta); intel->batch_used += delta; } } @@ -100,20 +103,17 @@ intel_batch_emit_reloc(intel_screen_private *intel, uint32_t read_domains, uint32_t write_domains, uint32_t delta, int needs_fence) { - assert(intel_batch_space(intel) >= 4); - *(uint32_t *) (intel->batch_ptr + intel->batch_used) = - bo->offset + delta; if (needs_fence) drm_intel_bo_emit_reloc_fence(intel->batch_bo, - intel->batch_used, + intel->batch_used * 4, bo, delta, read_domains, write_domains); else - drm_intel_bo_emit_reloc(intel->batch_bo, 
intel->batch_used, + drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used * 4, bo, delta, read_domains, write_domains); - intel->batch_used += 4; + intel_batch_emit_dword(intel, bo->offset + delta); } static inline void @@ -123,18 +123,14 @@ intel_batch_mark_pixmap_domains(intel_screen_private *intel, { assert (read_domains); assert (write_domain == 0 || write_domain == read_domains); - assert (write_domain == 0 || - priv->flush_write_domain == 0 || - priv->flush_write_domain == write_domain); - - priv->flush_read_domains |= read_domains; - priv->batch_read_domains |= read_domains; - priv->flush_write_domain |= write_domain; - priv->batch_write_domain |= write_domain; + if (list_is_empty(&priv->batch)) list_add(&priv->batch, &intel->batch_pixmaps); - if (list_is_empty(&priv->flush)) + if (write_domain && list_is_empty(&priv->flush)) list_add(&priv->flush, &intel->flush_pixmaps); + + priv->batch_write |= write_domain != 0; + priv->busy = 1; } static inline void @@ -142,21 +138,12 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap, uint32_t read_domains, uint32_t write_domain, uint32_t delta, int needs_fence) { - dri_bo *bo = i830_get_pixmap_bo(pixmap); - uint32_t offset; - assert(intel->batch_ptr != NULL); - assert(intel_batch_space(intel) >= 4); - if (bo) { - struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap); - intel_batch_mark_pixmap_domains(intel, priv, read_domains, - write_domain); - intel_batch_emit_reloc(intel, priv->bo, read_domains, - write_domain, delta, needs_fence); - return; - } - offset = intel_get_pixmap_offset(pixmap); - *(uint32_t *)(intel->batch_ptr + intel->batch_used) = offset + delta; - intel->batch_used += 4; + struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap); + + intel_batch_mark_pixmap_domains(intel, priv, read_domains, + write_domain); + intel_batch_emit_reloc(intel, priv->bo, read_domains, + write_domain, delta, needs_fence); } #define ALIGN_BATCH(align) intel_batch_align(intel, align); @@ 
-189,19 +176,7 @@ do { \ "ADVANCE_BATCH\n", __FUNCTION__); \ assert(!intel->in_batch_atomic); \ intel_batch_require_space(scrn, intel, (n) * 4); \ - intel->batch_emitting = (n) * 4; \ - intel->batch_emit_start = intel->batch_used; \ -} while (0) - -/* special-case variant for when we have preallocated space */ -#define ATOMIC_BATCH(n) \ -do { \ - if (intel->batch_emitting != 0) \ - FatalError("%s: ATOMIC_BATCH called without closing " \ - "ADVANCE_BATCH\n", __FUNCTION__); \ - assert(intel->in_batch_atomic); \ - assert(intel->batch_used + (n) * 4 <= intel->batch_atomic_limit); \ - intel->batch_emitting = (n) * 4; \ + intel->batch_emitting = (n); \ intel->batch_emit_start = intel->batch_used; \ } while (0) @@ -224,9 +199,16 @@ do { \ if ((intel->batch_emitting > 8) && \ (I810_DEBUG & DEBUG_ALWAYS_SYNC)) { \ /* Note: not actually syncing, just flushing each batch. */ \ - intel_batch_submit(scrn); \ + intel_batch_submit(scrn, FALSE); \ } \ intel->batch_emitting = 0; \ } while (0) +void intel_next_vertex(intel_screen_private *intel); +static inline void intel_vertex_emit(intel_screen_private *intel, float v) +{ + intel->vertex_ptr[intel->vertex_used++] = v; +} +#define OUT_VERTEX(v) intel_vertex_emit(intel, v) + #endif /* _INTEL_BATCHBUFFER_H */ diff --git a/src/i830_bios.c b/src/i830_bios.c index b7262c79..da77cd43 100644 --- a/src/i830_bios.c +++ b/src/i830_bios.c @@ -352,7 +352,7 @@ int i830_bios_init(ScrnInfoPtr scrn) "libpciaccess reported 0 rom size, guessing %dkB\n", size / 1024); } - bios = xalloc(size); + bios = malloc(size); if (bios == NULL) return -1; @@ -361,7 +361,7 @@ int i830_bios_init(ScrnInfoPtr scrn) xf86DrvMsg(scrn->scrnIndex, X_WARNING, "libpciaccess failed to read %dkB video BIOS: %s\n", size / 1024, strerror(-ret)); - xfree(bios); + free(bios); return -1; } @@ -369,7 +369,7 @@ int i830_bios_init(ScrnInfoPtr scrn) if (vbt_off >= size) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Bad VBT offset: 0x%x\n", vbt_off); - xfree(bios); + free(bios); return -1; 
} @@ -377,7 +377,7 @@ int i830_bios_init(ScrnInfoPtr scrn) if (memcmp(vbt->signature, "$VBT", 4) != 0) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Bad VBT signature\n"); - xfree(bios); + free(bios); return -1; } @@ -390,7 +390,7 @@ int i830_bios_init(ScrnInfoPtr scrn) parse_driver_feature(intel, bdb); parse_sdvo_mapping(scrn, bdb); - xfree(bios); + free(bios); return 0; } diff --git a/src/i830_crt.c b/src/i830_crt.c index 26c9d412..b18834e7 100644 --- a/src/i830_crt.c +++ b/src/i830_crt.c @@ -500,7 +500,7 @@ static void i830_crt_destroy (xf86OutputPtr output) { if (output->driver_private) - xfree (output->driver_private); + free (output->driver_private); } #ifdef RANDR_GET_CRTC_INTERFACE @@ -537,7 +537,7 @@ i830_get_edid(xf86OutputPtr output, int gpio_reg, char *gpio_str) xf86DestroyI2CBusRec(intel_output->pDDCBus, TRUE, TRUE); intel_output->pDDCBus = NULL; if (edid_mon) { - xfree(edid_mon); + free(edid_mon); edid_mon = NULL; } } diff --git a/src/i830_display.c b/src/i830_display.c index 2358f193..c96b9d6e 100644 --- a/src/i830_display.c +++ b/src/i830_display.c @@ -701,7 +701,8 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, xf86CrtcPtr crtc, intel_clock_t clock; int max_n; Bool found = FALSE; - int err_most = (target >> 8) + (target >> 10); + /* Approximately equals target * 0.00585 */ + int err_most = (target >> 8) + (target >> 9); if (i830PipeHasType(crtc, I830_OUTPUT_LVDS)) { if ((INREG(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) @@ -2764,6 +2765,8 @@ static void i830_crtc_gamma_set(xf86CrtcPtr crtc, CARD16 *red, CARD16 *green, CARD16 *blue, int size) { + ScrnInfoPtr scrn = crtc->scrn; + intel_screen_private *intel = intel_get_screen_private(scrn); I830CrtcPrivatePtr intel_crtc = crtc->driver_private; int i; @@ -2775,7 +2778,13 @@ i830_crtc_gamma_set(xf86CrtcPtr crtc, CARD16 *red, CARD16 *green, CARD16 *blue, intel_crtc->lut_b[i] = blue[i] >> 8; } - i830_crtc_load_lut(crtc); + /* + * 855 at least really doesn't seem like like you poking its + 
* pallette registers other than at mode set time. so just disable + * this for now on 8xx. Stops hard machine lockups for me. + */ + if (IS_I9XX(intel)) + i830_crtc_load_lut(crtc); } /** @@ -3186,7 +3195,7 @@ i830_crtc_mode_get(ScrnInfoPtr scrn, xf86CrtcPtr crtc) int vtot = INREG((pipe == 0) ? VTOTAL_A : VTOTAL_B); int vsync = INREG((pipe == 0) ? VSYNC_A : VSYNC_B); - mode = xcalloc(1, sizeof(DisplayModeRec)); + mode = calloc(1, sizeof(DisplayModeRec)); if (mode == NULL) return NULL; diff --git a/src/i830_dri.c b/src/i830_dri.c index d6522649..ab895df7 100644 --- a/src/i830_dri.c +++ b/src/i830_dri.c @@ -44,6 +44,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include <sys/ioctl.h> #include <unistd.h> #include <fcntl.h> +#include <sys/time.h> +#include <time.h> +#include <errno.h> #include "xf86.h" #include "xf86_OSproc.h" @@ -72,6 +75,7 @@ extern XF86ModuleData dri2ModuleData; #endif typedef struct { + int refcnt; PixmapPtr pixmap; unsigned int attachment; } I830DRI2BufferPrivateRec, *I830DRI2BufferPrivatePtr; @@ -90,12 +94,12 @@ I830DRI2CreateBuffers(DrawablePtr drawable, unsigned int *attachments, I830DRI2BufferPrivatePtr privates; PixmapPtr pixmap, pDepthPixmap; - buffers = xcalloc(count, sizeof *buffers); + buffers = calloc(count, sizeof *buffers); if (buffers == NULL) return NULL; - privates = xcalloc(count, sizeof *privates); + privates = calloc(count, sizeof *privates); if (privates == NULL) { - xfree(buffers); + free(buffers); return NULL; } @@ -125,8 +129,7 @@ I830DRI2CreateBuffers(DrawablePtr drawable, unsigned int *attachments, break; } - if (!intel->tiling || - (!IS_I965G(intel) && !intel->kernel_exec_fencing)) + if (!intel->tiling) hint = 0; pixmap = screen->CreatePixmap(screen, @@ -145,14 +148,14 @@ I830DRI2CreateBuffers(DrawablePtr drawable, unsigned int *attachments, buffers[i].cpp = pixmap->drawable.bitsPerPixel / 8; buffers[i].driverPrivate = &privates[i]; buffers[i].flags = 0; /* not tiled */ + privates[i].refcnt = 1; privates[i].pixmap = pixmap; 
privates[i].attachment = attachments[i]; bo = i830_get_pixmap_bo(pixmap); - if (dri_bo_flink(bo, &buffers[i].name) != 0) { + if (bo != NULL && dri_bo_flink(bo, &buffers[i].name) != 0) { /* failed to name buffer */ } - } return buffers; @@ -172,12 +175,12 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, I830DRI2BufferPrivatePtr privates; PixmapPtr pixmap; - buffer = xcalloc(1, sizeof *buffer); + buffer = calloc(1, sizeof *buffer); if (buffer == NULL) return NULL; - privates = xcalloc(1, sizeof *privates); + privates = calloc(1, sizeof *privates); if (privates == NULL) { - xfree(buffer); + free(buffer); return NULL; } @@ -203,8 +206,7 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, break; } - if (!intel->tiling || - (!IS_I965G(intel) && !intel->kernel_exec_fencing)) + if (!intel->tiling) hint = 0; pixmap = screen->CreatePixmap(screen, @@ -213,6 +215,11 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, (format != 0) ? format : drawable->depth, hint); + if (pixmap == NULL) { + free(privates); + free(buffer); + return NULL; + } } @@ -222,12 +229,17 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, buffer->driverPrivate = privates; buffer->format = format; buffer->flags = 0; /* not tiled */ + privates->refcnt = 1; privates->pixmap = pixmap; privates->attachment = attachment; bo = i830_get_pixmap_bo(pixmap); - if (dri_bo_flink(bo, &buffer->name) != 0) { + if (bo == NULL || dri_bo_flink(bo, &buffer->name) != 0) { /* failed to name buffer */ + screen->DestroyPixmap(pixmap); + free(privates); + free(buffer); + return NULL; } return buffer; @@ -250,8 +262,8 @@ I830DRI2DestroyBuffers(DrawablePtr drawable, DRI2BufferPtr buffers, int count) } if (buffers) { - xfree(buffers[0].driverPrivate); - xfree(buffers); + free(buffers[0].driverPrivate); + free(buffers); } } @@ -261,17 +273,27 @@ static void I830DRI2DestroyBuffer(DrawablePtr drawable, DRI2Buffer2Ptr buffer) { if (buffer) { 
I830DRI2BufferPrivatePtr private = buffer->driverPrivate; - ScreenPtr screen = drawable->pScreen; + if (--private->refcnt == 0) { + ScreenPtr screen = private->pixmap->drawable.pScreen; - screen->DestroyPixmap(private->pixmap); + screen->DestroyPixmap(private->pixmap); - xfree(private); - xfree(buffer); + free(private); + free(buffer); + } } } #endif +static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer) +{ + if (buffer) { + I830DRI2BufferPrivatePtr private = buffer->driverPrivate; + private->refcnt++; + } +} + static void I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, DRI2BufferPtr destBuffer, DRI2BufferPtr sourceBuffer) @@ -288,7 +310,10 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, RegionPtr pCopyClip; GCPtr gc; - gc = GetScratchGC(drawable->depth, screen); + gc = GetScratchGC(dst->depth, screen); + if (!gc) + return; + pCopyClip = REGION_CREATE(screen, NULL, 0); REGION_COPY(screen, pCopyClip, pRegion); (*gc->funcs->ChangeClip) (gc, CT_REGION, pCopyClip, 0); @@ -375,15 +400,542 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, * later. * * We can't rely on getting into the block handler before the DRI - * client gets to run again so flush now. */ - intel_batch_submit(scrn); -#if ALWAYS_SYNC - intel_sync(scrn); -#endif + * client gets to run again so flush now. 
+ */ + intel_batch_submit(scrn, TRUE); drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE); +} + +#if DRI2INFOREC_VERSION >= 4 + +enum DRI2FrameEventType { + DRI2_SWAP, + DRI2_FLIP, + DRI2_WAITMSC, +}; + +typedef struct _DRI2FrameEvent { + XID drawable_id; + ClientPtr client; + enum DRI2FrameEventType type; + int frame; + + /* for swaps & flips only */ + DRI2SwapEventPtr event_complete; + void *event_data; + DRI2BufferPtr front; + DRI2BufferPtr back; +} DRI2FrameEventRec, *DRI2FrameEventPtr; + +static int +I830DRI2DrawablePipe(DrawablePtr pDraw) +{ + ScreenPtr pScreen = pDraw->pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + BoxRec box, crtcbox; + xf86CrtcPtr crtc; + int pipe = -1; + + box.x1 = pDraw->x; + box.y1 = pDraw->y; + box.x2 = box.x1 + pDraw->width; + box.y2 = box.y1 + pDraw->height; + + crtc = i830_covering_crtc(pScrn, &box, NULL, &crtcbox); + + /* Make sure the CRTC is valid and this is the real front buffer */ + if (crtc != NULL && !crtc->rotatedData) + pipe = i830_crtc_to_pipe(crtc); + + return pipe; +} + +static void +I830DRI2ExchangeBuffers(DrawablePtr draw, DRI2BufferPtr front, + DRI2BufferPtr back) +{ + I830DRI2BufferPrivatePtr front_priv, back_priv; + struct intel_pixmap *front_intel, *back_intel; + ScreenPtr screen; + intel_screen_private *intel; + int tmp; + + front_priv = front->driverPrivate; + back_priv = back->driverPrivate; + + /* Swap BO names so DRI works */ + tmp = front->name; + front->name = back->name; + back->name = tmp; + + /* Swap pixmap bos */ + front_intel = i830_get_pixmap_intel(front_priv->pixmap); + back_intel = i830_get_pixmap_intel(back_priv->pixmap); + i830_set_pixmap_intel(front_priv->pixmap, back_intel); + i830_set_pixmap_intel(back_priv->pixmap, front_intel); /* should be screen */ + + /* Do we need to update the Screen? 
*/ + screen = draw->pScreen; + intel = intel_get_screen_private(xf86Screens[screen->myNum]); + if (front_intel->bo == intel->front_buffer->bo) { + dri_bo_unreference (intel->front_buffer->bo); + intel->front_buffer->bo = back_intel->bo; + dri_bo_reference (intel->front_buffer->bo); + i830_set_pixmap_intel(screen->GetScreenPixmap(screen), + back_intel); + } +} + +#ifdef notyet +void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + DRI2FrameEventPtr event = event_data; + DrawablePtr drawable; + ScreenPtr screen; + ScrnInfoPtr scrn; + intel_screen_private *intel; + int status; + + status = dixLookupDrawable(&drawable, event->drawable_id, serverClient, + M_ANY, DixWriteAccess); + if (status != Success) { + I830DRI2DestroyBuffer(NULL, event->front); + I830DRI2DestroyBuffer(NULL, event->back); + free(event); + return; + } + + screen = drawable->pScreen; + scrn = xf86Screens[screen->myNum]; + intel = intel_get_screen_private(scrn); + + switch (event->type) { + case DRI2_SWAP: { + int swap_type; + + if (DRI2CanExchange(drawable)) { + I830DRI2ExchangeBuffers(drawable, + event->front, event->back); + swap_type = DRI2_EXCHANGE_COMPLETE; + } else { + BoxRec box; + RegionRec region; + + box.x1 = 0; + box.y1 = 0; + box.x2 = drawable->width; + box.y2 = drawable->height; + REGION_INIT(pScreen, &region, &box, 0); + + I830DRI2CopyRegion(drawable, + &region, event->front, event->back); + swap_type = DRI2_BLIT_COMPLETE; + } + DRI2SwapComplete(event->client, drawable, frame, tv_sec, tv_usec, + swap_type, + event->event_complete, event->event_data); + break; + } + case DRI2_WAITMSC: + DRI2WaitMSCComplete(event->client, drawable, + frame, tv_sec, tv_usec); + break; + default: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + + I830DRI2DestroyBuffer(drawable, event->front); + I830DRI2DestroyBuffer(drawable, event->back); + free(event); +} + +void 
I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + DRI2FrameEventPtr flip = event_data; + DrawablePtr drawable; + ScreenPtr screen; + ScrnInfoPtr scrn; + int status; + + status = dixLookupDrawable(&drawable, flip->drawable_id, serverClient, + M_ANY, DixWriteAccess); + if (status != Success) { + free(flip); + return; + } + + screen = drawable->pScreen; + scrn = xf86Screens[screen->myNum]; + + /* We assume our flips arrive in order, so we don't check the frame */ + switch (flip->type) { + case DRI2_SWAP: + DRI2SwapComplete(flip->client, drawable, frame, tv_sec, tv_usec, + DRI2_FLIP_COMPLETE, flip->event_complete, + flip->event_data); + break; + default: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + + free(flip); +} + +/* + * ScheduleSwap is responsible for requesting a DRM vblank event for the + * appropriate frame. + * + * In the case of a blit (e.g. for a windowed swap) or buffer exchange, + * the vblank requested can simply be the last queued swap frame + the swap + * interval for the drawable. + * + * In the case of a page flip, we request an event for the last queued swap + * frame + swap interval - 1, since we'll need to queue the flip for the frame + * immediately following the received event. + * + * The client will be blocked if it tries to perform further GL commands + * after queueing a swap, though in the Intel case after queueing a flip, the + * client is free to queue more commands; they'll block in the kernel if + * they access buffers busy with the flip. + * + * When the swap is complete, the driver should call into the server so it + * can send any swap complete events that have been requested. 
+ */ +static int +I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor, + CARD64 remainder, DRI2SwapEventPtr func, void *data) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw), flip = 0; + DRI2FrameEventPtr swap_info; + enum DRI2FrameEventType swap_type = DRI2_SWAP; + CARD64 current_msc; + BoxRec box; + RegionRec region; + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + *target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + swap_info = calloc(1, sizeof(DRI2FrameEventRec)); + + /* Drawable not displayed... just complete the swap */ + if (pipe == -1 || !swap_info) + goto blit_fallback; + + swap_info->drawable_id = draw->id; + swap_info->client = client; + swap_info->event_complete = func; + swap_info->event_data = data; + swap_info->front = front; + swap_info->back = back; + I830DRI2ReferenceBuffer(front); + I830DRI2ReferenceBuffer(back); + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "first get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + current_msc = vbl.reply.sequence; + + swap_info->type = swap_type; + + /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP. + * Do it early, so handling of different timing constraints + * for divisor, remainder and msc vs. target_msc works. + */ + if (*target_msc > 0) + *target_msc -= flip; + + /* + * If divisor is zero, or current_msc is smaller than target_msc + * we just need to make sure target_msc passes before initiating + * the swap. 
+ */ + if (divisor == 0 || current_msc < *target_msc) { + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + /* If non-pageflipping, but blitting/exchanging, we need to use + * DRM_VBLANK_NEXTONMISS to avoid unreliable timestamping later + * on. + */ + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This makes swap_interval logic more robust. + */ + if (current_msc >= *target_msc) + *target_msc = current_msc; + + vbl.request.sequence = *target_msc; + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "divisor 0 get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * and we need to queue an event that will satisfy the divisor/remainder + * equation. + */ + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If the calculated deadline vbl.request.sequence is smaller than + * or equal to current_msc, it means we've passed the last point + * when effective onset frame seq could satisfy + * seq % divisor == remainder, so we need to wait for the next time + * this will happen. + + * This comparison takes the 1 frame swap delay in pageflipping mode + * into account, as well as a potential DRM_VBLANK_NEXTONMISS delay + * if we are blitting/exchanging instead of flipping. 
+ */ + if (vbl.request.sequence <= current_msc) + vbl.request.sequence += divisor; + + /* Account for 1 frame extra pageflip delay if flip > 0 */ + vbl.request.sequence -= flip; + + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "final get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + /* Adjust returned value for 1 frame pageflip offset of flip > 0 */ + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + + return TRUE; + +blit_fallback: + box.x1 = 0; + box.y1 = 0; + box.x2 = draw->width; + box.y2 = draw->height; + REGION_INIT(pScreen, &region, &box, 0); + + I830DRI2CopyRegion(draw, &region, front, back); + + DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data); + if (swap_info) { + I830DRI2DestroyBuffer(draw, swap_info->front); + I830DRI2DestroyBuffer(draw, swap_info->back); + free(swap_info); + } + *target_msc = 0; /* offscreen, so zero out target vblank count */ + return TRUE; } +/* + * Get current frame count and frame count timestamp, based on drawable's + * crtc. 
+ */ +static int +I830DRI2GetMSC(DrawablePtr draw, CARD64 *ust, CARD64 *msc) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw); + + /* Drawable not displayed, make up a value */ + if (pipe == -1) { + *ust = 0; + *msc = 0; + return TRUE; + } + + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "get vblank counter failed: %s\n", strerror(errno)); + return FALSE; + } + + *ust = ((CARD64)vbl.reply.tval_sec * 1000000) + vbl.reply.tval_usec; + *msc = vbl.reply.sequence; + + return TRUE; +} + +/* + * Request a DRM event when the requested conditions will be satisfied. + * + * We need to handle the event and ask the server to wake up the client when + * we receive it. 
+ */ +static int +I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + CARD64 divisor, CARD64 remainder) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + DRI2FrameEventPtr wait_info; + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw); + CARD64 current_msc; + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + /* Drawable not visible, return immediately */ + if (pipe == -1) + goto out_complete; + + wait_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!wait_info) + goto out_complete; + + wait_info->drawable_id = draw->id; + wait_info->client = client; + wait_info->type = DRI2_WAITMSC; + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "get vblank counter failed: %s\n", strerror(errno)); + goto out_complete; + } + + current_msc = vbl.reply.sequence; + + /* + * If divisor is zero, or current_msc is smaller than target_msc, + * we just need to make sure target_msc passes before waking up the + * client. + */ + if (divisor == 0 || current_msc < target_msc) { + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This keeps the client from continually + * sending us MSC targets from the past by forcibly updating + * their count on this call. 
+ */ + if (current_msc >= target_msc) + target_msc = current_msc; + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = target_msc; + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "get vblank counter failed: %s\n", strerror(errno)); + goto out_complete; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * so we queue an event that will satisfy the divisor/remainder equation. + */ + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If calculated remainder is larger than requested remainder, + * it means we've passed the last point where + * seq % divisor == remainder, so we need to wait for the next time + * that will happen. 
+ */ + if ((current_msc % divisor) >= remainder) + vbl.request.sequence += divisor; + + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "get vblank counter failed: %s\n", strerror(errno)); + goto out_complete; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + + return TRUE; + +out_complete: + DRI2WaitMSCComplete(client, draw, target_msc, 0, 0); + return TRUE; +} +#endif +#endif + Bool I830DRI2ScreenInit(ScreenPtr screen) { ScrnInfoPtr scrn = xf86Screens[screen->myNum]; @@ -393,6 +945,9 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) int dri2_major = 1; int dri2_minor = 0; #endif +#if DRI2INFOREC_VERSION >= 4 + const char *driverNames[1]; +#endif #ifdef USE_DRI2_1_1_0 if (xf86LoaderCheckSymbol("DRI2Version")) { @@ -407,6 +962,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) #endif intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD); + memset(&info, '\0', sizeof(info)); info.fd = intel->drmSubFD; info.driverName = IS_I965G(intel) ? "i965" : "i915"; info.deviceName = intel->deviceName; @@ -430,6 +986,17 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) #endif info.CopyRegion = I830DRI2CopyRegion; +#if DRI2INFOREC_VERSION >= 4 +#ifdef notyet + info.version = 4; + info.ScheduleSwap = I830DRI2ScheduleSwap; + info.GetMSC = I830DRI2GetMSC; + info.ScheduleWaitMSC = I830DRI2ScheduleWaitMSC; + info.numDrivers = 1; + info.driverNames = driverNames; + driverNames[0] = info.driverName; +#endif +#endif return DRI2ScreenInit(screen, &info); } diff --git a/src/i830_driver.c b/src/i830_driver.c index 1aa7f19e..10c3f38e 100644 --- a/src/i830_driver.c +++ b/src/i830_driver.c @@ -48,6 +48,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include <stdlib.h> #include <stdio.h> #include <errno.h> +#include <poll.h> #include "xf86.h" #include "xf86_OSproc.h" @@ -254,16 +255,12 @@ static Bool I830GetRec(ScrnInfoPtr scrn) static void I830FreeRec(ScrnInfoPtr scrn) { - intel_screen_private *intel; - if (!scrn) return; if (!scrn->driverPrivate) return; - intel = intel_get_screen_private(scrn); - - xfree(scrn->driverPrivate); + free(scrn->driverPrivate); scrn->driverPrivate = NULL; } @@ -498,10 +495,6 @@ I830MapMem(ScrnInfoPtr scrn) if (err) return FALSE; - if (intel->ring.mem != NULL) { - intel->ring.virtual_start = intel->FbBase + intel->ring.mem->offset; - } - return TRUE; } @@ -988,7 +981,7 @@ static Bool i830_kernel_mode_enabled(ScrnInfoPtr scrn) /* Be nice to the user and load fbcon too */ if (!ret) (void)xf86LoadKernelModule("fbcon"); - xfree(busIdString); + free(busIdString); if (ret) return FALSE; @@ -1258,7 +1251,7 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) /* Process the options */ xf86CollectOptions(scrn, NULL); - if (!(intel->Options = xalloc(sizeof(I830Options)))) + if (!(intel->Options = malloc(sizeof(I830Options)))) return FALSE; memcpy(intel->Options, I830Options, sizeof(I830Options)); xf86ProcessOptions(scrn->scrnIndex, scrn->options, intel->Options); @@ -1349,6 +1342,25 @@ static void i830_check_dri_option(ScrnInfoPtr scrn) } } +#ifdef notyet +static void +drm_vblank_handler(int fd, unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + I830DRI2FrameEventHandler(frame, tv_sec, tv_usec, event_data); +} + +static void +drm_wakeup_handler(pointer data, int err, pointer p) +{ + intel_screen_private *intel = data; + fd_set *read_mask = p; + + if (err >= 0 && FD_ISSET(intel->drmSubFD, read_mask)) + drmHandleEvent(intel->drmSubFD, &intel->event_context); +} +#endif + static Bool i830_user_modesetting_init(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); @@ -1383,6 +1395,14 @@ static Bool 
i830_user_modesetting_init(ScrnInfoPtr scrn) RestoreHWState(scrn); intel->stolen_size = I830DetectMemory(scrn); +#ifdef notyet + intel->event_context.version = DRM_EVENT_CONTEXT_VERSION; + intel->event_context.vblank_handler = drm_vblank_handler; + AddGeneralSocket(intel->drmSubFD); + RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, intel); +#endif + return TRUE; } @@ -1405,11 +1425,11 @@ static Bool i830_open_drm_master(ScrnInfoPtr scrn) xf86DrvMsg(scrn->scrnIndex, X_ERROR, "[drm] Failed to open DRM device for %s: %s\n", busid, strerror(errno)); - xfree(busid); + free(busid); return FALSE; } - xfree(busid); + free(busid); /* Check that what we opened was a master or a master-capable FD, * by setting the version of the interface we'll use to talk to it. @@ -1464,8 +1484,6 @@ static Bool I830DrmModeInit(ScrnInfoPtr scrn) return FALSE; } - intel->have_gem = TRUE; - i830_init_bufmgr(scrn); return TRUE; @@ -1537,7 +1555,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags) intel->SaveGeneration = -1; intel->pEnt = pEnt; intel->use_drm_mode = drm_mode_setting; - intel->kernel_exec_fencing = intel->use_drm_mode; if (!I830LoadSyms(scrn)) return FALSE; @@ -1680,81 +1697,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags) return TRUE; } -/* - * Reset registers that it doesn't make sense to save/restore to a sane state. - * This is basically the ring buffer and fence registers. Restoring these - * doesn't make sense without restoring GTT mappings. This is something that - * whoever gets control next should do. - */ -static void i830_stop_ring(ScrnInfoPtr scrn, Bool flush) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - unsigned long temp; - - DPRINTF(PFX, "ResetState: flush is %s\n", BOOLTOSTRING(flush)); - - /* Flush the ring buffer, then disable it. 
*/ - temp = INREG(LP_RING + RING_LEN); - if (temp & RING_VALID) { - i830_refresh_ring(scrn); - i830_wait_ring_idle(scrn); - } - - OUTREG(LP_RING + RING_LEN, 0); - OUTREG(LP_RING + RING_HEAD, 0); - OUTREG(LP_RING + RING_TAIL, 0); - OUTREG(LP_RING + RING_START, 0); -} - -static void i830_start_ring(ScrnInfoPtr scrn) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - unsigned int itemp; - - DPRINTF(PFX, "SetRingRegs\n"); - - OUTREG(LP_RING + RING_LEN, 0); - OUTREG(LP_RING + RING_TAIL, 0); - OUTREG(LP_RING + RING_HEAD, 0); - - assert((intel->ring.mem->offset & I830_RING_START_MASK) == - intel->ring.mem->offset); - - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = intel->ring.mem->offset; - OUTREG(LP_RING + RING_START, itemp); - - if (((intel->ring.mem->size - 4096) & I830_RING_NR_PAGES) != - intel->ring.mem->size - 4096) { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates " - "its mask (%x)\n", intel->ring.mem->size - 4096, - I830_RING_NR_PAGES); - } - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = (intel->ring.mem->size - 4096) & I830_RING_NR_PAGES; - itemp |= (RING_NO_REPORT | RING_VALID); - OUTREG(LP_RING + RING_LEN, itemp); - i830_refresh_ring(scrn); -} - -void i830_refresh_ring(ScrnInfoPtr scrn) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - - /* If we're reaching RefreshRing as a result of grabbing the DRI lock - * before we've set up the ringbuffer, don't bother. 
- */ - if (intel->ring.mem == NULL) - return; - - intel->ring.head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK; - intel->ring.tail = INREG(LP_RING + RING_TAIL); - intel->ring.space = intel->ring.head - (intel->ring.tail + 8); - if (intel->ring.space < 0) - intel->ring.space += intel->ring.mem->size; -} - enum pipe { PIPE_A = 0, PIPE_B, @@ -2226,16 +2168,10 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask) /* Emit a flush of the rendering cache, or on the 965 and beyond * rendering results may not hit the framebuffer until significantly * later. - * - * XXX Under KMS this is only required because tfp does not have - * the appropriate synchronisation points, so that outstanding updates - * to the pixmap are flushed prior to use as a texture. The framebuffer - * should be handled by the kernel domain management... */ - if (intel->need_mi_flush || !list_is_empty(&intel->flush_pixmaps)) - intel_batch_emit_flush(scrn); - - intel_batch_submit(scrn); + intel_batch_submit(scrn, + intel->need_mi_flush || + !list_is_empty(&intel->flush_pixmaps)); drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE); } @@ -2373,24 +2309,16 @@ void i830_init_bufmgr(ScrnInfoPtr scrn) if (intel->bufmgr) return; - if (intel->have_gem) { + batch_size = 4096 * 4; - batch_size = 4096 * 4; + /* The 865 has issues with larger-than-page-sized batch buffers. */ + if (IS_I865G(intel)) + batch_size = 4096; - /* The 865 has issues with larger-than-page-sized batch buffers. 
*/ - if (IS_I865G(intel)) - batch_size = 4096; + intel->bufmgr = intel_bufmgr_gem_init(intel->drmSubFD, batch_size); + intel_bufmgr_gem_enable_reuse(intel->bufmgr); + drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr); - intel->bufmgr = intel_bufmgr_gem_init(intel->drmSubFD, batch_size); - intel_bufmgr_gem_enable_reuse(intel->bufmgr); - drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr); - } else { - assert(intel->FbBase != NULL); - intel->bufmgr = intel_bufmgr_fake_init(intel->drmSubFD, - intel->fake_bufmgr_mem->offset, intel->FbBase + - intel->fake_bufmgr_mem->offset, - intel->fake_bufmgr_mem->size, NULL); - } list_init(&intel->batch_pixmaps); list_init(&intel->flush_pixmaps); list_init(&intel->in_flight); @@ -2758,11 +2686,12 @@ I830ScreenInit(int scrnIndex, ScreenPtr screen, int argc, char **argv) return FALSE; } - if (IS_I965G(intel)) + if (IS_I965G(intel)) { intel->batch_flush_notify = i965_batch_flush_notify; - else if (IS_I9XX(intel)) + } else if (IS_I9XX(intel)) { + intel->vertex_flush = i915_vertex_flush; intel->batch_flush_notify = i915_batch_flush_notify; - else + } else intel->batch_flush_notify = i830_batch_flush_notify; miInitializeBackingStore(screen); @@ -2848,7 +2777,7 @@ I830ScreenInit(int scrnIndex, ScreenPtr screen, int argc, char **argv) intel->closing = FALSE; intel->suspended = FALSE; - return TRUE; + return uxa_resources_init(screen); } static void i830AdjustFrame(int scrnIndex, int x, int y, int flags) @@ -2896,26 +2825,25 @@ static void I830LeaveVT(int scrnIndex, int flags) intel->leaving = TRUE; - if (intel->devicesTimer) - TimerFree(intel->devicesTimer); - intel->devicesTimer = NULL; - i830SetHotkeyControl(scrn, HOTKEY_BIOS_SWITCH); xf86RotateFreeShadow(scrn); xf86_hide_cursors(scrn); - intel_sync(scrn); - if (!intel->use_drm_mode) { RestoreHWState(scrn); - /* Evict everything from the bufmgr, as we're about to lose - * ownership of the graphics memory. 
- */ - if (!intel->have_gem) { - intel_bufmgr_fake_evict_all(intel->bufmgr); - i830_stop_ring(scrn, TRUE); + + /* console restore hack */ + if (IS_IGDNG(intel) && intel->int10 && intel->int10Mode) { + xf86Int10InfoPtr int10 = intel->int10; + + /* Use int10 to restore the console mode */ + int10->num = 0x10; + int10->ax = 0x4f02; + int10->bx = intel->int10Mode | 0x8000; + int10->cx = int10->dx = 0; + xf86ExecX86int10(int10); } /* console restore hack */ @@ -2936,7 +2864,7 @@ static void I830LeaveVT(int scrnIndex, int flags) i830_unbind_all_memory(scrn); - if (intel->have_gem && !intel->use_drm_mode) { + if (!intel->use_drm_mode) { int ret; /* Tell the kernel to evict all buffer objects and block GTT @@ -3017,12 +2945,11 @@ static Bool I830EnterVT(int scrnIndex, int flags) intel->leaving = FALSE; - if (!intel->use_drm_mode) - i830_disable_render_standby(scrn); - - if (intel->have_gem && !intel->use_drm_mode) { + if (!intel->use_drm_mode) { int ret; + i830_disable_render_standby(scrn); + /* Tell the kernel that we're back in control and ready for GTT * usage. */ @@ -3043,11 +2970,6 @@ static Bool I830EnterVT(int scrnIndex, int flags) gen4_render_state_init(scrn); if (!intel->use_drm_mode) { - /* Re-set up the ring. */ - if (!intel->have_gem) { - i830_stop_ring(scrn, FALSE); - i830_start_ring(scrn); - } I830InitHWCursor(scrn); /* Tell the BIOS that we're in control of mode setting now. 
*/ @@ -3099,10 +3021,6 @@ static Bool I830CloseScreen(int scrnIndex, ScreenPtr screen) I830LeaveVT(scrnIndex, 0); } - if (intel->devicesTimer) - TimerFree(intel->devicesTimer); - intel->devicesTimer = NULL; - if (!intel->use_drm_mode) { DPRINTF(PFX, "\nUnmapping memory\n"); I830UnmapMem(scrn); @@ -3111,7 +3029,7 @@ static Bool I830CloseScreen(int scrnIndex, ScreenPtr screen) if (intel->uxa_driver) { uxa_driver_fini(screen); - xfree(intel->uxa_driver); + free(intel->uxa_driver); intel->uxa_driver = NULL; } if (intel->front_buffer) { diff --git a/src/i830_dvo.c b/src/i830_dvo.c index 3deb254b..bccbe7b0 100644 --- a/src/i830_dvo.c +++ b/src/i830_dvo.c @@ -302,7 +302,7 @@ i830_dvo_destroy (xf86OutputPtr output) xf86DestroyI2CBusRec (intel_output->pI2CBus, TRUE, TRUE); if (intel_output->pDDCBus) xf86DestroyI2CBusRec (intel_output->pDDCBus, TRUE, TRUE); - xfree (intel_output); + free (intel_output); } } @@ -407,7 +407,7 @@ i830_dvo_init(ScrnInfoPtr scrn) /* Set up the DDC bus */ ret = I830I2CInit(scrn, &intel_output->pDDCBus, GPIOD, "DVODDC_D"); if (!ret) { - xfree(intel_output); + free(intel_output); return; } @@ -482,7 +482,7 @@ i830_dvo_init(ScrnInfoPtr scrn) if (output == NULL) { xf86DestroyI2CBusRec(pI2CBus, TRUE, TRUE); xf86DestroyI2CBusRec(intel_output->pDDCBus, TRUE, TRUE); - xfree(intel_output); + free(intel_output); xf86UnloadSubModule(drv->modhandle); return; } @@ -517,5 +517,5 @@ i830_dvo_init(ScrnInfoPtr scrn) if (pI2CBus != NULL) xf86DestroyI2CBusRec(pI2CBus, TRUE, TRUE); xf86DestroyI2CBusRec(intel_output->pDDCBus, TRUE, TRUE); - xfree(intel_output); + free(intel_output); } diff --git a/src/i830_hdmi.c b/src/i830_hdmi.c index 3b04b0f5..41b264ec 100644 --- a/src/i830_hdmi.c +++ b/src/i830_hdmi.c @@ -231,7 +231,7 @@ i830_hdmi_detect(xf86OutputPtr output) dev_priv->has_hdmi_sink ? "HDMI" : "DVI", (dev_priv->output_reg == SDVOB) ? 
1 : 2); - xfree(edid_mon); + free(edid_mon); return status; } @@ -242,7 +242,7 @@ i830_hdmi_destroy (xf86OutputPtr output) if (intel_output != NULL) { xf86DestroyI2CBusRec(intel_output->pDDCBus, FALSE, FALSE); - xfree(intel_output); + free(intel_output); } } diff --git a/src/i830_hwmc.c b/src/i830_hwmc.c index 850bf876..9aa0af33 100644 --- a/src/i830_hwmc.c +++ b/src/i830_hwmc.c @@ -62,7 +62,7 @@ static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext, intel_screen_private *intel = intel_get_screen_private(scrn); struct intel_xvmc_hw_context *contextRec; - *priv = xcalloc(1, sizeof(struct intel_xvmc_hw_context)); + *priv = calloc(1, sizeof(struct intel_xvmc_hw_context)); contextRec = (struct intel_xvmc_hw_context *) *priv; if (!contextRec) { *num_priv = 0; @@ -207,7 +207,7 @@ Bool intel_xvmc_adaptor_init(ScreenPtr pScreen) return FALSE; } - pAdapt = xcalloc(1, sizeof(XF86MCAdaptorRec)); + pAdapt = calloc(1, sizeof(XF86MCAdaptorRec)); if (!pAdapt) { ErrorF("Allocation error.\n"); return FALSE; diff --git a/src/i830_lvds.c b/src/i830_lvds.c index 7af63ffe..651916cf 100644 --- a/src/i830_lvds.c +++ b/src/i830_lvds.c @@ -1084,7 +1084,7 @@ i830_lvds_get_modes(xf86OutputPtr output) if (!output->MonInfo) { - edid_mon = xcalloc (1, sizeof (xf86Monitor)); + edid_mon = calloc (1, sizeof (xf86Monitor)); if (edid_mon) { struct detailed_monitor_section *det_mon = edid_mon->det_mon; @@ -1135,7 +1135,7 @@ i830_lvds_destroy (xf86OutputPtr output) xf86DeleteMode (&intel->lvds_fixed_mode, intel->lvds_fixed_mode); if (intel_output) - xfree (intel_output); + free (intel_output); } #ifdef RANDR_12_INTERFACE diff --git a/src/i830_memory.c b/src/i830_memory.c index 935575dd..0e523b7c 100644 --- a/src/i830_memory.c +++ b/src/i830_memory.c @@ -198,7 +198,7 @@ static Bool i830_bind_memory(ScrnInfoPtr scrn, i830_memory *mem) if (mem == NULL || mem->bound || intel->use_drm_mode) return TRUE; - if (intel->have_gem && mem->bo != NULL) { + if (mem->bo != NULL) { if 
(dri_bo_pin(mem->bo, mem->alignment) != 0) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, @@ -223,11 +223,6 @@ static Bool i830_bind_memory(ScrnInfoPtr scrn, i830_memory *mem) mem->bound = TRUE; } - if (mem->tiling != TILE_NONE && !intel->kernel_exec_fencing) { - mem->fence_nr = i830_set_tiling(scrn, mem->offset, mem->pitch, - mem->allocated_size, mem->tiling); - } - return TRUE; } @@ -238,10 +233,6 @@ static Bool i830_unbind_memory(ScrnInfoPtr scrn, i830_memory *mem) if (mem == NULL || !mem->bound) return TRUE; - if (mem->tiling != TILE_NONE && !intel->use_drm_mode && - !intel->kernel_exec_fencing) - i830_clear_tiling(scrn, mem->fence_nr); - if (mem->bo != NULL && !intel->use_drm_mode) { if (dri_bo_unpin(mem->bo) == 0) { mem->bound = FALSE; @@ -285,8 +276,8 @@ void i830_free_memory(ScrnInfoPtr scrn, i830_memory * mem) if (mem->next) mem->next->prev = mem->prev; } - xfree(mem->name); - xfree(mem); + free(mem->name); + free(mem); return; } /* Disconnect from the list of allocations */ @@ -300,8 +291,8 @@ void i830_free_memory(ScrnInfoPtr scrn, i830_memory * mem) mem->key = -1; } - xfree(mem->name); - xfree(mem); + free(mem->name); + free(mem); } /* Resets the state of the aperture allocator, freeing all memory that had @@ -341,8 +332,6 @@ void i830_reset_allocations(ScrnInfoPtr scrn) intel->front_buffer = NULL; intel->overlay_regs = NULL; intel->power_context = NULL; - intel->ring.mem = NULL; - intel->fake_bufmgr_mem = NULL; } /** @@ -360,25 +349,25 @@ Bool i830_allocator_init(ScrnInfoPtr scrn, unsigned long size) i830_memory *start, *end; struct drm_i915_setparam sp; - start = xcalloc(1, sizeof(*start)); + start = calloc(1, sizeof(*start)); if (start == NULL) return FALSE; start->name = xstrdup("start marker"); if (start->name == NULL) { - xfree(start); + free(start); return FALSE; } - end = xcalloc(1, sizeof(*end)); + end = calloc(1, sizeof(*end)); if (end == NULL) { - xfree(start->name); - xfree(start); + free(start->name); + free(start); return FALSE; } end->name = 
xstrdup("end marker"); if (end->name == NULL) { - xfree(start->name); - xfree(start); - xfree(end); + free(start->name); + free(start); + free(end); return FALSE; } @@ -442,8 +431,8 @@ Bool i830_allocator_init(ScrnInfoPtr scrn, unsigned long size) ret = drmCommandWrite(intel->drmSubFD, DRM_I915_SETPARAM, &sp, sizeof(sp)); - if (ret == 0) - intel->kernel_exec_fencing = TRUE; + if (ret != 0) + ErrorF("no kernel exec fencing, wtf?"); init.gtt_start = intel->memory_manager->offset; init.gtt_end = intel->memory_manager->offset + intel->memory_manager->size; @@ -451,15 +440,14 @@ Bool i830_allocator_init(ScrnInfoPtr scrn, unsigned long size) /* Tell the kernel to manage it */ ret = ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_INIT, &init); - if (ret == 0) { - intel->have_gem = TRUE; - i830_init_bufmgr(scrn); - } else { + if (ret != 0) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Failed to initialize kernel memory manager\n"); i830_free_memory(scrn, intel->memory_manager); intel->memory_manager = NULL; + return FALSE; } + i830_init_bufmgr(scrn); } else { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Failed to allocate space for kernel memory manager\n"); @@ -473,8 +461,8 @@ void i830_allocator_fini(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - /* The memory manager is more special */ if - (intel->memory_manager) { + /* The memory manager is more special */ + if (intel->memory_manager) { i830_free_memory(scrn, intel->memory_manager); intel->memory_manager = NULL; } @@ -576,7 +564,7 @@ i830_allocate_aperture(ScrnInfoPtr scrn, const char *name, unsigned long size, intel_screen_private *intel = intel_get_screen_private(scrn); i830_memory *mem, *scan; - mem = xcalloc(1, sizeof(*mem)); + mem = calloc(1, sizeof(*mem)); if (mem == NULL) return NULL; @@ -585,7 +573,7 @@ i830_allocate_aperture(ScrnInfoPtr scrn, const char *name, unsigned long size, mem->name = xstrdup(name); if (mem->name == NULL) { - xfree(mem); + free(mem); return NULL; } /* Only allocate 
page-sized increments. */ @@ -636,8 +624,8 @@ i830_allocate_aperture(ScrnInfoPtr scrn, const char *name, unsigned long size, } if (scan->next == NULL) { /* Reached the end of the list, and didn't find space */ - xfree(mem->name); - xfree(mem); + free(mem->name); + free(mem); return NULL; } /* Insert new allocation into the list */ @@ -709,21 +697,21 @@ static i830_memory *i830_allocate_memory_bo(ScrnInfoPtr scrn, const char *name, size = ALIGN(size, GTT_PAGE_SIZE); align = i830_get_fence_alignment(intel, size); - mem = xcalloc(1, sizeof(*mem)); + mem = calloc(1, sizeof(*mem)); if (mem == NULL) return NULL; mem->name = xstrdup(name); if (mem->name == NULL) { - xfree(mem); + free(mem); return NULL; } mem->bo = dri_bo_alloc(intel->bufmgr, name, size, align); if (!mem->bo) { - xfree(mem->name); - xfree(mem); + free(mem->name); + free(mem); return NULL; } @@ -767,8 +755,8 @@ static i830_memory *i830_allocate_memory_bo(ScrnInfoPtr scrn, const char *name, if (scrn->vtSema || intel->use_drm_mode) { if (!i830_bind_memory(scrn, mem)) { dri_bo_unreference (mem->bo); - xfree(mem->name); - xfree(mem); + free(mem->name); + free(mem); return NULL; } } @@ -838,7 +826,7 @@ i830_memory *i830_allocate_memory(ScrnInfoPtr scrn, const char *name, * kernel. Under UMS, we separately reserve space for * a few objects (overlays, power context, cursors, etc). 
*/ - if (intel->have_gem && (intel->use_drm_mode || + if ((intel->use_drm_mode || !(flags & (NEED_PHYSICAL_ADDR|NEED_LIFETIME_FIXED)))) { return i830_allocate_memory_bo(scrn, name, size, pitch, alignment, flags, tile_format); @@ -943,31 +931,6 @@ i830_describe_allocations(ScrnInfoPtr scrn, int verbosity, const char *prefix) } } -static Bool i830_allocate_ringbuffer(ScrnInfoPtr scrn) -{ - intel_screen_private *intel = intel_get_screen_private(scrn); - - if (intel->have_gem || intel->ring.mem != NULL) - return TRUE; - - /* We don't have any mechanism in the DRM yet to alert it that we've - * moved the ringbuffer since init time, so allocate it fixed for its - * lifetime. - */ - intel->ring.mem = i830_allocate_memory(scrn, "ring buffer", - PRIMARY_RINGBUFFER_SIZE, PITCH_NONE, GTT_PAGE_SIZE, - NEED_LIFETIME_FIXED, TILE_NONE); - if (intel->ring.mem == NULL) { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "Failed to allocate Ring Buffer space\n"); - return FALSE; - } - - intel->ring.tail_mask = intel->ring.mem->size - 1; - intel->ring.virtual_start = intel->FbBase + intel->ring.mem->offset; - return TRUE; -} - /** * Allocate space for overlay registers. */ @@ -1247,10 +1210,6 @@ Bool i830_allocate_2d_memory(ScrnInfoPtr scrn) "\tthe agpgart module loaded.\n"); return FALSE; } - - /* Allocate the ring buffer first, so it ends up in stolen mem. 
*/ - if (!i830_allocate_ringbuffer(scrn)) - return FALSE; } if (intel->fb_compression) @@ -1263,18 +1222,6 @@ Bool i830_allocate_2d_memory(ScrnInfoPtr scrn) return FALSE; } - if (!intel->have_gem) { - intel->fake_bufmgr_mem = i830_allocate_memory(scrn, - "fake bufmgr", MB(8), PITCH_NONE, GTT_PAGE_SIZE, 0, - TILE_NONE); - if (intel->fake_bufmgr_mem == NULL) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "Failed to allocate fake bufmgr space.\n"); - return FALSE; - } - i830_init_bufmgr(scrn); - } - if (!intel->use_drm_mode) i830_allocate_overlay(scrn); @@ -1680,53 +1627,55 @@ void i830_free_xvmc_buffer(ScrnInfoPtr scrn, i830_memory * buffer) #endif -static void i830_set_max_gtt_map_size(ScrnInfoPtr scrn) +static void i830_set_max_bo_size(intel_screen_private *intel, + const struct drm_i915_gem_get_aperture *aperture) { - intel_screen_private *intel = intel_get_screen_private(scrn); - struct drm_i915_gem_get_aperture aperture; - int ret; - - /* Default low value in case it gets used during server init. */ - intel->max_gtt_map_size = 16 * 1024 * 1024; - - if (!intel->have_gem) - return; + if (aperture->aper_available_size) + /* Large BOs will tend to hit SW fallbacks frequently, and also will + * tend to fail to successfully map when doing SW fallbacks because we + * overcommit address space for BO access, or worse cause aperture + * thrashing. + */ + intel->max_bo_size = aperture->aper_available_size / 2; + else + intel->max_bo_size = 64 * 1024 * 1024; +} - ret = - ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (ret == 0) { - /* Let objects up get bound up to the size where only 2 would - * fit in the aperture, but then leave slop to account for - * alignment like libdrm does. 
+static void i830_set_max_gtt_map_size(intel_screen_private *intel, + const struct drm_i915_gem_get_aperture *aperture) +{ + if (aperture->aper_available_size) + /* Let objects up get bound up to the size where only 2 would fit in + * the aperture, but then leave slop to account for alignment like + * libdrm does. */ intel->max_gtt_map_size = - aperture.aper_available_size * 3 / 4 / 2; - } + aperture->aper_available_size * 3 / 4 / 2; + else + intel->max_gtt_map_size = 16 * 1024 * 1024; } -static void i830_set_max_tiling_size(ScrnInfoPtr scrn) +static void i830_set_max_tiling_size(intel_screen_private *intel, + const struct drm_i915_gem_get_aperture *aperture) { - intel_screen_private *intel = intel_get_screen_private(scrn); - struct drm_i915_gem_get_aperture aperture; - int ret; - - /* Default low value in case it gets used during server init. */ - intel->max_tiling_size = 4 * 1024 * 1024; - - ret = - ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (ret == 0) { + if (aperture->aper_available_size) /* Let objects be tiled up to the size where only 4 would fit in * the aperture, presuming worst case alignment. 
*/ - intel->max_tiling_size = aperture.aper_available_size / 4; - if (!IS_I965G(intel)) - intel->max_tiling_size /= 2; - } + intel->max_tiling_size = aperture->aper_available_size / 4; + else + intel->max_tiling_size = 4 * 1024 * 1024; } void i830_set_gem_max_sizes(ScrnInfoPtr scrn) { - i830_set_max_gtt_map_size(scrn); - i830_set_max_tiling_size(scrn); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct drm_i915_gem_get_aperture aperture; + + aperture.aper_available_size = 0; + ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + i830_set_max_bo_size(intel, &aperture); + i830_set_max_gtt_map_size(intel, &aperture); + i830_set_max_tiling_size(intel, &aperture); } diff --git a/src/i830_quirks.c b/src/i830_quirks.c index 1d9b681c..28585693 100644 --- a/src/i830_quirks.c +++ b/src/i830_quirks.c @@ -73,7 +73,7 @@ static void i830_dmi_store_##field(void) \ int ret;\ f = fopen(DMIID_FILE(field), "r");\ if (f == NULL) {\ - xfree(i830_dmi_data[field]); i830_dmi_data[field] = NULL;\ + free(i830_dmi_data[field]); i830_dmi_data[field] = NULL;\ return;\ }\ ret = fread(i830_dmi_data[field], 64, 1, f); \ @@ -104,11 +104,11 @@ static void i830_dmi_scan(void) int i; for (i = 0; i < dmi_data_max; i++) { - i830_dmi_data[i] = xcalloc(64, sizeof(char)); + i830_dmi_data[i] = calloc(64, sizeof(char)); if (!i830_dmi_data[i]) { int j; for (j = 0; j < i; j++) { - xfree(i830_dmi_data[j]); + free(i830_dmi_data[j]); i830_dmi_data[i] = NULL; } return; @@ -426,5 +426,5 @@ void i830_fixup_devices(ScrnInfoPtr scrn) for (i = 0; i < dmi_data_max; i++) if (i830_dmi_data[i]) - xfree(i830_dmi_data[i]); + free(i830_dmi_data[i]); } diff --git a/src/i830_render.c b/src/i830_render.c index da075d93..43f4e28b 100644 --- a/src/i830_render.c +++ b/src/i830_render.c @@ -126,19 +126,18 @@ static struct blendinfo i830_blend_op[] = { {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, }; -/* The x8* formats could use MT_32BIT_X* on 855+, but since we implement - * workarounds for 
830/845 anyway, we just rely on those whether the hardware - * could handle it for us or not. - */ static struct formatinfo i830_tex_formats[] = { - {PICT_a8r8g8b8, MT_32BIT_ARGB8888}, - {PICT_x8r8g8b8, MT_32BIT_ARGB8888}, - {PICT_a8b8g8r8, MT_32BIT_ABGR8888}, - {PICT_x8b8g8r8, MT_32BIT_ABGR8888}, - {PICT_r5g6b5, MT_16BIT_RGB565}, - {PICT_a1r5g5b5, MT_16BIT_ARGB1555}, - {PICT_x1r5g5b5, MT_16BIT_ARGB1555}, - {PICT_a8, MT_8BIT_A8}, + {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, + {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, + {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, + {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, + {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, + {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, +}; + +static struct formatinfo i855_tex_formats[] = { + {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, + {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, }; static Bool i830_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format) @@ -221,61 +220,26 @@ static Bool i830_get_blend_cntl(ScrnInfoPtr scrn, int op, PicturePtr mask, return TRUE; } -static Bool i830_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, - int unit) +static uint32_t i8xx_get_card_format(intel_screen_private *intel, + PicturePtr picture) { - if (picture->repeatType > RepeatReflect) { - intel_debug_fallback(scrn, "Unsupported picture repeat %d\n", - picture->repeatType); - return FALSE; - } + int i; - if (picture->filter != PictFilterNearest && - picture->filter != PictFilterBilinear) { - intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", - picture->filter); - return FALSE; + for (i = 0; i < sizeof(i830_tex_formats) / sizeof(i830_tex_formats[0]); + i++) { + if (i830_tex_formats[i].fmt == picture->format) + return i830_tex_formats[i].card_fmt; } - if (picture->pDrawable) { - int w, h, i; - - w = picture->pDrawable->width; - h = picture->pDrawable->height; - if ((w > 2048) || (h > 2048)) { - intel_debug_fallback(scrn, - "Picture w/h too 
large (%dx%d)\n", - w, h); - return FALSE; - } - - for (i = 0; - i < sizeof(i830_tex_formats) / sizeof(i830_tex_formats[0]); + if (IS_I85X(intel) || IS_I865G(intel)) { + for (i = 0; i < sizeof(i855_tex_formats) / sizeof(i855_tex_formats[0]); i++) { - if (i830_tex_formats[i].fmt == picture->format) - break; - } - if (i == sizeof(i830_tex_formats) / sizeof(i830_tex_formats[0])) - { - intel_debug_fallback(scrn, "Unsupported picture format " - "0x%x\n", - (int)picture->format); - return FALSE; + if (i855_tex_formats[i].fmt == picture->format) + return i855_tex_formats[i].card_fmt; } } - return TRUE; -} - -static uint32_t i8xx_get_card_format(PicturePtr picture) -{ - int i; - for (i = 0; i < sizeof(i830_tex_formats) / sizeof(i830_tex_formats[0]); - i++) { - if (i830_tex_formats[i].fmt == picture->format) - return i830_tex_formats[i].card_fmt; - } - FatalError("Unsupported format type %d\n", picture->format); + return 0; } static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) @@ -297,8 +261,6 @@ static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) else texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; - format = i8xx_get_card_format(picture); - switch (picture->repeatType) { case RepeatNone: wrap_mode = TEXCOORDMODE_CLAMP_BORDER; @@ -331,63 +293,59 @@ static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) } filter |= (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); - { - if (pixmap->drawable.bitsPerPixel == 8) - format |= MAPSURF_8BIT; - else if (pixmap->drawable.bitsPerPixel == 16) - format |= MAPSURF_16BIT; - else - format |= MAPSURF_32BIT; - - if (i830_pixmap_tiled(pixmap)) { - tiling_bits = TM0S1_TILED_SURFACE; - if (i830_get_pixmap_intel(pixmap)->tiling - == I915_TILING_Y) - tiling_bits |= TM0S1_TILE_WALK; - } else - tiling_bits = 0; - - ATOMIC_BATCH(10); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | - LOAD_TEXTURE_MAP(unit) | 4); - OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_SAMPLER, 0, 0); - 
OUT_BATCH(((pixmap->drawable.height - - 1) << TM0S1_HEIGHT_SHIFT) | ((pixmap->drawable.width - - 1) << - TM0S1_WIDTH_SHIFT) | - format | tiling_bits); - OUT_BATCH((pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); - OUT_BATCH(filter); - OUT_BATCH(0); /* default color */ - OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | - ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | - texcoordtype | ENABLE_ADDR_V_CNTL | - TEXCOORD_ADDR_V_MODE(wrap_mode) | - ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode)); - /* map texel stream */ - OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); - if (unit == 0) - OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | - TEXBIND_SET1(TEXCOORDSRC_KEEP) | - TEXBIND_SET2(TEXCOORDSRC_KEEP) | - TEXBIND_SET3(TEXCOORDSRC_KEEP)); - else - OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | - TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | - TEXBIND_SET2(TEXCOORDSRC_KEEP) | - TEXBIND_SET3(TEXCOORDSRC_KEEP)); - OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | (unit << 16) | - DISABLE_TEX_STREAM_BUMP | - ENABLE_TEX_STREAM_COORD_SET | - TEX_STREAM_COORD_SET(unit) | - ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(unit)); - ADVANCE_BATCH(); - } + if (i830_pixmap_tiled(pixmap)) { + tiling_bits = TM0S1_TILED_SURFACE; + if (i830_get_pixmap_intel(pixmap)->tiling + == I915_TILING_Y) + tiling_bits |= TM0S1_TILE_WALK; + } else + tiling_bits = 0; + + format = i8xx_get_card_format(intel, picture); + + assert(intel->in_batch_atomic); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_MAP(unit) | 4); + OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(((pixmap->drawable.height - + 1) << TM0S1_HEIGHT_SHIFT) | ((pixmap->drawable.width - + 1) << + TM0S1_WIDTH_SHIFT) | + format | tiling_bits); + OUT_BATCH((pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + OUT_BATCH(filter); + OUT_BATCH(0); /* default color */ + OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | + texcoordtype | ENABLE_ADDR_V_CNTL | 
+ TEXCOORD_ADDR_V_MODE(wrap_mode) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode)); + /* map texel stream */ + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + if (unit == 0) + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_KEEP) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + else + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | (unit << 16) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(unit) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(unit)); } Bool -i830_check_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, - PicturePtr dest_picture) +i830_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) { ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; uint32_t tmp1; @@ -414,25 +372,77 @@ i830_check_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, } } - if (!i830_check_composite_texture(scrn, source_picture, 0)) { - intel_debug_fallback(scrn, "Check Src picture texture\n"); + if (!i830_get_dest_format(dest_picture, &tmp1)) { + intel_debug_fallback(scrn, "Get Color buffer format\n"); return FALSE; } - if (mask_picture != NULL - && !i830_check_composite_texture(scrn, mask_picture, 1)) { - intel_debug_fallback(scrn, "Check Mask picture texture\n"); + + if (width > 2048 || height > 2048) { + intel_debug_fallback(scrn, "Operation is too large (%d, %d)\n", width, height); return FALSE; } - if (!i830_get_dest_format(dest_picture, &tmp1)) { - intel_debug_fallback(scrn, "Get Color buffer format\n"); + return TRUE; +} + +Bool +i830_check_composite_target(PixmapPtr pixmap) +{ + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048) + return FALSE; + + 
if(!intel_check_pitch_3d(pixmap)) return FALSE; - } return TRUE; } Bool +i830_check_composite_texture(ScreenPtr screen, PicturePtr picture) +{ + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (picture->repeatType > RepeatReflect) { + intel_debug_fallback(scrn, "Unsupported picture repeat %d\n", + picture->repeatType); + return FALSE; + } + + if (picture->filter != PictFilterNearest && + picture->filter != PictFilterBilinear) { + intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", + picture->filter); + return FALSE; + } + + if (picture->pDrawable) { + int w, h; + + w = picture->pDrawable->width; + h = picture->pDrawable->height; + if ((w > 2048) || (h > 2048)) { + intel_debug_fallback(scrn, + "Picture w/h too large (%dx%d)\n", + w, h); + return FALSE; + } + + /* XXX we can use the xrgb32 types if there the picture covers the clip */ + if (!i8xx_get_card_format(intel, picture)) { + intel_debug_fallback(scrn, "Unsupported picture format " + "0x%x\n", + (int)picture->format); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + +Bool i830_prepare_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, PicturePtr dest_picture, PixmapPtr source, PixmapPtr mask, PixmapPtr dest) @@ -456,6 +466,20 @@ i830_prepare_composite(int op, PicturePtr source_picture, if (!intel_check_pitch_3d(source)) return FALSE; if (mask) { + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. 
+ */ + if (i830_blend_op[op].src_alpha && + (i830_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, "Component alpha not " + "supported with source alpha and " + "source value blending.\n"); + return FALSE; + } + } if (!intel_check_pitch_3d(mask)) return FALSE; } @@ -560,7 +584,7 @@ static void i830_emit_composite_state(ScrnInfoPtr scrn) IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_RENDER; - ATOMIC_BATCH(21); + assert(intel->in_batch_atomic); if (i830_pixmap_tiled(intel->render_dest)) { tiling_bits = BUF_3D_TILED_SURFACE; @@ -626,8 +650,6 @@ static void i830_emit_composite_state(ScrnInfoPtr scrn) } OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt); - ADVANCE_BATCH(); - i830_texture_setup(intel->render_source_picture, intel->render_source, 0); if (intel->render_mask) { i830_texture_setup(intel->render_mask_picture, @@ -767,8 +789,6 @@ i830_emit_composite_primitive(PixmapPtr dest, num_floats = 3 * per_vertex; - ATOMIC_BATCH(1 + num_floats); - OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (num_floats - 1)); OUT_BATCH_F(dstX + w); OUT_BATCH_F(dstY + h); @@ -814,8 +834,6 @@ i830_emit_composite_primitive(PixmapPtr dest, OUT_BATCH_F(mask_w[0]); } } - - ADVANCE_BATCH(); } /** diff --git a/src/i830_sdvo.c b/src/i830_sdvo.c index 393804ea..a9275afa 100644 --- a/src/i830_sdvo.c +++ b/src/i830_sdvo.c @@ -1723,7 +1723,7 @@ i830_sdvo_output_setup (xf86OutputPtr output, uint16_t flag) } /* if exist origin name it will be freed in xf86OutputRename() */ - dev_priv->name = xalloc(strlen(name_prefix) + strlen(name_suffix) + 1); + dev_priv->name = malloc(strlen(name_prefix) + strlen(name_suffix) + 1); strcpy (dev_priv->name, name_prefix); strcat (dev_priv->name, name_suffix); @@ -1740,11 +1740,11 @@ i830_sdvo_output_setup (xf86OutputPtr output, uint16_t flag) if (output->randr_output) { int nameLength = strlen(dev_priv->name); RROutputPtr randr_output = output->randr_output; - char *name = xalloc(nameLength + 1); + char *name = malloc(nameLength 
+ 1); if (name) { if (randr_output->name != (char *) (randr_output + 1)) - xfree(randr_output->name); + free(randr_output->name); randr_output->name = name; randr_output->nameLength = nameLength; memcpy(randr_output->name, dev_priv->name, nameLength); @@ -1825,10 +1825,10 @@ i830_sdvo_detect(xf86OutputPtr output) /* Check EDID in DVI-I case */ edid_mon = xf86OutputGetEDID (output, intel_output->pDDCBus); if (edid_mon && !DIGITAL(edid_mon->features.input_type)) { - xfree(edid_mon); + free(edid_mon); return XF86OutputStatusDisconnected; } - xfree(edid_mon); + free(edid_mon); } return XF86OutputStatusConnected; } @@ -1898,7 +1898,7 @@ i830_sdvo_get_tv_mode(DisplayModePtr *head, int width, int height, { DisplayModePtr mode; - mode = xcalloc(1, sizeof(*mode)); + mode = calloc(1, sizeof(*mode)); if (mode == NULL) return; @@ -2020,14 +2020,14 @@ i830_sdvo_destroy (xf86OutputPtr output) RROutputPtr randr_output = output->randr_output; if (randr_output->name && randr_output->name != (char *) (randr_output + 1)) - xfree(randr_output->name); + free(randr_output->name); } if (dev_priv->sdvo_lvds_fixed_mode) xf86DeleteMode(&dev_priv->sdvo_lvds_fixed_mode, dev_priv->sdvo_lvds_fixed_mode); - xfree (intel_output); + free (intel_output); } } diff --git a/src/i830_tv.c b/src/i830_tv.c index 8f37f3c8..71fd7df2 100644 --- a/src/i830_tv.c +++ b/src/i830_tv.c @@ -1570,7 +1570,7 @@ static void i830_tv_destroy (xf86OutputPtr output) { if (output->driver_private) - xfree (output->driver_private); + free (output->driver_private); } #ifdef RANDR_12_INTERFACE @@ -1807,16 +1807,16 @@ i830_tv_set_property(xf86OutputPtr output, Atom property, memcpy (&atom, value->data, 4); name = NameForAtom (atom); - val = xalloc (strlen (name) + 1); + val = malloc (strlen (name) + 1); if (!val) return FALSE; strcpy (val, name); if (!i830_tv_mode_lookup (val)) { - xfree (val); + free (val); return FALSE; } - xfree (dev_priv->tv_format); + free (dev_priv->tv_format); dev_priv->tv_format = val; if 
(intel->starting || output->crtc == NULL) diff --git a/src/i830_uxa.c b/src/i830_uxa.c index 78237046..03a01372 100644 --- a/src/i830_uxa.c +++ b/src/i830_uxa.c @@ -36,6 +36,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "i830.h" #include "i810_reg.h" #include "i915_drm.h" +#include "brw_defines.h" #include <string.h> #include <sys/mman.h> #include <errno.h> @@ -78,7 +79,31 @@ const int I830PatternROP[16] = { ROP_1 }; -static int uxa_pixmap_index; +#if HAS_DEVPRIVATEKEYREC +DevPrivateKeyRec uxa_pixmap_index; +#else +int uxa_pixmap_index; +#endif + +static void +ironlake_blt_workaround(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Ironlake has a limitation that a 3D or Media command can't + * be the first command after a BLT, unless it's + * non-pipelined. Instead of trying to track it and emit a + * command at the right time, we just emit a dummy + * non-pipelined 3D instruction after each blit. + */ + + if (IS_IGDNG(intel)) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} Bool i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table, @@ -93,7 +118,7 @@ i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table, bo_table[0] = intel->batch_bo; if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) { - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); bo_table[0] = intel->batch_bo; if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) { @@ -127,14 +152,13 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap, { ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); - int pitch, pitch_align; - int size; + int pitch, size; if (*tiling != I915_TILING_NONE) { /* First check whether tiling is necessary. 
*/ - pitch_align = intel->accel_pixmap_pitch_alignment; - size = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8, - pitch_align) * ALIGN (h, 2); + pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8; + pitch = ROUND_TO(pitch, intel->accel_pixmap_pitch_alignment); + size = pitch * ALIGN (h, 2); if (!IS_I965G(intel)) { /* Older hardware requires fences to be pot size * aligned with a minimum of 1 MiB, so causes @@ -142,6 +166,12 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap, */ if (size < 1024*1024/2) *tiling = I915_TILING_NONE; + + /* Gen 2/3 has a maximum stride for tiling of + * 8192 bytes. + */ + if (pitch > KB(8)) + *tiling = I915_TILING_NONE; } else if (size <= 4096) { /* Disable tiling beneath a page size, we will not see * any benefit from reducing TLB misses and instead @@ -154,29 +184,19 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap, pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8; if (pitch <= 256) *tiling = I915_TILING_NONE; - repeat: - if (*tiling == I915_TILING_NONE) { - pitch_align = intel->accel_pixmap_pitch_alignment; - } else { - pitch_align = 512; - } - *stride = ROUND_TO(pitch, pitch_align); + if (*tiling != I915_TILING_NONE) { + int aligned_h; - if (*tiling == I915_TILING_NONE) { - /* Round the height up so that the GPU's access to a 2x2 aligned - * subspan doesn't address an invalid page offset beyond the - * end of the GTT. - */ - size = *stride * ALIGN(h, 2); - } else { - int aligned_h = h; if (*tiling == I915_TILING_X) aligned_h = ALIGN(h, 8); else aligned_h = ALIGN(h, 32); - *stride = i830_get_fence_pitch(intel, *stride, *tiling); + *stride = i830_get_fence_pitch(intel, + ROUND_TO(pitch, 512), + *tiling); + /* Round the object up to the size of the fence it will live in * if necessary. We could potentially make the kernel allocate * a larger aperture space and just bind the subset of pages in, @@ -184,17 +204,45 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap, * with drm_intel_bufmgr_check_aperture(). 
*/ size = i830_get_fence_size(intel, *stride * aligned_h); - assert(size >= *stride * aligned_h); + + if (size > intel->max_tiling_size) + *tiling = I915_TILING_NONE; } - if (*tiling != I915_TILING_NONE && size > intel->max_tiling_size) { - *tiling = I915_TILING_NONE; - goto repeat; + if (*tiling == I915_TILING_NONE) { + /* Round the height up so that the GPU's access to a 2x2 aligned + * subspan doesn't address an invalid page offset beyond the + * end of the GTT. + */ + *stride = ROUND_TO(pitch, intel->accel_pixmap_pitch_alignment); + size = *stride * ALIGN(h, 2); } return size; } +static Bool +i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) +{ + ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum]; + + if (!UXA_PM_IS_SOLID(drawable, planemask)) { + intel_debug_fallback(scrn, "planemask is not solid\n"); + return FALSE; + } + + switch (drawable->bitsPerPixel) { + case 8: + case 16: + case 32: + break; + default: + return FALSE; + } + + return TRUE; +} + /** * Sets up hardware state for a series of solid fills. 
*/ @@ -203,32 +251,14 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) { ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); - unsigned long pitch; drm_intel_bo *bo_table[] = { NULL, /* batch_bo */ i830_get_pixmap_bo(pixmap), }; - if (!UXA_PM_IS_SOLID(&pixmap->drawable, planemask)) { - intel_debug_fallback(scrn, "planemask is not solid\n"); - return FALSE; - } - - if (pixmap->drawable.bitsPerPixel == 24) { - intel_debug_fallback(scrn, "solid 24bpp unsupported!\n"); - return FALSE; - } - - if (pixmap->drawable.bitsPerPixel < 8) { - intel_debug_fallback(scrn, "under 8bpp pixmaps unsupported\n"); - return FALSE; - } - if (!intel_check_pitch_2d(pixmap)) return FALSE; - pitch = i830_pixmap_pitch(pixmap); - if (!i830_pixmap_pitch_is_aligned(pixmap)) { intel_debug_fallback(scrn, "pixmap pitch not aligned"); return FALSE; @@ -248,11 +278,10 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) case 32: /* RGB8888 */ intel->BR[13] |= ((1 << 24) | (1 << 25)); - if (pixmap->drawable.depth == 24) - fg |= 0xff000000; break; } intel->BR[16] = fg; + return TRUE; } @@ -272,6 +301,9 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) if (y2 > pixmap->drawable.height) y2 = pixmap->drawable.height; + if (x2 <= x1 || y2 <= y1) + return; + pitch = i830_pixmap_pitch(pixmap); { @@ -299,6 +331,8 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) OUT_BATCH(intel->BR[16]); ADVANCE_BATCH(); } + + ironlake_blt_workaround(scrn); } static void i830_uxa_done_solid(PixmapPtr pixmap) @@ -313,39 +347,57 @@ static void i830_uxa_done_solid(PixmapPtr pixmap) * - support planemask using FULL_BLT_CMD? 
*/ static Bool -i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, - int ydir, int alu, Pixel planemask) +i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest, + int alu, Pixel planemask) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); - drm_intel_bo *bo_table[] = { - NULL, /* batch_bo */ - i830_get_pixmap_bo(source), - i830_get_pixmap_bo(dest), - }; + if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) { intel_debug_fallback(scrn, "planemask is not solid"); return FALSE; } - if (dest->drawable.bitsPerPixel < 8) { - intel_debug_fallback(scrn, "under 8bpp pixmaps unsupported\n"); + if (source->drawable.bitsPerPixel != dest->drawable.bitsPerPixel) { + intel_debug_fallback(scrn, "mixed bpp copies unsupported\n"); return FALSE; } - - if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + switch (source->drawable.bitsPerPixel) { + case 8: + case 16: + case 32: + break; + default: return FALSE; + } if (!intel_check_pitch_2d(source)) return FALSE; if (!intel_check_pitch_2d(dest)) return FALSE; + return TRUE; +} + +static Bool +i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, + int ydir, int alu, Pixel planemask) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + i830_get_pixmap_bo(source), + i830_get_pixmap_bo(dest), + }; + + if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + intel->render_source = source; intel->BR[13] = I830CopyROP[alu] << 16; - switch (source->drawable.bitsPerPixel) { case 8: break; @@ -356,6 +408,7 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, intel->BR[13] |= ((1 << 25) | (1 << 24)); break; } + return TRUE; } @@ -416,6 +469,7 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, ADVANCE_BATCH(); } + 
ironlake_blt_workaround(scrn); } static void i830_uxa_done_copy(PixmapPtr dest) @@ -433,6 +487,10 @@ static void i830_uxa_done_copy(PixmapPtr dest) void i830_done_composite(PixmapPtr dest) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->vertex_flush) + intel->vertex_flush(intel); i830_debug_flush(scrn); } @@ -518,16 +576,6 @@ Bool i830_transform_is_affine(PictTransformPtr t) return t->matrix[2][0] == 0 && t->matrix[2][1] == 0; } -struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap) -{ - return dixLookupPrivate(&pixmap->devPrivates, &uxa_pixmap_index); -} - -static void i830_uxa_set_pixmap_intel(PixmapPtr pixmap, struct intel_pixmap *intel) -{ - dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, intel); -} - dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap) { struct intel_pixmap *intel; @@ -547,21 +595,35 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo) priv = i830_get_pixmap_intel(pixmap); + if (priv == NULL && bo == NULL) + return; + if (priv != NULL) { + if (priv->bo == bo) + return; + if (list_is_empty(&priv->batch)) { dri_bo_unreference(priv->bo); + } else if (!drm_intel_bo_is_reusable(priv->bo)) { + dri_bo_unreference(priv->bo); + list_del(&priv->batch); + list_del(&priv->flush); } else { list_add(&priv->in_flight, &intel->in_flight); priv = NULL; } + + if (intel->render_current_dest == pixmap) + intel->render_current_dest = NULL; } if (bo != NULL) { + uint32_t tiling; uint32_t swizzle_mode; int ret; if (priv == NULL) { - priv = xcalloc(1, sizeof (struct intel_pixmap)); + priv = calloc(1, sizeof (struct intel_pixmap)); if (priv == NULL) goto BAIL; @@ -574,21 +636,24 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo) priv->stride = i830_pixmap_pitch(pixmap); ret = drm_intel_bo_get_tiling(bo, - &priv->tiling, + &tiling, &swizzle_mode); if (ret != 0) { FatalError("Couldn't get tiling on bo %p: %s\n", bo, strerror(-ret)); } + + priv->tiling = 
tiling; + priv->busy = -1; } else { if (priv != NULL) { - xfree(priv); + free(priv); priv = NULL; } } BAIL: - i830_uxa_set_pixmap_intel(pixmap, priv); + i830_set_pixmap_intel(pixmap, priv); } static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access) @@ -600,40 +665,30 @@ static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access) int ret; if (!list_is_empty(&priv->batch) && - (access == UXA_ACCESS_RW || priv->batch_write_domain)) - intel_batch_submit(scrn); - - if (bo) { - if (intel->kernel_exec_fencing) { - if (bo->size > intel->max_gtt_map_size || !intel->have_gem) { - ret = dri_bo_map(bo, access == UXA_ACCESS_RW); - if (ret != 0) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "%s: bo map failed: %s\n", - __FUNCTION__, - strerror(-ret)); - return FALSE; - } - } else { - ret = drm_intel_gem_bo_map_gtt(bo); - if (ret != 0) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "%s: gtt bo map failed: %s\n", - __FUNCTION__, - strerror(-ret)); - return FALSE; - } - } - pixmap->devPrivate.ptr = bo->virtual; - } else { - if (drm_intel_bo_pin(bo, 4096) != 0) - return FALSE; - drm_intel_gem_bo_start_gtt_access(bo, access == - UXA_ACCESS_RW); - pixmap->devPrivate.ptr = intel->FbBase + bo->offset; + (access == UXA_ACCESS_RW || priv->batch_write)) + intel_batch_submit(scrn, FALSE); + + if (bo->size > intel->max_gtt_map_size) { + ret = dri_bo_map(bo, access == UXA_ACCESS_RW); + if (ret != 0) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: bo map failed: %s\n", + __FUNCTION__, + strerror(-ret)); + return FALSE; + } + } else { + ret = drm_intel_gem_bo_map_gtt(bo); + if (ret != 0) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: gtt bo map failed: %s\n", + __FUNCTION__, + strerror(-ret)); + return FALSE; } - } else - i830_wait_ring_idle(scrn); + } + pixmap->devPrivate.ptr = bo->virtual; + priv->busy = 0; return TRUE; } @@ -648,206 +703,207 @@ static void i830_uxa_finish_access(PixmapPtr pixmap) if (bo == intel->front_buffer->bo) intel->need_flush = TRUE; 
- if (intel->kernel_exec_fencing) { - if (bo->size > intel->max_gtt_map_size) - dri_bo_unmap(bo); - else - drm_intel_gem_bo_unmap_gtt(bo); - pixmap->devPrivate.ptr = NULL; - } + if (bo->size > intel->max_gtt_map_size) + dri_bo_unmap(bo); + else + drm_intel_gem_bo_unmap_gtt(bo); + pixmap->devPrivate.ptr = NULL; } -static Bool i830_bo_put_image(PixmapPtr pixmap, dri_bo *bo, char *src, int src_pitch, int w, int h) +static Bool i830_uxa_pixmap_put_image(PixmapPtr pixmap, + char *src, int src_pitch, + int x, int y, int w, int h) { + struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap); int stride = i830_pixmap_pitch(pixmap); - - /* fill alpha channel */ - if (pixmap->drawable.depth == 24) { - pixman_image_t *src_image, *dst_image; - - src_image = pixman_image_create_bits (PIXMAN_x8r8g8b8, - w, h, - (uint32_t *) src, src_pitch); - - dst_image = pixman_image_create_bits (PIXMAN_a8r8g8b8, - w, h, - (uint32_t *) bo->virtual, stride); - - if (src_image && dst_image) - pixman_image_composite (PictOpSrc, - src_image, NULL, dst_image, - 0, 0, - 0, 0, - 0, 0, - w, h); - - if (src_image) - pixman_image_unref (src_image); - - if (dst_image) - pixman_image_unref (dst_image); - - if (src_image == NULL || dst_image == NULL) - return FALSE; - } else if (src_pitch == stride) { - memcpy (bo->virtual, src, stride * h); - } else { - char *dst = bo->virtual; - int row_length = w * pixmap->drawable.bitsPerPixel/8; + int ret = FALSE; + + if (src_pitch == stride && w == pixmap->drawable.width && priv->tiling == I915_TILING_NONE) { + ret = drm_intel_bo_subdata(priv->bo, y * stride, stride * h, src) == 0; + } else if (drm_intel_gem_bo_map_gtt(priv->bo) == 0) { + char *dst = priv->bo->virtual; + int cpp = pixmap->drawable.bitsPerPixel/8; + int row_length = w * cpp; int num_rows = h; - while (num_rows--) { + if (row_length == src_pitch && src_pitch == stride) + num_rows = 1, row_length *= h; + dst += y * stride + x * cpp; + do { memcpy (dst, src, row_length); src += src_pitch; dst += stride; 
- } + } while (--num_rows); + drm_intel_gem_bo_unmap_gtt(priv->bo); + ret = TRUE; } - return TRUE; + return ret; } -static Bool -i830_uxa_pixmap_swap_bo_with_image(PixmapPtr pixmap, - char *src, int src_pitch) +static Bool i830_uxa_put_image(PixmapPtr pixmap, + int x, int y, + int w, int h, + char *src, int src_pitch) { - ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; - intel_screen_private *intel = intel_get_screen_private(scrn); struct intel_pixmap *priv; - dri_bo *bo; - uint32_t tiling = I915_TILING_X; - int stride; - int w = pixmap->drawable.width; - int h = pixmap->drawable.height; - Bool ret; priv = i830_get_pixmap_intel(pixmap); + if (!intel_pixmap_is_busy(priv)) { + /* bo is not busy so can be replaced without a stall, upload in-place. */ + return i830_uxa_pixmap_put_image(pixmap, src, src_pitch, x, y, w, h); + } else { + ScreenPtr screen = pixmap->drawable.pScreen; + + if (x == 0 && y == 0 && + w == pixmap->drawable.width && + h == pixmap->drawable.height) + { + intel_screen_private *intel = intel_get_screen_private(xf86Screens[screen->myNum]); + uint32_t tiling = priv->tiling; + int size, stride; + dri_bo *bo; + + /* Replace busy bo. 
*/ + size = i830_uxa_pixmap_compute_size (pixmap, w, h, + &tiling, &stride); + if (size > intel->max_gtt_map_size) + return FALSE; - if (priv->batch_read_domains || drm_intel_bo_busy(priv->bo)) { - unsigned int size; + bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0); + if (bo == NULL) + return FALSE; - size = i830_uxa_pixmap_compute_size (pixmap, w, h, - &tiling, &stride); - if (size > intel->max_gtt_map_size) - return FALSE; + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(bo, &tiling, stride); - bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0); - if (bo == NULL) - return FALSE; + screen->ModifyPixmapHeader(pixmap, + w, h, + 0, 0, + stride, NULL); + i830_set_pixmap_bo(pixmap, bo); + dri_bo_unreference(bo); - if (tiling != I915_TILING_NONE) - drm_intel_bo_set_tiling(bo, &tiling, stride); + return i830_uxa_pixmap_put_image(pixmap, src, src_pitch, 0, 0, w, h); + } + else + { + PixmapPtr scratch; + Bool ret; + + /* Upload to a linear buffer and queue a blit. */ + scratch = (*screen->CreatePixmap)(screen, w, h, + pixmap->drawable.depth, + UXA_CREATE_PIXMAP_FOR_MAP); + if (!scratch) + return FALSE; - dri_bo_unreference(priv->bo); - priv->bo = bo; - priv->tiling = tiling; - priv->batch_read_domains = priv->batch_write_domain = 0; - priv->flush_read_domains = priv->flush_write_domain = 0; - list_del(&priv->batch); - list_del(&priv->flush); - pixmap->drawable.pScreen->ModifyPixmapHeader(pixmap, - w, h, - 0, 0, - stride, NULL); - } else { - bo = priv->bo; - stride = i830_pixmap_pitch(pixmap); - } + ret = i830_uxa_pixmap_put_image(scratch, src, src_pitch, 0, 0, w, h); + if (ret) { + GCPtr gc = GetScratchGC(pixmap->drawable.depth, screen); + if (gc) { + ValidateGC(&pixmap->drawable, gc); - if (drm_intel_gem_bo_map_gtt(bo)) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "%s: bo map failed\n", __FUNCTION__); - return FALSE; + (*gc->ops->CopyArea)(&scratch->drawable, + &pixmap->drawable, + gc, 0, 0, w, h, x, y); + + FreeScratchGC(gc); + } else + ret = 
FALSE; + } + + (*screen->DestroyPixmap)(scratch); + return ret; + } } +} - ret = i830_bo_put_image(pixmap, bo, src, src_pitch, w, h); +static Bool i830_uxa_pixmap_get_image(PixmapPtr pixmap, + int x, int y, int w, int h, + char *dst, int dst_pitch) +{ + struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap); + int stride = i830_pixmap_pitch(pixmap); - drm_intel_gem_bo_unmap_gtt(bo); + if (dst_pitch == stride && w == pixmap->drawable.width) { + return drm_intel_bo_get_subdata(priv->bo, y * stride, stride * h, dst) == 0; + } else { + char *src; + int cpp; - return ret; + if (drm_intel_bo_map(priv->bo, FALSE)) + return FALSE; + + cpp = pixmap->drawable.bitsPerPixel/8; + src = (char *) priv->bo->virtual + y * stride + x * cpp; + w *= cpp; + do { + memcpy(dst, src, w); + src += stride; + dst += dst_pitch; + } while (--h); + + drm_intel_bo_unmap(priv->bo); + + return TRUE; + } } -static Bool i830_uxa_put_image(PixmapPtr pixmap, +static Bool i830_uxa_get_image(PixmapPtr pixmap, int x, int y, int w, int h, - char *src, int src_pitch) + char *dst, int dst_pitch) { - ScreenPtr screen = pixmap->drawable.pScreen; - ScrnInfoPtr scrn = xf86Screens[screen->myNum]; - PixmapPtr scratch; struct intel_pixmap *priv; - GCPtr gc; + PixmapPtr scratch = NULL; Bool ret; - if (x == 0 && y == 0 && - w == pixmap->drawable.width && - h == pixmap->drawable.height) - { - /* Replace GPU hot bo with new CPU data. */ - return i830_uxa_pixmap_swap_bo_with_image(pixmap, - src, src_pitch); - } + /* The presumption is that we wish to keep the target hot, so + * copy to a new bo and move that to the CPU in preference to + * causing ping-pong of the original. + * + * Also the gpu is much faster at detiling. + */ priv = i830_get_pixmap_intel(pixmap); - if (priv->batch_read_domains || - drm_intel_bo_busy(priv->bo) || - pixmap->drawable.depth == 24) { - dri_bo *bo; - - /* Partial replacement, copy incoming image to a bo and blit. 
*/ - scratch = (*screen->CreatePixmap)(screen, w, h, - pixmap->drawable.depth, - UXA_CREATE_PIXMAP_FOR_MAP); + if (intel_pixmap_is_busy(priv) || priv->tiling != I915_TILING_NONE) { + ScreenPtr screen = pixmap->drawable.pScreen; + GCPtr gc; + + /* Copy to a linear buffer and pull. */ + scratch = screen->CreatePixmap(screen, w, h, + pixmap->drawable.depth, + INTEL_CREATE_PIXMAP_TILING_NONE); if (!scratch) return FALSE; - bo = i830_get_pixmap_bo(scratch); - if (drm_intel_gem_bo_map_gtt(bo)) { - (*screen->DestroyPixmap) (scratch); - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "%s: bo map failed\n", __FUNCTION__); + gc = GetScratchGC(pixmap->drawable.depth, screen); + if (!gc) { + screen->DestroyPixmap(scratch); return FALSE; } - ret = i830_bo_put_image(scratch, bo, src, src_pitch, w, h); + ValidateGC(&pixmap->drawable, gc); - drm_intel_gem_bo_unmap_gtt(bo); - - if (ret) { - gc = GetScratchGC(pixmap->drawable.depth, screen); - if (gc) { - ValidateGC(&pixmap->drawable, gc); + gc->ops->CopyArea(&pixmap->drawable, + &scratch->drawable, + gc, x, y, w, h, 0, 0); - (*gc->ops->CopyArea)(&scratch->drawable, - &pixmap->drawable, - gc, 0, 0, w, h, x, y); + FreeScratchGC(gc); - FreeScratchGC(gc); - } else - ret = FALSE; - } + intel_batch_submit(xf86Screens[screen->myNum], FALSE); - (*screen->DestroyPixmap)(scratch); - } else { - /* bo is not busy so can be mapped without a stall, upload in-place. 
*/ - if (drm_intel_gem_bo_map_gtt(priv->bo)) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "%s: bo map failed\n", __FUNCTION__); - return FALSE; - } + x = y = 0; + pixmap = scratch; + } - pixman_blt((uint32_t *)src, priv->bo->virtual, - src_pitch / sizeof(uint32_t), - pixmap->devKind / sizeof(uint32_t), - pixmap->drawable.bitsPerPixel, - pixmap->drawable.bitsPerPixel, - 0, 0, - x, y, - w, h); + ret = i830_uxa_pixmap_get_image(pixmap, x, y, w, h, dst, dst_pitch); - drm_intel_gem_bo_unmap_gtt(priv->bo); - ret = TRUE; - } + if (scratch) + scratch->drawable.pScreen->DestroyPixmap(scratch); return ret; + } void i830_uxa_block_handler(ScreenPtr screen) @@ -883,6 +939,9 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, if (w > 32767 || h > 32767) return NullPixmap; + if (depth == 1) + return fbCreatePixmap(screen, w, h, depth, usage); + if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE && w <= 32 && h <= 32) return fbCreatePixmap(screen, w, h, depth, usage); @@ -900,7 +959,7 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, tiling = I915_TILING_X; if (usage == INTEL_CREATE_PIXMAP_TILING_Y) tiling = I915_TILING_Y; - if (usage == UXA_CREATE_PIXMAP_FOR_MAP) + if (usage == UXA_CREATE_PIXMAP_FOR_MAP || usage == INTEL_CREATE_PIXMAP_TILING_NONE) tiling = I915_TILING_NONE; if (tiling != I915_TILING_NONE) { @@ -911,21 +970,16 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, } size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride); - /* Fail very large allocations on 32-bit systems. Large BOs will - * tend to hit SW fallbacks frequently, and also will tend to fail - * to successfully map when doing SW fallbacks because we overcommit - * address space for BO access. - * - * Note that size should fit in 32 bits. We throw out >32767x32767x4, - * and pitch alignment could get us up to 32768x32767x4. + /* Fail very large allocations. 
Large BOs will tend to hit SW fallbacks + * frequently, and also will tend to fail to successfully map when doing + * SW fallbacks because we overcommit address space for BO access. */ - if (sizeof(unsigned long) == 4 && - size > (unsigned int)(1024 * 1024 * 1024)) { + if (size > intel->max_bo_size || stride >= KB(32)) { fbDestroyPixmap(pixmap); - return NullPixmap; + return fbCreatePixmap(screen, w, h, depth, usage); } - /* Perform a premilinary search for an in-flight bo */ + /* Perform a preliminary search for an in-flight bo */ if (usage != UXA_CREATE_PIXMAP_FOR_MAP) { int aligned_h; @@ -939,50 +993,63 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, list_foreach_entry(priv, struct intel_pixmap, &intel->in_flight, in_flight) { - if (priv->tiling == tiling && - priv->stride >= stride && - priv->bo->size >= priv->stride * aligned_h) { - list_del(&priv->in_flight); - screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, priv->stride, NULL); - i830_uxa_set_pixmap_intel(pixmap, priv); - return pixmap; + if (priv->tiling != tiling) + continue; + + if (tiling == I915_TILING_NONE) { + if (priv->bo->size < size) + continue; + + priv->stride = stride; + } else { + if (priv->stride < stride || + priv->bo->size < priv->stride * aligned_h) + continue; + + stride = priv->stride; } + + list_del(&priv->in_flight); + screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, stride, NULL); + i830_set_pixmap_intel(pixmap, priv); + return pixmap; } } - priv = xcalloc(1, sizeof (struct intel_pixmap)); + priv = calloc(1, sizeof (struct intel_pixmap)); if (priv == NULL) { fbDestroyPixmap(pixmap); return NullPixmap; } - if (usage == UXA_CREATE_PIXMAP_FOR_MAP) + if (usage == UXA_CREATE_PIXMAP_FOR_MAP) { + priv->busy = 0; priv->bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0); - else + } else { + priv->busy = -1; priv->bo = drm_intel_bo_alloc_for_render(intel->bufmgr, "pixmap", size, 0); + } if (!priv->bo) { - xfree(priv); + free(priv); fbDestroyPixmap(pixmap); if (errno == 
EFBIG) return fbCreatePixmap(screen, w, h, depth, usage); return NullPixmap; } + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(priv->bo, &tiling, stride); priv->stride = stride; priv->tiling = tiling; - if (priv->tiling != I915_TILING_NONE) - drm_intel_bo_set_tiling(priv->bo, - &priv->tiling, - stride); screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, stride, NULL); list_init(&priv->batch); list_init(&priv->flush); - i830_uxa_set_pixmap_intel(pixmap, priv); + i830_set_pixmap_intel(pixmap, priv); } return pixmap; @@ -1006,6 +1073,7 @@ void i830_uxa_create_screen_resources(ScreenPtr screen) if (bo != NULL) { PixmapPtr pixmap = screen->GetScreenPixmap(screen); i830_set_pixmap_bo(pixmap, bo); + i830_get_pixmap_intel(pixmap)->busy = 1; } } @@ -1014,7 +1082,11 @@ Bool i830_uxa_init(ScreenPtr screen) ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); +#if HAS_DIXREGISTERPRIVATEKEY + if (!dixRegisterPrivateKey(&uxa_pixmap_index, PRIVATE_PIXMAP, 0)) +#else if (!dixRequestPrivate(&uxa_pixmap_index, 0)) +#endif return FALSE; intel->uxa_driver = uxa_driver_alloc(); @@ -1027,12 +1099,21 @@ Bool i830_uxa_init(ScreenPtr screen) intel->uxa_driver->uxa_major = 1; intel->uxa_driver->uxa_minor = 0; + intel->render_current_dest = NULL; + intel->prim_offset = 0; + intel->vertex_count = 0; + intel->floats_per_vertex = 0; + intel->last_floats_per_vertex = 0; + intel->vertex_bo = NULL; + /* Solid fill */ + intel->uxa_driver->check_solid = i830_uxa_check_solid; intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid; intel->uxa_driver->solid = i830_uxa_solid; intel->uxa_driver->done_solid = i830_uxa_done_solid; /* Copy */ + intel->uxa_driver->check_copy = i830_uxa_check_copy; intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy; intel->uxa_driver->copy = i830_uxa_copy; intel->uxa_driver->done_copy = i830_uxa_done_copy; @@ -1040,17 +1121,22 @@ Bool i830_uxa_init(ScreenPtr screen) /* Composite */ if (!IS_I9XX(intel)) { 
intel->uxa_driver->check_composite = i830_check_composite; + intel->uxa_driver->check_composite_target = i830_check_composite_target; + intel->uxa_driver->check_composite_texture = i830_check_composite_texture; intel->uxa_driver->prepare_composite = i830_prepare_composite; intel->uxa_driver->composite = i830_composite; intel->uxa_driver->done_composite = i830_done_composite; } else if (IS_I915G(intel) || IS_I915GM(intel) || IS_I945G(intel) || IS_I945GM(intel) || IS_G33CLASS(intel)) { intel->uxa_driver->check_composite = i915_check_composite; + intel->uxa_driver->check_composite_target = i915_check_composite_target; + intel->uxa_driver->check_composite_texture = i915_check_composite_texture; intel->uxa_driver->prepare_composite = i915_prepare_composite; intel->uxa_driver->composite = i915_composite; intel->uxa_driver->done_composite = i830_done_composite; } else { intel->uxa_driver->check_composite = i965_check_composite; + intel->uxa_driver->check_composite_texture = i965_check_composite_texture; intel->uxa_driver->prepare_composite = i965_prepare_composite; intel->uxa_driver->composite = i965_composite; intel->uxa_driver->done_composite = i830_done_composite; @@ -1058,21 +1144,22 @@ Bool i830_uxa_init(ScreenPtr screen) /* PutImage */ intel->uxa_driver->put_image = i830_uxa_put_image; + intel->uxa_driver->get_image = i830_uxa_get_image; intel->uxa_driver->prepare_access = i830_uxa_prepare_access; intel->uxa_driver->finish_access = i830_uxa_finish_access; intel->uxa_driver->pixmap_is_offscreen = i830_uxa_pixmap_is_offscreen; + screen->CreatePixmap = i830_uxa_create_pixmap; + screen->DestroyPixmap = i830_uxa_destroy_pixmap; + if (!uxa_driver_init(screen, intel->uxa_driver)) { xf86DrvMsg(scrn->scrnIndex, X_ERROR, "UXA initialization failed\n"); - xfree(intel->uxa_driver); + free(intel->uxa_driver); return FALSE; } - screen->CreatePixmap = i830_uxa_create_pixmap; - screen->DestroyPixmap = i830_uxa_destroy_pixmap; - uxa_set_fallback_debug(screen, intel->fallback_debug); 
return TRUE; diff --git a/src/i830_video.c b/src/i830_video.c index 23163684..7aa16bb6 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -557,7 +557,7 @@ void I830InitVideo(ScreenPtr screen) * adaptors. */ newAdaptors = - xalloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *)); + malloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *)); if (newAdaptors == NULL) return; @@ -574,8 +574,7 @@ void I830InitVideo(ScreenPtr screen) /* Set up textured video if we can do it at this depth and we are on * supported hardware. */ - if (scrn->bitsPerPixel >= 16 && (IS_I9XX(intel) || IS_I965G(intel)) && - !(!IS_I965G(intel) && scrn->displayWidth > 2048)) { + if (scrn->bitsPerPixel >= 16 && (IS_I9XX(intel) || IS_I965G(intel))) { texturedAdaptor = I830SetupImageVideoTextured(screen); if (texturedAdaptor != NULL) { xf86DrvMsg(scrn->scrnIndex, X_INFO, @@ -619,7 +618,7 @@ void I830InitVideo(ScreenPtr screen) if (texturedAdaptor) intel_xvmc_adaptor_init(screen); #endif - xfree(adaptors); + free(adaptors); } static void @@ -818,7 +817,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen) OVERLAY_DEBUG("I830SetupImageVideoOverlay\n"); - if (!(adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec) + + if (!(adapt = calloc(1, sizeof(XF86VideoAdaptorRec) + sizeof(intel_adaptor_private) + sizeof(DevUnion)))) return NULL; @@ -950,16 +949,16 @@ static XF86VideoAdaptorPtr I830SetupImageVideoTextured(ScreenPtr screen) nAttributes = NUM_TEXTURED_ATTRIBUTES; - adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec)); - adaptor_privs = xcalloc(nports, sizeof(intel_adaptor_private)); - devUnions = xcalloc(nports, sizeof(DevUnion)); - attrs = xcalloc(nAttributes, sizeof(XF86AttributeRec)); + adapt = calloc(1, sizeof(XF86VideoAdaptorRec)); + adaptor_privs = calloc(nports, sizeof(intel_adaptor_private)); + devUnions = calloc(nports, sizeof(DevUnion)); + attrs = calloc(nAttributes, sizeof(XF86AttributeRec)); if (adapt == NULL || adaptor_privs == NULL || devUnions == NULL || attrs == 
NULL) { - xfree(adapt); - xfree(adaptor_privs); - xfree(devUnions); - xfree(attrs); + free(adapt); + free(adaptor_privs); + free(devUnions); + free(attrs); return NULL; } diff --git a/src/i915_3d.c b/src/i915_3d.c index 517c6851..906043b1 100644 --- a/src/i915_3d.c +++ b/src/i915_3d.c @@ -38,7 +38,7 @@ void I915EmitInvarientState(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - ATOMIC_BATCH(24); + assert(intel->in_batch_atomic); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -85,8 +85,13 @@ void I915EmitInvarientState(ScrnInfoPtr scrn) ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 0); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + OUT_BATCH(0x00000000); /* Stencil. */ OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); @@ -104,6 +109,4 @@ void I915EmitInvarientState(ScrnInfoPtr scrn) OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); OUT_BATCH(MI_NOOP); - - ADVANCE_BATCH(); } diff --git a/src/i915_3d.h b/src/i915_3d.h index f85780aa..04531f33 100644 --- a/src/i915_3d.h +++ b/src/i915_3d.h @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 4 -*- */ /* - * Copyright © 2006 Intel Corporation + * Copyright © 2006,2010 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,9 +23,213 @@ * * Authors: * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> * */ +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. 
Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: +*/ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define 
A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define 
A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + /* MASK_* are the unshifted bitmasks of the destination mask in arithmetic * operations */ @@ -38,130 +242,101 @@ #define MASK_SATURATE 0x10 /* Temporary, undeclared regs. Preserved between phases */ -#define FS_R0 ((REG_TYPE_R << 8) | 0) -#define FS_R1 ((REG_TYPE_R << 8) | 1) -#define FS_R2 ((REG_TYPE_R << 8) | 2) -#define FS_R3 ((REG_TYPE_R << 8) | 3) +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) /* Texture coordinate regs. Must be declared. 
*/ -#define FS_T0 ((REG_TYPE_T << 8) | 0) -#define FS_T1 ((REG_TYPE_T << 8) | 1) -#define FS_T2 ((REG_TYPE_T << 8) | 2) -#define FS_T3 ((REG_TYPE_T << 8) | 3) -#define FS_T4 ((REG_TYPE_T << 8) | 4) -#define FS_T5 ((REG_TYPE_T << 8) | 5) -#define FS_T6 ((REG_TYPE_T << 8) | 6) -#define FS_T7 ((REG_TYPE_T << 8) | 7) -#define FS_T8 ((REG_TYPE_T << 8) | 8) -#define FS_T9 ((REG_TYPE_T << 8) | 9) -#define FS_T10 ((REG_TYPE_T << 8) | 10) +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) /* Constant values */ -#define FS_C0 ((REG_TYPE_CONST << 8) | 0) -#define FS_C1 ((REG_TYPE_CONST << 8) | 1) -#define FS_C2 ((REG_TYPE_CONST << 8) | 2) -#define FS_C3 ((REG_TYPE_CONST << 8) | 3) -#define FS_C4 ((REG_TYPE_CONST << 8) | 4) -#define FS_C5 ((REG_TYPE_CONST << 8) | 5) -#define FS_C6 ((REG_TYPE_CONST << 8) | 6) -#define FS_C7 ((REG_TYPE_CONST << 8) | 7) +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) /* Sampler regs */ -#define FS_S0 ((REG_TYPE_S << 8) | 0) -#define FS_S1 ((REG_TYPE_S << 8) | 1) -#define FS_S2 ((REG_TYPE_S << 8) | 2) -#define FS_S3 ((REG_TYPE_S << 8) | 3) 
+#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) /* Output color */ -#define FS_OC ((REG_TYPE_OC << 8) | 0) +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) /* Output depth */ -#define FS_OD ((REG_TYPE_OD << 8) | 0) +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) /* Unpreserved temporary regs */ -#define FS_U0 ((REG_TYPE_U << 8) | 0) -#define FS_U1 ((REG_TYPE_U << 8) | 1) -#define FS_U2 ((REG_TYPE_U << 8) | 2) -#define FS_U3 ((REG_TYPE_U << 8) | 3) - -#define REG_TYPE(reg) ((reg) >> 8) -#define REG_NR(reg) ((reg) & 0xff) - -struct i915_fs_op { - uint32_t ui[3]; +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4) + +#define REG_CHANNEL_MASK 0xf + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum i915_fs_channel { + X_CHANNEL_VAL = 0, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL, + + NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8, + NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8, + NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8, + NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8, + NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8 }; -#define X_CHANNEL_VAL 1 -#define Y_CHANNEL_VAL 2 -#define Z_CHANNEL_VAL 3 -#define 
W_CHANNEL_VAL 4 -#define ZERO_CHANNEL_VAL 5 -#define ONE_CHANNEL_VAL 6 +#define i915_fs_operand(reg, x, y, z, w) \ + (reg) | \ +(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ +(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ +(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ +(w##_CHANNEL_VAL << W_CHANNEL_SHIFT) /** - * This structure represents the contents of an operand to an i915 fragment - * shader. - * - * It is not a hardware representation, though closely related. - */ -struct i915_fs_operand { - /**< REG_TYPE_* register type */ - int reg; - /**< *_CHANNEL_VAL swizzle value, with optional negation */ - int x; - /**< *_CHANNEL_VAL swizzle value, with optional negation */ - int y; - /**< *_CHANNEL_VAL swizzle value, with optional negation */ - int z; - /**< *_CHANNEL_VAL swizzle value, with optional negation */ - int w; -}; - -/** - * Construct an operand description for the fragment shader. - * - * \param regtype FS_* register used as the source value for X/Y/Z/W sources. - * \param x *_CHANNEL_VAL swizzle value prefix for operand X channel, with - * optional negation. - * \param y *_CHANNEL_VAL swizzle value prefix for operand Y channel, with - * optional negation. - * \param z *_CHANNEL_VAL swizzle value prefix for operand Z channel, with - * optional negation. - * \param w *_CHANNEL_VAL swizzle value prefix for operand W channel, with - * optional negation. 
- */ -#define i915_fs_operand(reg, x, y, z, w) \ - _i915_fs_operand(reg, \ - x##_CHANNEL_VAL, y##_CHANNEL_VAL, \ - z##_CHANNEL_VAL, w##_CHANNEL_VAL) - -/** - * Construct an oeprand description for using a register with no swizzling + * Construct an operand description for using a register with no swizzling */ #define i915_fs_operand_reg(reg) \ - i915_fs_operand(reg, X, Y, Z, W) - -static inline struct i915_fs_operand -_i915_fs_operand(int reg, int x, int y, int z, int w) -{ - struct i915_fs_operand operand; + i915_fs_operand(reg, X, Y, Z, W) - operand.reg = reg; - operand.x = x; - operand.y = y; - operand.z = z; - operand.w = w; - - return operand; -} +#define i915_fs_operand_reg_negate(reg) \ + i915_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W) /** * Returns an operand containing (0.0, 0.0, 0.0, 0.0). */ -static inline struct i915_fs_operand i915_fs_operand_zero(void) -{ - return i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO); -} +#define i915_fs_operand_zero() i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) /** * Returns an unused operand @@ -171,246 +346,256 @@ static inline struct i915_fs_operand i915_fs_operand_zero(void) /** * Returns an operand containing (1.0, 1.0, 1.0, 1.0). */ -static inline struct i915_fs_operand i915_fs_operand_one(void) -{ - return i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE); -} - -static inline int i915_get_hardware_channel_val(int channel_val) -{ - if (channel_val < 0) - channel_val = -channel_val; - - switch (channel_val) { - case X_CHANNEL_VAL: - return SRC_X; - case Y_CHANNEL_VAL: - return SRC_Y; - case Z_CHANNEL_VAL: - return SRC_Z; - case W_CHANNEL_VAL: - return SRC_W; - case ZERO_CHANNEL_VAL: - return SRC_ZERO; - case ONE_CHANNEL_VAL: - return SRC_ONE; - } - FatalError("Bad channel value %d\n", channel_val); -} +#define i915_fs_operand_one() i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define i915_get_hardware_channel_val(val, shift, negate) \ + (((val & 0x7) << shift) | ((val & 0x8) ? 
negate : 0)) /** * Outputs a fragment shader command to declare a sampler or texture register. */ #define i915_fs_dcl(reg) \ -do { \ - FS_OUT(_i915_fs_dcl(reg)); \ -} while (0) - -/** - * Constructs a fragment shader command to declare a sampler or texture - * register. - */ -static inline struct i915_fs_op _i915_fs_dcl(int reg) -{ - struct i915_fs_op op; + do { \ + OUT_BATCH(D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + } while (0) - op.ui[0] = D0_DCL | (REG_TYPE(reg) << D0_TYPE_SHIFT) | - (REG_NR(reg) << D0_NR_SHIFT); - op.ui[1] = 0; - op.ui[2] = 0; - if (REG_TYPE(reg) != REG_TYPE_S) - op.ui[0] |= D0_CHANNEL_ALL; - - return op; -} - -/** - * Constructs a fragment shader command to load from a texture sampler. - */ #define i915_fs_texld(dest_reg, sampler_reg, address_reg) \ -do { \ - FS_OUT(_i915_fs_texld(T0_TEXLD, dest_reg, sampler_reg, address_reg)); \ -} while (0) + do { \ + OUT_BATCH(T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) #define i915_fs_texldp(dest_reg, sampler_reg, address_reg) \ -do { \ - FS_OUT(_i915_fs_texld(T0_TEXLDP, dest_reg, sampler_reg, address_reg)); \ -} while (0) - -static inline struct i915_fs_op -_i915_fs_texld(int load_op, int dest_reg, int sampler_reg, int address_reg) -{ - struct i915_fs_op op; - - op.ui[0] = 0; - op.ui[1] = 0; - op.ui[2] = 0; - - if (REG_TYPE(sampler_reg) != REG_TYPE_S) - FatalError("Bad sampler reg type\n"); - - op.ui[0] |= load_op; - op.ui[0] |= REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT; - op.ui[0] |= REG_NR(dest_reg) << T0_DEST_NR_SHIFT; - op.ui[0] |= REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT; - op.ui[1] |= 
REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT; - op.ui[1] |= REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT; - - return op; -} + do { \ + OUT_BATCH(T0_TEXLDP | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _i915_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) #define i915_fs_arith(op, dest_reg, operand0, operand1, operand2) \ - _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) - -static inline struct i915_fs_op -_i915_fs_arith(int cmd, int dest_reg, - struct i915_fs_operand operand0, - struct i915_fs_operand operand1, struct i915_fs_operand operand2) -{ - struct i915_fs_op op; - - op.ui[0] = 0; - op.ui[1] = 0; - op.ui[2] = 0; - - /* Set up destination register and write mask */ - op.ui[0] |= cmd; - op.ui[0] |= REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT; - op.ui[0] |= REG_NR(dest_reg) << A0_DEST_NR_SHIFT; - op.ui[0] |= A0_DEST_CHANNEL_ALL; - - /* Set up operand 0 */ - op.ui[0] |= REG_TYPE(operand0.reg) << A0_SRC0_TYPE_SHIFT; - op.ui[0] |= REG_NR(operand0.reg) << A0_SRC0_NR_SHIFT; - - op.ui[1] |= i915_get_hardware_channel_val(operand0.x) << - A1_SRC0_CHANNEL_X_SHIFT; - if (operand0.x < 0) - op.ui[1] |= A1_SRC0_CHANNEL_X_NEGATE; - - op.ui[1] |= i915_get_hardware_channel_val(operand0.y) << - A1_SRC0_CHANNEL_Y_SHIFT; - if (operand0.y < 0) - op.ui[1] |= A1_SRC0_CHANNEL_Y_NEGATE; - - op.ui[1] |= i915_get_hardware_channel_val(operand0.z) << - A1_SRC0_CHANNEL_Z_SHIFT; - if (operand0.z < 0) - op.ui[1] |= A1_SRC0_CHANNEL_Z_NEGATE; - - op.ui[1] |= i915_get_hardware_channel_val(operand0.w) << - A1_SRC0_CHANNEL_W_SHIFT; - if (operand0.w < 0) - op.ui[1] |= A1_SRC0_CHANNEL_W_NEGATE; - - /* Set 
up operand 1 */ - op.ui[1] |= REG_TYPE(operand1.reg) << A1_SRC1_TYPE_SHIFT; - op.ui[1] |= REG_NR(operand1.reg) << A1_SRC1_NR_SHIFT; - - op.ui[1] |= i915_get_hardware_channel_val(operand1.x) << - A1_SRC1_CHANNEL_X_SHIFT; - if (operand1.x < 0) - op.ui[1] |= A1_SRC1_CHANNEL_X_NEGATE; - - op.ui[1] |= i915_get_hardware_channel_val(operand1.y) << - A1_SRC1_CHANNEL_Y_SHIFT; - if (operand1.y < 0) - op.ui[1] |= A1_SRC1_CHANNEL_Y_NEGATE; - - op.ui[2] |= i915_get_hardware_channel_val(operand1.z) << - A2_SRC1_CHANNEL_Z_SHIFT; - if (operand1.z < 0) - op.ui[2] |= A2_SRC1_CHANNEL_Z_NEGATE; - - op.ui[2] |= i915_get_hardware_channel_val(operand1.w) << - A2_SRC1_CHANNEL_W_SHIFT; - if (operand1.w < 0) - op.ui[2] |= A2_SRC1_CHANNEL_W_NEGATE; - - /* Set up operand 2 */ - op.ui[2] |= REG_TYPE(operand2.reg) << A2_SRC2_TYPE_SHIFT; - op.ui[2] |= REG_NR(operand2.reg) << A2_SRC2_NR_SHIFT; - - op.ui[2] |= i915_get_hardware_channel_val(operand2.x) << - A2_SRC2_CHANNEL_X_SHIFT; - if (operand2.x < 0) - op.ui[2] |= A2_SRC2_CHANNEL_X_NEGATE; - - op.ui[2] |= i915_get_hardware_channel_val(operand2.y) << - A2_SRC2_CHANNEL_Y_SHIFT; - if (operand2.y < 0) - op.ui[2] |= A2_SRC2_CHANNEL_Y_NEGATE; - - op.ui[2] |= i915_get_hardware_channel_val(operand2.z) << - A2_SRC2_CHANNEL_Z_SHIFT; - if (operand2.z < 0) - op.ui[2] |= A2_SRC2_CHANNEL_Z_NEGATE; - - op.ui[2] |= i915_get_hardware_channel_val(operand2.w) << - A2_SRC2_CHANNEL_W_SHIFT; - if (operand2.w < 0) - op.ui[2] |= A2_SRC2_CHANNEL_W_NEGATE; - - return op; -} - -/** Move operand0 to dest_reg */ -#define i915_fs_mov(dest_reg, operand0) \ -do { \ - FS_OUT(i915_fs_arith(MOV, dest_reg, operand0, \ - i915_fs_operand_none(), \ - i915_fs_operand_none())); \ + _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _i915_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ + do { \ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << 
A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ + } while (0) + +#define _i915_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register 
and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ } while (0) -/** - * Move the value in operand0 to the dest reg with the masking/saturation - * specified. 
- */ +#define i915_fs_mov(dest_reg, operand0) \ + i915_fs_arith(MOV, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + #define i915_fs_mov_masked(dest_reg, dest_mask, operand0) \ -do { \ - struct i915_fs_op op; \ - \ - op = i915_fs_arith(MOV, dest_reg, operand0, i915_fs_operand_none(), \ - i915_fs_operand_none()); \ - op.ui[0] &= ~A0_DEST_CHANNEL_ALL; \ - op.ui[0] |= ((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT; \ - if ((dest_mask) & MASK_SATURATE) \ - op.ui[0] |= A0_DEST_SATURATE; \ - \ - FS_OUT(op); \ -} while (0) + i915_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + + +#define i915_fs_frc(dest_reg, operand0) \ + i915_fs_arith (FRC, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) /** Add operand0 and operand1 and put the result in dest_reg */ #define i915_fs_add(dest_reg, operand0, operand1) \ -do { \ - FS_OUT(i915_fs_arith(ADD, dest_reg, operand0, operand1, \ - i915_fs_operand_none())); \ -} while (0) + i915_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) -/** Add operand0 and operand1 and put the result in dest_reg */ +/** Multiply operand0 and operand1 and put the result in dest_reg */ #define i915_fs_mul(dest_reg, operand0, operand1) \ -do { \ - FS_OUT(i915_fs_arith(MUL, dest_reg, operand0, operand1, \ - i915_fs_operand_none())); \ -} while (0) + i915_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define i915_fs_rsq(dest_reg, dest_mask, operand0) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } else { \ + i915_fs_arith (RSQ, dest_reg, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } \ + } while (0) + +/** Puts the minimum of operand0 and 
operand1 in dest_reg */ +#define i915_fs_min(dest_reg, operand0, operand1) \ + i915_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define i915_fs_max(dest_reg, operand0, operand1) \ + i915_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +#define i915_fs_cmp(dest_reg, operand0, operand1, operand2) \ + i915_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define i915_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +#define i915_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ + } while (0) /** * Perform a 3-component dot-product of operand0 and operand1 and put the * resulting scalar in the channels of dest_reg specified by the dest_mask. */ -#define i915_fs_dp3_masked(dest_reg, dest_mask, operand0, operand1) \ -do { \ - struct i915_fs_op op; \ - \ - op = i915_fs_arith(DP3, dest_reg, operand0, operand1, \ - i915_fs_operand_none()); \ - op.ui[0] &= ~A0_DEST_CHANNEL_ALL; \ - op.ui[0] |= ((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT; \ - if ((dest_mask) & MASK_SATURATE) \ - op.ui[0] |= A0_DEST_SATURATE; \ - \ - FS_OUT(op); \ -} while (0) +#define i915_fs_dp3(dest_reg, dest_mask, op0, op1) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + i915_fs_operand_none()); \ + } else { \ + i915_fs_arith (DP3, dest_reg, op0, op1,\ + i915_fs_operand_none()); \ + } \ + } while (0) /** * Sets up local state for accumulating a fragment shader buffer. 
@@ -418,36 +603,17 @@ do { \ * \param x maximum number of shader commands that may be used between * a FS_START and FS_END */ -#define FS_LOCALS(x) \ - uint32_t _shader_buf[(x) * 3]; \ - unsigned int _max_shader_commands = x; \ - unsigned int _cur_shader_commands +#define FS_LOCALS() \ + uint32_t _shader_offset #define FS_BEGIN() \ -do { \ - _cur_shader_commands = 0; \ -} while (0) - -#define FS_OUT(_shaderop) \ -do { \ - if (_cur_shader_commands >= _max_shader_commands) \ - FatalError("fragment shader command buffer exceeded (%d)\n", \ - _cur_shader_commands); \ - _shader_buf[_cur_shader_commands * 3 + 0] = _shaderop.ui[0]; \ - _shader_buf[_cur_shader_commands * 3 + 1] = _shaderop.ui[1]; \ - _shader_buf[_cur_shader_commands * 3 + 2] = _shaderop.ui[2]; \ - ++_cur_shader_commands; \ -} while (0) + do { \ + _shader_offset = intel->batch_used++; \ + } while (0) #define FS_END() \ -do { \ - int _i, _pad = (_cur_shader_commands & 0x1) ? 0 : 1; \ - ATOMIC_BATCH(_cur_shader_commands * 3 + 1 + _pad); \ - OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | \ - (_cur_shader_commands * 3 - 1)); \ - for (_i = 0; _i < _cur_shader_commands * 3; _i++) \ - OUT_BATCH(_shader_buf[_i]); \ - if (_pad != 0) \ - OUT_BATCH(MI_NOOP); \ - ADVANCE_BATCH(); \ -} while (0); + do { \ + intel->batch_ptr[_shader_offset] = \ + _3DSTATE_PIXEL_SHADER_PROGRAM | \ + (intel->batch_used - _shader_offset - 2); \ + } while (0); diff --git a/src/i915_reg.h b/src/i915_reg.h index a61bc401..746a4131 100644 --- a/src/i915_reg.h +++ b/src/i915_reg.h @@ -32,19 +32,20 @@ #define CMD_3D (0x3<<29) -#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_CLEAR_RECT (0xa<<18) 
-#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) #define PRIM3D_MASK (0x1f<<18) /* p137 */ diff --git a/src/i915_render.c b/src/i915_render.c index 98b5b88a..53c99142 100644 --- a/src/i915_render.c +++ b/src/i915_render.c @@ -35,8 +35,6 @@ #include "i915_reg.h" #include "i915_3d.h" -#define PIXEL_CENTRE_SAMPLE 0 - struct formatinfo { int fmt; uint32_t card_fmt; @@ -133,8 +131,10 @@ static uint32_t i915_get_blend_cntl(int op, PicturePtr mask, } } - return (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) | - (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); + return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | + (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) | + (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); } #define DSTORG_HORT_BIAS(x) ((x)<<20) @@ -170,16 +170,72 @@ static Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format) (int)dest_picture->format); return FALSE; } -#if PIXEL_CENTRE_SAMPLE *dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8); -#endif return TRUE; } -static Bool i915_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, - int unit) +Bool +i915_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) +{ + ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; + uint32_t tmp1; 
+ + /* Check for unsupported compositing operations. */ + if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) { + intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n", + op); + return FALSE; + } + if (mask_picture != NULL && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. + */ + if (i915_blend_op[op].src_alpha && + (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) { + if (op != PictOpOver) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + } + + if (!i915_get_dest_format(dest_picture, &tmp1)) { + intel_debug_fallback(scrn, "Get Color buffer format\n"); + return FALSE; + } + + if (width > 2048 || height > 2048) + return FALSE; + + return TRUE; +} + +Bool +i915_check_composite_target(PixmapPtr pixmap) +{ + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048) + return FALSE; + + if(!intel_check_pitch_3d(pixmap)) + return FALSE; + + return TRUE; +} + +Bool +i915_check_composite_texture(ScreenPtr screen, PicturePtr picture) { if (picture->repeatType > RepeatReflect) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Unsupported picture repeat %d\n", picture->repeatType); return FALSE; @@ -187,17 +243,25 @@ static Bool i915_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, if (picture->filter != PictFilterNearest && picture->filter != PictFilterBilinear) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", picture->filter); return FALSE; } + if (picture->pSourcePict) { + SourcePict *source = picture->pSourcePict; + if (source->type == SourcePictTypeSolidFill) + return TRUE; + } + if (picture->pDrawable) { int w, h, i; w = 
picture->pDrawable->width; h = picture->pDrawable->height; if ((w > 2048) || (h > 2048)) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Picture w/h too large (%dx%d)\n", w, h); @@ -212,61 +276,17 @@ static Bool i915_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, } if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Unsupported picture format " "0x%x\n", (int)picture->format); return FALSE; } - } - - return TRUE; -} - -Bool -i915_check_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, - PicturePtr dest_picture) -{ - ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; - uint32_t tmp1; - - /* Check for unsupported compositing operations. */ - if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) { - intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n", - op); - return FALSE; - } - if (mask_picture != NULL && mask_picture->componentAlpha && - PICT_FORMAT_RGB(mask_picture->format)) { - /* Check if it's component alpha that relies on a source alpha - * and on the source value. We can only get one of those - * into the single source value that we get to blend with. 
- */ - if (i915_blend_op[op].src_alpha && - (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) { - intel_debug_fallback(scrn, - "Component alpha not supported " - "with source alpha and source " - "value blending.\n"); - return FALSE; - } - } - if (!i915_check_composite_texture(scrn, source_picture, 0)) { - intel_debug_fallback(scrn, "Check Src picture texture\n"); - return FALSE; - } - if (mask_picture != NULL - && !i915_check_composite_texture(scrn, mask_picture, 1)) { - intel_debug_fallback(scrn, "Check Mask picture texture\n"); - return FALSE; + return TRUE; } - if (!i915_get_dest_format(dest_picture, &tmp1)) { - intel_debug_fallback(scrn, "Get Color buffer format\n"); - return FALSE; - } - - return TRUE; + return FALSE; } static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) @@ -280,8 +300,8 @@ static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) pitch = intel_get_pixmap_pitch(pixmap); w = picture->pDrawable->width; h = picture->pDrawable->height; - intel->scale_units[unit][0] = pixmap->drawable.width; - intel->scale_units[unit][1] = pixmap->drawable.height; + intel->scale_units[unit][0] = 1. / pixmap->drawable.width; + intel->scale_units[unit][1] = 1. 
/ pixmap->drawable.height; for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]); i++) { @@ -360,352 +380,161 @@ static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) return TRUE; } -Bool -i915_prepare_composite(int op, PicturePtr source_picture, - PicturePtr mask_picture, PicturePtr dest_picture, - PixmapPtr source, PixmapPtr mask, PixmapPtr dest) +static void +i915_emit_composite_primitive_constant(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) { - ScrnInfoPtr scrn = xf86Screens[source_picture->pDrawable->pScreen->myNum]; + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); - drm_intel_bo *bo_table[] = { - NULL, /* batch_bo */ - i830_get_pixmap_bo(source), - mask ? i830_get_pixmap_bo(mask) : NULL, - i830_get_pixmap_bo(dest), - }; - int tex_unit = 0; - - intel->render_source_picture = source_picture; - intel->render_source = source; - intel->render_mask_picture = mask_picture; - intel->render_mask = mask; - intel->render_dest_picture = dest_picture; - intel->render_dest = dest; - intel->render_source_is_solid = - source_picture->pDrawable && - source_picture->pDrawable->width == 1 && - source_picture->pDrawable->height == 1 && - source_picture->repeat; + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); - if (intel->render_source_is_solid) { - if (! uxa_get_color_for_pixmap (source, - source_picture->format, - PICT_a8r8g8b8, - &intel->render_source_solid)) - intel->render_source_is_solid = FALSE; - } - if (!intel->render_source_is_solid && !intel_check_pitch_3d(source)) - return FALSE; - - - intel->render_mask_is_solid = TRUE; /* mask == NULL => opaque */ - if (mask) { - intel->render_mask_is_solid = - mask_picture->pDrawable && - mask_picture->pDrawable->width == 1 && - mask_picture->pDrawable->height == 1 && - mask_picture->repeat; - if (intel->render_mask_is_solid) { - if (! 
uxa_get_color_for_pixmap (mask, - mask_picture->format, - PICT_a8r8g8b8, - &intel->render_mask_solid)) - intel->render_mask_is_solid = FALSE; - } - if (!intel->render_mask_is_solid && !intel_check_pitch_3d(mask)) - return FALSE; - } - - if (!intel_check_pitch_3d(dest)) - return FALSE; + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); - if (!i915_get_dest_format(dest_picture, - &intel->i915_render_state.dst_format)) - return FALSE; - - if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) - return FALSE; - intel->dst_coord_adjust = 0; - intel->src_coord_adjust = 0; - intel->mask_coord_adjust = 0; - - intel->transform[0] = NULL; - intel->scale_units[0][0] = -1; - intel->scale_units[0][1] = -1; - intel->transform[1] = NULL; - intel->scale_units[1][0] = -1; - intel->scale_units[1][1] = -1; - - if (! intel->render_source_is_solid) { - if (!i915_texture_setup(source_picture, source, tex_unit++)) { - intel_debug_fallback(scrn, "fail to setup src texture\n"); - return FALSE; - } - - if (source_picture->filter == PictFilterNearest) { -#if PIXEL_CENTRE_SAMPLE - intel->src_coord_adjust = 0.375; -#else - intel->dst_coord_adjust = -0.125; -#endif - } - } - - if (mask != NULL) { - if (! 
intel->render_mask_is_solid) { - if (!i915_texture_setup(mask_picture, mask, tex_unit++)) { - intel_debug_fallback(scrn, - "fail to setup mask texture\n"); - return FALSE; - } - - if (mask_picture->filter == PictFilterNearest) { -#if PIXEL_CENTRE_SAMPLE - intel->mask_coord_adjust = 0.375; -#else - intel->dst_coord_adjust = -0.125; -#endif - } - } - } - - intel->i915_render_state.op = op; - - if(i830_uxa_pixmap_is_dirty(source) || - (mask && i830_uxa_pixmap_is_dirty(mask))) - intel_batch_emit_flush(scrn); - - intel->needs_render_state_emit = TRUE; - - return TRUE; + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); } -static void i915_emit_composite_setup(ScrnInfoPtr scrn) +static void +i915_emit_composite_primitive_identity_source(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) { + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); - int op = intel->i915_render_state.op; - PicturePtr mask_picture = intel->render_mask_picture; - PicturePtr dest_picture = intel->render_dest_picture; - PixmapPtr mask = intel->render_mask; - PixmapPtr dest = intel->render_dest; - uint32_t dst_format = intel->i915_render_state.dst_format, dst_pitch; - uint32_t blendctl, tiling_bits; - Bool is_affine_src, is_affine_mask; - Bool is_solid_src, is_solid_mask; - int tex_count, t; - intel->needs_render_state_emit = FALSE; - - IntelEmitInvarientState(scrn); - intel->last_3d = LAST_3D_RENDER; + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); - dst_pitch = intel_get_pixmap_pitch(dest); - - is_affine_src = i830_transform_is_affine(intel->transform[0]); - is_affine_mask = i830_transform_is_affine(intel->transform[1]); - - is_solid_src = intel->render_source_is_solid; - is_solid_mask = intel->render_mask_is_solid; - - tex_count = 0; - tex_count += ! 
is_solid_src; - tex_count += mask && ! is_solid_mask; + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); - t = 15; - if (tex_count) - t += 6 * tex_count + 4; - if (is_solid_src) - t += 2; - if (mask && is_solid_mask) - t += 2; - ATOMIC_BATCH (t); - - if (tex_count != 0) { - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); - OUT_BATCH((1 << tex_count) - 1); - for (t = 0; t < tex_count; t++) { - OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0); - OUT_BATCH(intel->mapstate[3*t + 1]); - OUT_BATCH(intel->mapstate[3*t + 2]); - } - - OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); - OUT_BATCH((1 << tex_count) - 1); - for (t = 0; t < tex_count; t++) { - OUT_BATCH(intel->samplerstate[3*t + 0]); - OUT_BATCH(intel->samplerstate[3*t + 1]); - OUT_BATCH(intel->samplerstate[3*t + 2]); - } - } - - if (is_solid_src) { - OUT_BATCH (_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH (intel->render_source_solid); - } - if (mask && is_solid_mask) { - OUT_BATCH (_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH (intel->render_mask_solid); - } - - if (i830_pixmap_tiled(dest)) { - tiling_bits = BUF_3D_TILED_SURFACE; - if (i830_get_pixmap_intel(dest)->tiling - == I915_TILING_Y) - tiling_bits |= BUF_3D_TILE_WALK_Y; - } else - tiling_bits = 0; - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | - BUF_3D_PITCH(dst_pitch)); - OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, 0); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(dst_format); - - { - uint32_t ss2; - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | - I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); - ss2 = ~0; - t = 0; - if (! is_solid_src) { - ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); - ss2 |= S2_TEXCOORD_FMT(t, - is_affine_src ? TEXCOORDFMT_2D : - TEXCOORDFMT_4D); - t++; - } - if (mask && ! 
is_solid_mask) { - ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); - ss2 |= S2_TEXCOORD_FMT(t, - is_affine_mask ? TEXCOORDFMT_2D : - TEXCOORDFMT_4D); - t++; - } - OUT_BATCH(ss2); - OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE | - S4_CULLMODE_NONE | S4_VFMT_XY); - blendctl = - i915_get_blend_cntl(op, mask_picture, dest_picture->format); - OUT_BATCH(0x00000000); /* Disable stencil buffer */ - OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | - (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | - blendctl); - - /* draw rect is unconditional */ - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0x00000000); - OUT_BATCH(0x00000000); /* ymin, xmin */ - OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | - DRAW_XMAX(dest->drawable.width - 1)); - /* yorig, xorig (relate to color buffer?) */ - OUT_BATCH(0x00000000); - } - - ADVANCE_BATCH(); - - { - FS_LOCALS(20); - int src_reg, mask_reg, out_reg = FS_OC; - - FS_BEGIN(); + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); +} - if (dst_format == COLR_BUF_8BIT) - out_reg = FS_U0; +static void +i915_emit_composite_primitive_affine_source(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + float src_x[3], src_y[3]; + + if (!i830_get_transformed_coordinates(srcX, srcY, + intel->transform[0], + &src_x[0], + &src_y[0])) + return; + + if (!i830_get_transformed_coordinates(srcX, srcY + h, + intel->transform[0], + &src_x[1], + &src_y[1])) + return; + + if (!i830_get_transformed_coordinates(srcX + w, srcY + h, + intel->transform[0], + &src_x[2], + &src_y[2])) + return; + + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + 
h); + OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); +} - /* Declare the registers necessary for our program. */ - t = 0; - if (is_solid_src) { - i915_fs_dcl(FS_T8); - src_reg = FS_T8; - } else { - i915_fs_dcl(FS_T0); - i915_fs_dcl(FS_S0); - t++; - } - if (mask) { - if (is_solid_mask) { - i915_fs_dcl(FS_T9); - mask_reg = FS_T9; - } else { - i915_fs_dcl(FS_T0 + t); - i915_fs_dcl(FS_S0 + t); - } - } +static void +i915_emit_composite_primitive_constant_identity_mask(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); - /* Load the source_picture texel */ - if (! is_solid_src) { - if (is_affine_src) { - i915_fs_texld(FS_R0, FS_S0, FS_T0); - } else { - i915_fs_texldp(FS_R0, FS_S0, FS_T0); - } + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((maskX + w) * intel->scale_units[0][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[0][1]); - src_reg = FS_R0; - } + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(maskX * intel->scale_units[0][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[0][1]); - if (!mask) { - /* No mask, so move to output color */ - i915_fs_mov(out_reg, i915_fs_operand_reg(src_reg)); - } else { - if (! 
is_solid_mask) { - /* Load the mask_picture texel */ - if (is_affine_mask) { - i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t); - } else { - i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t); - } - - mask_reg = FS_R1; - } + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(maskX * intel->scale_units[0][0]); + OUT_VERTEX(maskY * intel->scale_units[0][1]); +} - /* If component alpha is active in the mask and the blend - * operation uses the source alpha, then we know we don't - * need the source value (otherwise we would have hit a - * fallback earlier), so we provide the source alpha (src.A * - * mask.X) as output color. - * Conversely, if CA is set and we don't need the source alpha, - * then we produce the source value (src.X * mask.X) and the - * source alpha is unused. Otherwise, we provide the non-CA - * source value (src.X * mask.A). - */ - if (mask_picture->componentAlpha && - PICT_FORMAT_RGB(mask_picture->format)) { - if (i915_blend_op[op].src_alpha) { - i915_fs_mul(out_reg, - i915_fs_operand(src_reg, W, W, W, W), - i915_fs_operand_reg(mask_reg)); - } else { - i915_fs_mul(out_reg, - i915_fs_operand_reg(src_reg), - i915_fs_operand_reg(mask_reg)); - } - } else { - i915_fs_mul(out_reg, - i915_fs_operand_reg(src_reg), - i915_fs_operand(mask_reg, W, W, W, W)); - } - } - if (dst_format == COLR_BUF_8BIT) - i915_fs_mov(FS_OC, i915_fs_operand(out_reg, W, W, W, W)); +static void +i915_emit_composite_primitive_identity_source_mask(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); - FS_END(); - } + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + 
OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX(maskY * intel->scale_units[1][1]); } -/* Emit the vertices for a single composite rectangle. - * - * This function is no longer shared between i830 and i915 generation code. - */ static void i915_emit_composite_primitive(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, - int dstX, int dstY, int w, int h) + int dstX, int dstY, + int w, int h) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); @@ -718,28 +547,25 @@ i915_emit_composite_primitive(PixmapPtr dest, per_vertex = 2; /* dest x/y */ if (! intel->render_source_is_solid) { - float x = srcX + intel->src_coord_adjust; - float y = srcY + intel->src_coord_adjust; - src_unit = tex_unit++; is_affine_src = i830_transform_is_affine(intel->transform[src_unit]); if (is_affine_src) { - if (!i830_get_transformed_coordinates(x, y, + if (!i830_get_transformed_coordinates(srcX, srcY, intel-> transform[src_unit], &src_x[0], &src_y[0])) return; - if (!i830_get_transformed_coordinates(x, y + h, + if (!i830_get_transformed_coordinates(srcX, srcY + h, intel-> transform[src_unit], &src_x[1], &src_y[1])) return; - if (!i830_get_transformed_coordinates(x + w, y + h, + if (!i830_get_transformed_coordinates(srcX + w, srcY + h, intel-> transform[src_unit], &src_x[2], @@ -748,7 +574,7 @@ i915_emit_composite_primitive(PixmapPtr dest, per_vertex += 2; /* src x/y */ } else { - if (!i830_get_transformed_coordinates_3d(x, y, + if (!i830_get_transformed_coordinates_3d(srcX, srcY, intel-> transform[src_unit], &src_x[0], @@ -756,7 +582,7 @@ 
i915_emit_composite_primitive(PixmapPtr dest, &src_w[0])) return; - if (!i830_get_transformed_coordinates_3d(x, y + h, + if (!i830_get_transformed_coordinates_3d(srcX, srcY + h, intel-> transform[src_unit], &src_x[1], @@ -764,7 +590,7 @@ i915_emit_composite_primitive(PixmapPtr dest, &src_w[1])) return; - if (!i830_get_transformed_coordinates_3d(x + w, y + h, + if (!i830_get_transformed_coordinates_3d(srcX + w, srcY + h, intel-> transform[src_unit], &src_x[2], @@ -777,28 +603,25 @@ i915_emit_composite_primitive(PixmapPtr dest, } if (intel->render_mask && ! intel->render_mask_is_solid) { - float x = maskX + intel->mask_coord_adjust; - float y = maskY + intel->mask_coord_adjust; - mask_unit = tex_unit++; is_affine_mask = i830_transform_is_affine(intel->transform[mask_unit]); if (is_affine_mask) { - if (!i830_get_transformed_coordinates(x, y, + if (!i830_get_transformed_coordinates(maskX, maskY, intel-> transform[mask_unit], &mask_x[0], &mask_y[0])) return; - if (!i830_get_transformed_coordinates(x, y + h, + if (!i830_get_transformed_coordinates(maskX, maskY + h, intel-> transform[mask_unit], &mask_x[1], &mask_y[1])) return; - if (!i830_get_transformed_coordinates(x + w, y + h, + if (!i830_get_transformed_coordinates(maskX + w, maskY + h, intel-> transform[mask_unit], &mask_x[2], @@ -807,7 +630,7 @@ i915_emit_composite_primitive(PixmapPtr dest, per_vertex += 2; /* mask x/y */ } else { - if (!i830_get_transformed_coordinates_3d(x, y, + if (!i830_get_transformed_coordinates_3d(maskX, maskY, intel-> transform[mask_unit], &mask_x[0], @@ -815,7 +638,7 @@ i915_emit_composite_primitive(PixmapPtr dest, &mask_w[0])) return; - if (!i830_get_transformed_coordinates_3d(x, y + h, + if (!i830_get_transformed_coordinates_3d(maskX, maskY + h, intel-> transform[mask_unit], &mask_x[1], @@ -823,7 +646,7 @@ i915_emit_composite_primitive(PixmapPtr dest, &mask_w[1])) return; - if (!i830_get_transformed_coordinates_3d(x + w, y + h, + if (!i830_get_transformed_coordinates_3d(maskX + w, maskY 
+ h, intel-> transform[mask_unit], &mask_x[2], @@ -837,67 +660,444 @@ i915_emit_composite_primitive(PixmapPtr dest, num_floats = 3 * per_vertex; - ATOMIC_BATCH(1 + num_floats); - - OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (num_floats - 1)); - OUT_BATCH_F(intel->dst_coord_adjust + dstX + w); - OUT_BATCH_F(intel->dst_coord_adjust + dstY + h); + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); if (! intel->render_source_is_solid) { - OUT_BATCH_F(src_x[2] / intel->scale_units[src_unit][0]); - OUT_BATCH_F(src_y[2] / intel->scale_units[src_unit][1]); + OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]); if (!is_affine_src) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(src_w[2]); + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[2]); } } if (intel->render_mask && ! intel->render_mask_is_solid) { - OUT_BATCH_F(mask_x[2] / intel->scale_units[mask_unit][0]); - OUT_BATCH_F(mask_y[2] / intel->scale_units[mask_unit][1]); + OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]); if (!is_affine_mask) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(mask_w[2]); + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[2]); } } - OUT_BATCH_F(intel->dst_coord_adjust + dstX); - OUT_BATCH_F(intel->dst_coord_adjust + dstY + h); + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); if (! intel->render_source_is_solid) { - OUT_BATCH_F(src_x[1] / intel->scale_units[src_unit][0]); - OUT_BATCH_F(src_y[1] / intel->scale_units[src_unit][1]); + OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]); if (!is_affine_src) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(src_w[1]); + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[1]); } } if (intel->render_mask && ! 
intel->render_mask_is_solid) { - OUT_BATCH_F(mask_x[1] / intel->scale_units[mask_unit][0]); - OUT_BATCH_F(mask_y[1] / intel->scale_units[mask_unit][1]); + OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]); if (!is_affine_mask) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(mask_w[1]); + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[1]); } } - OUT_BATCH_F(intel->dst_coord_adjust + dstX); - OUT_BATCH_F(intel->dst_coord_adjust + dstY); + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); if (! intel->render_source_is_solid) { - OUT_BATCH_F(src_x[0] / intel->scale_units[src_unit][0]); - OUT_BATCH_F(src_y[0] / intel->scale_units[src_unit][1]); + OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]); if (!is_affine_src) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(src_w[0]); + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[0]); } } if (intel->render_mask && ! intel->render_mask_is_solid) { - OUT_BATCH_F(mask_x[0] / intel->scale_units[mask_unit][0]); - OUT_BATCH_F(mask_y[0] / intel->scale_units[mask_unit][1]); + OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]); if (!is_affine_mask) { - OUT_BATCH_F(0.0); - OUT_BATCH_F(mask_w[0]); + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[0]); + } + } +} + +Bool +i915_prepare_composite(int op, PicturePtr source_picture, + PicturePtr mask_picture, PicturePtr dest_picture, + PixmapPtr source, PixmapPtr mask, PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + i830_get_pixmap_bo(dest), + source ? i830_get_pixmap_bo(source) : NULL, + mask ? 
i830_get_pixmap_bo(mask) : NULL, + }; + int tex_unit = 0; + int floats_per_vertex; + + intel->render_source_picture = source_picture; + intel->render_source = source; + intel->render_mask_picture = mask_picture; + intel->render_mask = mask; + intel->render_dest_picture = dest_picture; + intel->render_dest = dest; + + intel->render_source_is_solid = FALSE; + if (source_picture->pSourcePict) { + SourcePict *source = source_picture->pSourcePict; + if (source->type == SourcePictTypeSolidFill) { + intel->render_source_is_solid = TRUE; + intel->render_source_solid = source->solidFill.color; + } + } + if (!intel->render_source_is_solid && !intel_check_pitch_3d(source)) + return FALSE; + + intel->render_mask_is_solid = FALSE; + if (mask) { + if (mask_picture->pSourcePict) { + SourcePict *source = mask_picture->pSourcePict; + if (source->type == SourcePictTypeSolidFill) { + intel->render_mask_is_solid = TRUE; + intel->render_mask_solid = source->solidFill.color; + } + } + if (!intel->render_mask_is_solid && !intel_check_pitch_3d(mask)) + return FALSE; + } + + if (!intel_check_pitch_3d(dest)) + return FALSE; + + if (!i915_get_dest_format(dest_picture, + &intel->i915_render_state.dst_format)) + return FALSE; + + if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + + intel->needs_render_ca_pass = FALSE; + if (mask_picture != NULL && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. 
+ */ + if (i915_blend_op[op].src_alpha && + (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) { + if (op != PictOpOver) + return FALSE; + + intel->needs_render_ca_pass = TRUE; + } + } + + intel->transform[0] = NULL; + intel->scale_units[0][0] = -1; + intel->scale_units[0][1] = -1; + intel->transform[1] = NULL; + intel->scale_units[1][0] = -1; + intel->scale_units[1][1] = -1; + + floats_per_vertex = 2; /* dest x/y */ + if (! intel->render_source_is_solid) { + if (!i915_texture_setup(source_picture, source, tex_unit++)) { + intel_debug_fallback(scrn, "fail to setup src texture\n"); + return FALSE; + } + + if (i830_transform_is_affine(source_picture->transform)) + floats_per_vertex += 2; /* src x/y */ + else + floats_per_vertex += 4; /* src x/y/z/w */ + } + + if (mask != NULL) { + if (! intel->render_mask_is_solid) { + if (!i915_texture_setup(mask_picture, mask, tex_unit++)) { + intel_debug_fallback(scrn, + "fail to setup mask texture\n"); + return FALSE; + } + + if (i830_transform_is_affine(mask_picture->transform)) + floats_per_vertex += 2; /* mask x/y */ + else + floats_per_vertex += 4; /* mask x/y/z/w */ } } - ADVANCE_BATCH(); + intel->i915_render_state.op = op; + + /* BUF_INFO is an implicit flush */ + if (dest != intel->render_current_dest) + intel_batch_do_flush(scrn); + else if((source && i830_uxa_pixmap_is_dirty(source)) || + (mask && i830_uxa_pixmap_is_dirty(mask))) + intel_batch_emit_flush(scrn); + + intel->needs_render_state_emit = TRUE; + + intel->prim_emit = i915_emit_composite_primitive; + if (!mask) { + if (intel->render_source_is_solid) + intel->prim_emit = i915_emit_composite_primitive_constant; + else if (intel->transform[0] == NULL) + intel->prim_emit = i915_emit_composite_primitive_identity_source; + else if (i830_transform_is_affine(intel->transform[0])) + intel->prim_emit = i915_emit_composite_primitive_affine_source; + } else { + if (intel->transform[0] == NULL) { + if (intel->render_source_is_solid) + intel->prim_emit = 
i915_emit_composite_primitive_constant_identity_mask; + else if (intel->transform[1] == NULL) + intel->prim_emit = i915_emit_composite_primitive_identity_source_mask; + } + } + + if (floats_per_vertex != intel->floats_per_vertex) { + intel->floats_per_vertex = floats_per_vertex; + intel->needs_render_vertex_emit = TRUE; + } + + return TRUE; +} + +static void +i915_composite_emit_shader(intel_screen_private *intel, CARD8 op) +{ + PicturePtr mask_picture = intel->render_mask_picture; + PixmapPtr mask = intel->render_mask; + int src_reg, mask_reg; + Bool is_solid_src, is_solid_mask; + Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0; + int tex_unit, t; + FS_LOCALS(); + + is_solid_src = intel->render_source_is_solid; + is_solid_mask = intel->render_mask_is_solid; + + FS_BEGIN(); + + /* Declare the registers necessary for our program. */ + t = 0; + if (is_solid_src) { + i915_fs_dcl(FS_T8); + src_reg = FS_T8; + } else { + i915_fs_dcl(FS_T0); + i915_fs_dcl(FS_S0); + t++; + } + if (!mask) { + /* No mask, so load directly to output color */ + if (! is_solid_src) { + if (dest_is_alpha) + src_reg = FS_R0; + else + src_reg = FS_OC; + + if (i830_transform_is_affine(intel->transform[0])) + i915_fs_texld(src_reg, FS_S0, FS_T0); + else + i915_fs_texldp(src_reg, FS_S0, FS_T0); + } + + if (src_reg != FS_OC) { + if (dest_is_alpha) + i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W)); + else + i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg)); + } + } else { + if (is_solid_mask) { + i915_fs_dcl(FS_T9); + mask_reg = FS_T9; + } else { + i915_fs_dcl(FS_T0 + t); + i915_fs_dcl(FS_S0 + t); + } + + tex_unit = 0; + if (! is_solid_src) { + /* Load the source_picture texel */ + if (i830_transform_is_affine(intel->transform[tex_unit])) + i915_fs_texld(FS_R0, FS_S0, FS_T0); + else + i915_fs_texldp(FS_R0, FS_S0, FS_T0); + + src_reg = FS_R0; + tex_unit++; + } + + if (! 
is_solid_mask) { + /* Load the mask_picture texel */ + if (i830_transform_is_affine(intel->transform[tex_unit])) + i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t); + else + i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t); + + mask_reg = FS_R1; + } + + if (dest_is_alpha) { + i915_fs_mul(FS_OC, + i915_fs_operand(src_reg, W, W, W, W), + i915_fs_operand(mask_reg, W, W, W, W)); + } else { + /* If component alpha is active in the mask and the blend + * operation uses the source alpha, then we know we don't + * need the source value (otherwise we would have hit a + * fallback earlier), so we provide the source alpha (src.A * + * mask.X) as output color. + * Conversely, if CA is set and we don't need the source alpha, + * then we produce the source value (src.X * mask.X) and the + * source alpha is unused. Otherwise, we provide the non-CA + * source value (src.X * mask.A). + */ + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + if (i915_blend_op[op].src_alpha) { + i915_fs_mul(FS_OC, + i915_fs_operand(src_reg, W, W, W, W), + i915_fs_operand_reg(mask_reg)); + } else { + i915_fs_mul(FS_OC, + i915_fs_operand_reg(src_reg), + i915_fs_operand_reg(mask_reg)); + } + } else { + i915_fs_mul(FS_OC, + i915_fs_operand_reg(src_reg), + i915_fs_operand(mask_reg, W, W, W, W)); + } + } + } + + FS_END(); +} + +static void i915_emit_composite_setup(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int op = intel->i915_render_state.op; + PicturePtr mask_picture = intel->render_mask_picture; + PicturePtr dest_picture = intel->render_dest_picture; + PixmapPtr mask = intel->render_mask; + PixmapPtr dest = intel->render_dest; + Bool is_solid_src, is_solid_mask; + int tex_count, t; + + intel->needs_render_state_emit = FALSE; + + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_RENDER; + + is_solid_src = intel->render_source_is_solid; + is_solid_mask = intel->render_mask_is_solid; + + tex_count = 0; + tex_count += ! 
is_solid_src; + tex_count += mask && ! is_solid_mask; + + assert(intel->in_batch_atomic); + + if (tex_count != 0) { + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); + OUT_BATCH((1 << tex_count) - 1); + for (t = 0; t < tex_count; t++) { + OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(intel->mapstate[3*t + 1]); + OUT_BATCH(intel->mapstate[3*t + 2]); + } + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); + OUT_BATCH((1 << tex_count) - 1); + for (t = 0; t < tex_count; t++) { + OUT_BATCH(intel->samplerstate[3*t + 0]); + OUT_BATCH(intel->samplerstate[3*t + 1]); + OUT_BATCH(intel->samplerstate[3*t + 2]); + } + } + + if (is_solid_src) { + OUT_BATCH (_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH (intel->render_source_solid); + } + if (mask && is_solid_mask) { + OUT_BATCH (_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH (intel->render_mask_solid); + } + + /* BUF_INFO is an implicit flush, so avoid if the target has not changed. + * XXX However for reasons unfathomed, correct rendering in KDE requires + * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here. + */ + if (1 || dest != intel->render_current_dest) { + uint32_t tiling_bits; + + intel_batch_do_flush(scrn); + + if (i830_pixmap_tiled(dest)) { + tiling_bits = BUF_3D_TILED_SURFACE; + if (i830_get_pixmap_intel(dest)->tiling + == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + } else + tiling_bits = 0; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(intel_get_pixmap_pitch(dest))); + OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(intel->i915_render_state.dst_format); + + /* draw rect is unconditional */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0x00000000); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | + DRAW_XMAX(dest->drawable.width - 1)); + /* yorig, xorig (relate to color buffer?) 
*/ + OUT_BATCH(0x00000000); + + intel->render_current_dest = dest; + } + + { + uint32_t ss2; + + ss2 = ~0; + t = 0; + if (! is_solid_src) { + ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(t, + i830_transform_is_affine(intel->transform[t]) ? + TEXCOORDFMT_2D : TEXCOORDFMT_4D); + t++; + } + if (mask && ! is_solid_mask) { + ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(t, + i830_transform_is_affine(intel->transform[t]) ? + TEXCOORDFMT_2D : TEXCOORDFMT_4D); + t++; + } + + if (intel->needs_render_ca_pass) { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0); + OUT_BATCH(ss2); + } else { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); + OUT_BATCH(ss2); + OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format)); + } + } + + if (! intel->needs_render_ca_pass) + i915_composite_emit_shader(intel, op); } void @@ -913,15 +1113,87 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, if (intel->needs_render_state_emit) i915_emit_composite_setup(scrn); - i915_emit_composite_primitive(dest, srcX, srcY, maskX, maskY, dstX, - dstY, w, h); + if (intel->needs_render_vertex_emit || + intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { + i915_vertex_flush(intel); + + if (intel_vertex_space(intel) < 256) { + intel_next_vertex(intel); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + intel->vertex_index = 0; + } else if (intel->floats_per_vertex != intel->last_floats_per_vertex){ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(1) | 0); + OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + + intel->vertex_index = + (intel->vertex_used + 
intel->floats_per_vertex - 1) / intel->floats_per_vertex; + intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; + } + + intel->last_floats_per_vertex = intel->floats_per_vertex; + intel->needs_render_vertex_emit = FALSE; + } + + if (intel->prim_offset == 0) { + if (intel->needs_render_ca_pass) { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); + OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse, + intel->render_mask_picture, + intel->render_dest_picture->format)); + i915_composite_emit_shader(intel, PictOpOutReverse); + } + + intel->prim_offset = intel->batch_used; + OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL); + OUT_BATCH(intel->vertex_index); + } + intel->vertex_count += 3; + + intel->prim_emit(dest, + srcX, srcY, + maskX, maskY, + dstX, dstY, + w, h); intel_batch_end_atomic(scrn); } -void i915_batch_flush_notify(ScrnInfoPtr scrn) +void +i915_vertex_flush(intel_screen_private *intel) +{ + if (intel->prim_offset == 0) + return; + + intel->batch_ptr[intel->prim_offset] |= intel->vertex_count; + intel->prim_offset = 0; + + if (intel->needs_render_ca_pass) { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); + OUT_BATCH(i915_get_blend_cntl(PictOpAdd, + intel->render_mask_picture, + intel->render_dest_picture->format)); + i915_composite_emit_shader(intel, PictOpAdd); + OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | intel->vertex_count); + OUT_BATCH(intel->vertex_index); + } + + intel->vertex_index += intel->vertex_count; + intel->vertex_count = 0; +} + +void +i915_batch_flush_notify(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); intel->needs_render_state_emit = TRUE; + intel->render_current_dest = NULL; + intel->last_floats_per_vertex = 0; } diff --git a/src/i915_video.c b/src/i915_video.c index 927047be..893855bd 100644 --- a/src/i915_video.c +++ b/src/i915_video.c @@ -49,17 +49,47 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, PixmapPtr pixmap) { intel_screen_private 
*intel = intel_get_screen_private(scrn); - uint32_t format, ms3, s5; + uint32_t format, ms3, s5, tiling; BoxPtr pbox = REGION_RECTS(dstRegion); int nbox_total = REGION_NUM_RECTS(dstRegion); int nbox_this_time; int dxo, dyo, pix_xoff, pix_yoff; + PixmapPtr target; #if 0 ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height, video_pitch); #endif + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 || + !intel_check_pitch_3d(pixmap)) { + ScreenPtr screen = pixmap->drawable.pScreen; + + target = screen->CreatePixmap(screen, + drw_w, drw_h, + pixmap->drawable.depth, + CREATE_PIXMAP_USAGE_SCRATCH); + + pix_xoff = -dxo; + pix_yoff = -dyo; + } else { + target = pixmap; + + /* Set up the offset for translating from the given region + * (in screen coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -target->screen_x + target->drawable.x; + pix_yoff = -target->screen_y + target->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + } + #define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4) #define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20) #define BATCH_BYTES(p) ((p)->batch_bo->size - 16) @@ -75,26 +105,18 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_VIDEO; - ATOMIC_BATCH(20); - - /* flush map & render cache */ - OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | - MI_INVALIDATE_MAP_CACHE); - OUT_BATCH(0x00000000); - /* draw rect -- just clipping */ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) | DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3)); OUT_BATCH(0x00000000); /* ymin, xmin */ /* ymax, xmax */ - OUT_BATCH((pixmap->drawable.width - 1) | - (pixmap->drawable.height - 1) << 16); + OUT_BATCH((target->drawable.width - 1) | + (target->drawable.height - 1) << 16); OUT_BATCH(0x00000000); /* yorigin, xorigin */ - OUT_BATCH(MI_NOOP); OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) 
| - I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); + I1_LOAD_S(5) | I1_LOAD_S(6) | 2); OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | @@ -103,8 +125,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); - OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE | - S4_CULLMODE_NONE | S4_VFMT_XY); s5 = 0x0; if (intel->cpp == 2) s5 |= S5_COLOR_DITHER_ENABLE; @@ -129,17 +149,21 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, DSTORG_VERT_BIAS(0x8) | format); /* front buffer, pitch, offset */ + if (i830_pixmap_tiled(target)) { + tiling = BUF_3D_TILED_SURFACE; + if (i830_get_pixmap_intel(target)->tiling == I915_TILING_Y) + tiling |= BUF_3D_TILE_WALK_Y; + } else + tiling = 0; OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_USE_FENCE | - BUF_3D_PITCH(intel_get_pixmap_pitch(pixmap))); - OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_RENDER, + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling | + BUF_3D_PITCH(intel_get_pixmap_pitch(target))); + OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - ADVANCE_BATCH(); if (!is_planar_fourcc(id)) { - FS_LOCALS(10); + FS_LOCALS(); - ATOMIC_BATCH(16); OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); OUT_BATCH(0x0000001); /* constant 0 */ /* constant 0: brightness/contrast */ @@ -170,7 +194,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, else OUT_BATCH(adaptor_priv->YBufOffset); - ms3 = MAPSURF_422 | MS3_USE_FENCE_REGS; + ms3 = MAPSURF_422; switch (id) { case FOURCC_YUY2: ms3 |= MT_422_YCRCB_NORMAL; @@ -184,8 +208,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, OUT_BATCH(ms3); OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); - ADVANCE_BATCH(); - FS_BEGIN(); i915_fs_dcl(FS_S0); i915_fs_dcl(FS_T0); @@ -198,9 +220,8 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, } FS_END(); } 
else { - FS_LOCALS(16); + FS_LOCALS(); - ATOMIC_BATCH(22 + 11 + 11); /* For the planar formats, we set up three samplers -- * one for each plane, in a Y8 format. Because I * couldn't get the special PLANAR_TO_PACKED @@ -292,7 +313,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, else OUT_BATCH(adaptor_priv->YBufOffset); - ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; + ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); @@ -314,7 +335,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, else OUT_BATCH(adaptor_priv->UBufOffset); - ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; + ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); @@ -327,12 +348,11 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, else OUT_BATCH(adaptor_priv->VBufOffset); - ms3 = MAPSURF_8BIT | MT_8BIT_I8 | MS3_USE_FENCE_REGS; + ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); - ADVANCE_BATCH(); FS_BEGIN(); /* Declare samplers */ @@ -365,15 +385,15 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, * color. The OC results are implicitly clamped * at the end of the program. 
*/ - i915_fs_dp3_masked(FS_OC, MASK_X, - i915_fs_operand_reg(FS_R0), - i915_fs_operand_reg(FS_C1)); - i915_fs_dp3_masked(FS_OC, MASK_Y, - i915_fs_operand_reg(FS_R0), - i915_fs_operand_reg(FS_C2)); - i915_fs_dp3_masked(FS_OC, MASK_Z, - i915_fs_operand_reg(FS_R0), - i915_fs_operand_reg(FS_C3)); + i915_fs_dp3(FS_OC, MASK_X, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C1)); + i915_fs_dp3(FS_OC, MASK_Y, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C2)); + i915_fs_dp3(FS_OC, MASK_Z, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C3)); /* Set alpha of the output to 1.0, by wiring W to 1 * and not actually using the source. */ @@ -389,28 +409,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, FS_END(); } - { - ATOMIC_BATCH(2); - OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | - MI_INVALIDATE_MAP_CACHE); - OUT_BATCH(0x00000000); - ADVANCE_BATCH(); - } - - /* Set up the offset for translating from the given region - * (in screen coordinates) to the backing pixmap. - */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - - dxo = dstRegion->extents.x1; - dyo = dstRegion->extents.y1; - + OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); while (nbox_this_time--) { int box_x1 = pbox->x1; int box_y1 = pbox->y1; @@ -423,19 +422,9 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, src_scale_x = ((float)src_w / width) / drw_w; src_scale_y = ((float)src_h / height) / drw_h; - ATOMIC_BATCH(8 + 12); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - OUT_BATCH(MI_NOOP); - /* vertex data - rect list consists of bottom right, * bottom left, and top left vertices. 
*/ - OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 - 1)); /* bottom right */ OUT_BATCH_F(box_x2 + pix_xoff); @@ -454,12 +443,39 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn, OUT_BATCH_F(box_y1 + pix_yoff); OUT_BATCH_F((box_x1 - dxo) * src_scale_x); OUT_BATCH_F((box_y1 - dyo) * src_scale_y); - - ADVANCE_BATCH(); } intel_batch_end_atomic(scrn); } + if (target != pixmap) { + GCPtr gc; + + gc = GetScratchGC(pixmap->drawable.depth, + pixmap->drawable.pScreen); + if (gc) { + RegionPtr tmp; + + ValidateGC(&pixmap->drawable, gc); + + if (REGION_NUM_RECTS(dstRegion) > 1) { + tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0); + if (tmp) { + REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion); + gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0); + } + } + + gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc, + 0, 0, + target->drawable.width, + target->drawable.height, + -pix_xoff, -pix_yoff); + FreeScratchGC(gc); + } + + target->drawable.pScreen->DestroyPixmap(target); + } + i830_debug_flush(scrn); } diff --git a/src/i965_render.c b/src/i965_render.c index 7866dd75..75d63807 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -183,10 +183,52 @@ static Bool i965_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format) return TRUE; } -static Bool i965_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, - int unit) +Bool +i965_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) +{ + ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; + uint32_t tmp1; + + /* Check for unsupported compositing operations. 
*/ + if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { + intel_debug_fallback(scrn, + "Unsupported Composite op 0x%x\n", op); + return FALSE; + } + + if (mask_picture && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i965_blend_op[op].src_alpha && + (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + + if (!i965_get_dest_format(dest_picture, &tmp1)) { + intel_debug_fallback(scrn, "Get Color buffer format\n"); + return FALSE; + } + + return TRUE; +} + +Bool +i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) { if (picture->repeatType > RepeatReflect) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "extended repeat (%d) not supported\n", picture->repeatType); @@ -195,6 +237,7 @@ static Bool i965_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, if (picture->filter != PictFilterNearest && picture->filter != PictFilterBilinear) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", picture->filter); return FALSE; @@ -206,6 +249,7 @@ static Bool i965_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, w = picture->pDrawable->width; h = picture->pDrawable->height; if ((w > 8192) || (h > 8192)) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Picture w/h too large (%dx%d)\n", w, h); @@ -220,66 +264,21 @@ static Bool i965_check_composite_texture(ScrnInfoPtr scrn, PicturePtr picture, } if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) { + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; intel_debug_fallback(scrn, "Unsupported picture 
format " "0x%x\n", (int)picture->format); return FALSE; } - } - - return TRUE; -} - -Bool -i965_check_composite(int op, PicturePtr source_picture, PicturePtr mask_picture, - PicturePtr dest_picture) -{ - ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum]; - uint32_t tmp1; - - /* Check for unsupported compositing operations. */ - if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { - intel_debug_fallback(scrn, - "Unsupported Composite op 0x%x\n", op); - return FALSE; - } - - if (mask_picture && mask_picture->componentAlpha && - PICT_FORMAT_RGB(mask_picture->format)) { - /* Check if it's component alpha that relies on a source alpha and on - * the source value. We can only get one of those into the single - * source value that we get to blend with. - */ - if (i965_blend_op[op].src_alpha && - (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { - intel_debug_fallback(scrn, - "Component alpha not supported " - "with source alpha and source " - "value blending.\n"); - return FALSE; - } - } - if (!i965_check_composite_texture(scrn, source_picture, 0)) { - intel_debug_fallback(scrn, "Check Src picture texture\n"); - return FALSE; - } - if (mask_picture != NULL - && !i965_check_composite_texture(scrn, mask_picture, 1)) { - intel_debug_fallback(scrn, "Check Mask picture texture\n"); - return FALSE; + return TRUE; } - if (!i965_get_dest_format(dest_picture, &tmp1)) { - intel_debug_fallback(scrn, "Get Color buffer format\n"); - return FALSE; - } - - return TRUE; - + return FALSE; } + #define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) /* Set up a default static partitioning of the URB, which is supposed to @@ -1143,6 +1142,12 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_RENDER; + /* Mark the destination dirty within this batch */ + intel_batch_mark_pixmap_domains(intel, + i830_get_pixmap_intel(dest), + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + urb_vs_start = 0; 
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; urb_gs_start = urb_vs_start + urb_vs_size; @@ -1167,12 +1172,8 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) */ ALIGN_BATCH(64); + assert(intel->in_batch_atomic); { - if (IS_IGDNG(intel)) - ATOMIC_BATCH(14); - else - ATOMIC_BATCH(12); - /* Match Mesa driver setup */ OUT_BATCH(MI_FLUSH | MI_STATE_INSTRUCTION_CACHE_FLUSH | @@ -1215,12 +1216,17 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) OUT_BATCH(BRW_STATE_SIP | 0); OUT_RELOC(render_state->sip_kernel_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - ADVANCE_BATCH(); + } + + if (IS_IGDNG(intel)) { + /* Ironlake errata workaround: Before disabling the clipper, + * you have to MI_FLUSH to get the pipeline idle. + */ + OUT_BATCH(MI_FLUSH); } { int pipe_ctrl; - ATOMIC_BATCH(26); /* Pipe control */ if (IS_IGDNG(intel)) @@ -1315,7 +1321,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) OUT_BATCH(BRW_CS_URB_STATE | 0); OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); - ADVANCE_BATCH(); } { /* @@ -1342,7 +1347,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) } if (IS_IGDNG(intel)) { - ATOMIC_BATCH(mask ? 9 : 7); /* * The reason to add this extra vertex element in the header is that * IGDNG has different vertex header definition and origin method to @@ -1372,7 +1376,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); } else { - ATOMIC_BATCH(mask ? 7 : 5); /* Set up our vertex elements, sourced from the single vertex buffer. * that will be set up later. 
*/ @@ -1434,8 +1437,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) else OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (w_component << VE1_VFCOMPONENT_2_SHIFT) | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ } - - ADVANCE_BATCH(); } } @@ -1496,6 +1497,22 @@ i965_prepare_composite(int op, PicturePtr source_picture, } if (mask_picture) { + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i965_blend_op[op].src_alpha && + (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + composite_op->mask_filter = sampler_state_filter_from_picture(mask_picture->filter); if (composite_op->mask_filter < 0) { @@ -1515,6 +1532,12 @@ i965_prepare_composite(int op, PicturePtr source_picture, composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE; } + /* Flush any pending writes prior to relocating the textures. */ + if(i830_uxa_pixmap_is_dirty(source) || + (mask && i830_uxa_pixmap_is_dirty(mask))) + intel_batch_emit_flush(scrn); + + /* Set up the surface states. 
*/ surface_state_bo = dri_bo_alloc(intel->bufmgr, "surface_state", 3 * sizeof(brw_surface_state_padded), @@ -1638,7 +1661,7 @@ i965_prepare_composite(int op, PicturePtr source_picture, } if (!i965_composite_check_aperture(scrn)) { - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); if (!i965_composite_check_aperture(scrn)) { intel_debug_fallback(scrn, "Couldn't fit render operation " @@ -1647,10 +1670,6 @@ i965_prepare_composite(int op, PicturePtr source_picture, } } - if(i830_uxa_pixmap_is_dirty(source) || - (mask && i830_uxa_pixmap_is_dirty(mask))) - intel_batch_emit_flush(scrn); - intel->needs_render_state_emit = TRUE; return TRUE; @@ -1815,13 +1834,12 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb); if (!i965_composite_check_aperture(scrn)) - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); intel_batch_start_atomic(scrn, 200); if (intel->needs_render_state_emit) i965_emit_composite_state(scrn); - ATOMIC_BATCH(12); OUT_BATCH(MI_FLUSH); /* Set up the pointer to our (single) vertex buffer */ OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); @@ -1846,7 +1864,6 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, OUT_BATCH(1); /* single instance */ OUT_BATCH(0); /* start instance location */ OUT_BATCH(0); /* index buffer offset, ignored */ - ADVANCE_BATCH(); render_state->vb_offset += i; drm_intel_bo_unreference(vb_bo); diff --git a/src/i965_video.c b/src/i965_video.c index c5378608..e25184b6 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -779,18 +779,12 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf) urb_cs_start = urb_sf_start + urb_sf_size; urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; - ATOMIC_BATCH(2); OUT_BATCH(MI_FLUSH | MI_STATE_INSTRUCTION_CACHE_FLUSH | BRW_MI_GLOBAL_SNAPSHOT_RESET); OUT_BATCH(MI_NOOP); - ADVANCE_BATCH(); /* brw_debug (scrn, "before base address modify"); */ - 
if (IS_IGDNG(intel)) - ATOMIC_BATCH(14); - else - ATOMIC_BATCH(12); /* Match Mesa driver setup */ if (IS_G4X(intel) || IS_IGDNG(intel)) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); @@ -834,9 +828,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf) OUT_RELOC(intel->video.gen4_sip_kernel_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(MI_NOOP); - ADVANCE_BATCH(); - /* brw_debug (scrn, "after base address modify"); */ if (IS_IGDNG(intel)) @@ -844,10 +835,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf) else pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; - ATOMIC_BATCH(38); - - OUT_BATCH(MI_NOOP); - /* Pipe control */ OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); OUT_BATCH(0); /* Destination address */ @@ -971,9 +958,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf) VE1_VFCOMPONENT_3_SHIFT) | (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); } - - OUT_BATCH(MI_NOOP); /* pad to quadword */ - ADVANCE_BATCH(); } void @@ -1213,14 +1197,13 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn, if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) { - intel_batch_submit(scrn); + intel_batch_submit(scrn, FALSE); } intel_batch_start_atomic(scrn, 100); i965_emit_video_setup(scrn, bind_bo, n_src_surf); - ATOMIC_BATCH(12); /* Set up the pointer to our vertex buffer */ OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); /* four 32-bit floats per vertex */ @@ -1242,7 +1225,6 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn, OUT_BATCH(0); /* start instance location */ OUT_BATCH(0); /* index buffer offset, ignored */ OUT_BATCH(MI_NOOP); - ADVANCE_BATCH(); intel_batch_end_atomic(scrn); diff --git a/src/xvmc/i965_xvmc.c b/src/xvmc/i965_xvmc.c index 1665f6d0..52b93e32 100644 --- a/src/xvmc/i965_xvmc.c +++ b/src/xvmc/i965_xvmc.c @@ -207,7 +207,7 @@ struct media_state { }; struct media_state media_state; -static int free_object(struct media_state *s) +static 
void free_object(struct media_state *s) { int i; #define FREE_ONE_BO(bo) drm_intel_bo_unreference(bo) diff --git a/src/xvmc/xvmc_vld.c b/src/xvmc/xvmc_vld.c index 5687593a..9ef840b3 100644 --- a/src/xvmc/xvmc_vld.c +++ b/src/xvmc/xvmc_vld.c @@ -354,7 +354,7 @@ static struct media_state { /* XvMCQMatrix * 2 + idct_table + 8 * kernel offset pointer */ #define CS_OBJECT_SIZE (32*20 + sizeof(unsigned int) * 8) -static int free_object(struct media_state *s) +static void free_object(struct media_state *s) { int i; #define FREE_ONE_BO(bo) \ |