From 7f252345c0c6b260c5b37aff98a22679eb5c6c82 Mon Sep 17 00:00:00 2001
From: Paulo Cesar Pereira de Andrade
Date: Fri, 19 Dec 2008 21:15:01 -0200
Subject: Enable the SMI 501/502 command list interpreter in a "debug" build.

To enable it, set SMI501_CLI_DEBUG to 1 in smi.h, and use
Option "AccelMethod" "EXA" in the Device section of /etc/X11/xorg.conf

This code is enabled mainly for debug purposes. To make it have an actual
performance gain (like when using a sm50x with a "low profile" "main"
processor) it would be required to actually do the busy loops in kernel
mode (and hope the cost of the context switches pays off). In kernel mode
it is possible to wait for an interrupt being triggered when the command
list is processed, or when the 2d engine is idle.

This commit should be functional, but, mainly due to debug messages,
should be significantly slower than a build with SMI501_CLI_DEBUG defined
to 0.
---
 src/smi.h | 10 ++++++
 src/smi_501.h | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/smi_driver.c | 15 ++++++++
 src/smi_exa.c | 70 ++++++++++++++++++++++++++++++++++---
 4 files changed, 194 insertions(+), 4 deletions(-)

diff --git a/src/smi.h b/src/smi.h
index c31426c..32c13b1 100644
--- a/src/smi.h
+++ b/src/smi.h
@@ -71,6 +71,7 @@ authorization from the XFree86 Project and Silicon Motion.
#define SMI_USE_IMAGE_WRITES 0
 #define SMI_USE_VIDEO 1
 #define SMI_USE_CAPTURE 1
+#define SMI501_CLI_DEBUG 0
 
 /*
  * Leaving attempt implementation of an argb cursor using alpha plane
@@ -262,6 +263,15 @@ typedef struct
     structure */
     void (*BlockHandler)(int i, pointer blockData, pointer pTimeout,
                          pointer pReadMask);
+#ifdef SMI501_CLI_DEBUG /* true even when defined as 0: the fields always exist */
+    /* SMI 501/502 Command List Interpreter */
+    Bool batch_active;
+    int64_t *batch_handle; /* Virtual address */
+    int batch_offset; /* Physical smi 501 address */
+    int batch_length; /* Length in 8 byte units */
+    int batch_finish; /* Last finish command offset */
+    int batch_index;
+#endif
 } SMIRec, *SMIPtr;
 
 #define SMIPTR(p) ((SMIPtr)((p)->driverPrivate))
diff --git a/src/smi_501.h b/src/smi_501.h
index 2a29ce6..c19721b 100644
--- a/src/smi_501.h
+++ b/src/smi_501.h
@@ -1332,6 +1332,109 @@ typedef union smi_cli_entry {
     int64_t value;
 } smi_cli_entry_t;
 
+
+#if SMI501_CLI_DEBUG
+
+/* ensure COUNT command list "slots" 8 bytes wide, plus one for FINISH, are free */
+#define BATCH_BEGIN(COUNT) \
+    do { \
+        if (IS_MSOC(pSmi)) { \
+            smi_cli_entry_t *entry; \
+            MSOCCmdAddrRec address; \
+ \
+            pSmi->batch_active = TRUE; \
+            ErrorF("BATCH_BEGIN(%d)\n", (COUNT)); \
+            /* One for finish */ \
+            if (pSmi->batch_index + (COUNT) + 1 >= pSmi->batch_length) { \
+                entry = (smi_cli_entry_t *) \
+                    &pSmi->batch_handle[pSmi->batch_index]; \
+                entry->f.cmd = smi_cli_goto; \
+                /* start of buffer */ \
+                entry->f.base = pSmi->batch_offset; \
+                /* absolute jump */ \
+                entry->f.data = 0; \
+                ErrorF("wrap: from %d\n", pSmi->batch_index); \
+                address.value = READ_SCR(pSmi, CMD_ADDR); \
+                pSmi->batch_index = 0; \
+            } \
+        } else { WaitQueue(); } /* no command list: plain FIFO wait before direct writes */ \
+    } while (0)
+
+/* append a load register command (register PORT, data VALUE) at batch_index */
+#define BATCH_LOAD_REG(PORT, VALUE) \
+    do { \
+        smi_cli_entry_t *entry = (smi_cli_entry_t *) \
+            &pSmi->batch_handle[pSmi->batch_index++]; \
+ \
+        ErrorF("BATCH_LOAD_REG(%x, %x)\n", (unsigned)(PORT), (unsigned)(VALUE)); \
+        entry->f.cmd = smi_cli_load_reg; \
+        entry->f.base = (PORT); \
+        entry->f.data = (VALUE); \
+    } while (0)
+
+/* Appending to the Command List
+ *
+ * The procedure for chaining command lists is:
+ * 1. Fill the command list buffer after the last FINISH command.
+ *    The software should always keep track of the address of the
+ *    last FINISH command.
+ * 2. Terminate the command list with a FINISH and remember the
+ *    address of this FINISH.
+ * 3. Stop the command list by programming "0" in bit 31 of the
+ *    Command List Address register.
+ * 4. Read and remember the current program counter.
+ * 5. Replace the previous FINISH command with a NOP command
+ *    (00000000C0000000).
+ * 6. Restart the command list by programming the saved program counter
+ *    and "1" in bit 31 of the Command List Address register.
+ */
+#define BATCH_END() \
+    do { \
+        if (pSmi->batch_active) { \
+            MSOCCmdAddrRec address; \
+            smi_cli_entry_t *entry = (smi_cli_entry_t *) \
+                &pSmi->batch_handle[pSmi->batch_index]; \
+ \
+            ErrorF("BATCH_END()\n"); \
+            pSmi->batch_active = FALSE; \
+            /* Add new finish command */ \
+            entry->f.cmd = smi_cli_finish; \
+            /* Don't generate irq when processing the finish command */ \
+            entry->f.base = 0; \
+            address.value = READ_SCR(pSmi, CMD_ADDR); \
+            ErrorF("<<address = %d, finish = %d, index = %d\n", \
+                   (address.f.address - pSmi->batch_offset) >> 3, \
+                   pSmi->batch_finish, pSmi->batch_index); \
+            address.f.start = 0; \
+            WRITE_SCR(pSmi, CMD_ADDR, address.value); \
+            WaitIdle(); \
+            if (pSmi->batch_finish >= 0) \
+                pSmi->batch_handle[pSmi->batch_finish] = \
+                    /* wait for idle engine */ \
+                    /* just add a noop as there are 2 WaitIdle()'s */ \
+                    /*0x180002601e0007ll*/0x00000000c0000000ll/*0x60060005ll*/; \
+            address.f.address = pSmi->batch_offset + \
+                ((pSmi->batch_finish + 1) << 3); \
+            /* New finish is current index */ \
+            pSmi->batch_finish = pSmi->batch_index; \
+            /* Where to start adding new entries */ \
+            ++pSmi->batch_index; \
+            /* Start executing list again */ \
+            address.f.start = 1; \
+            WRITE_SCR(pSmi, CMD_ADDR, address.value); \
+            do { \
+                address.value = READ_SCR(pSmi, CMD_ADDR); \
+                ErrorF("loop: %x\n", address.value); \
+            } while (!address.f.idle); \
+            WaitIdle(); \
+            ErrorF(">>address = %d, finish = %d, index = %d\n", \
+                   (address.f.address - pSmi->batch_offset) >> 3, \
+                   pSmi->batch_finish, pSmi->batch_index); \
+        } \
+    } while (0)
+
+#endif
+
 /*
  * 512 kb reserved for usb buffers
  *
diff --git a/src/smi_driver.c b/src/smi_driver.c
index 01198ae..79d8a0c 100644
--- a/src/smi_driver.c
+++ b/src/smi_driver.c
@@ -1522,6 +1522,21 @@ SMI_MapMem(ScrnInfoPtr pScrn)
 #else
             pSmi->FBReserved = pSmi->FBCursorOffset = pSmi->videoRAMBytes -
                 (pSmi->Dualhead ?
SMI501_CURSOR_SIZE << 1 : SMI501_CURSOR_SIZE);
+
+# if SMI501_CLI_DEBUG /* test the value: the macro is always defined, 0 means off */
+            if (pSmi->useEXA) {
+                pSmi->batch_active = FALSE;
+                pSmi->batch_length = 4096; /* entries of 8 bytes each */
+                pSmi->FBReserved -= pSmi->batch_length << 3;
+                pSmi->batch_offset = pSmi->FBReserved;
+                pSmi->batch_handle = (int64_t *)(pSmi->FBBase + pSmi->batch_offset);
+                pSmi->batch_finish = -1; /* no FINISH command written yet */
+                pSmi->batch_index = 0;
+                xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+                           "Using command list interpreter debug code\n");
+            }
+# endif
+
 #endif
         }
         else {
diff --git a/src/smi_exa.c b/src/smi_exa.c
index aeed606..1b6d42f 100644
--- a/src/smi_exa.c
+++ b/src/smi_exa.c
@@ -27,6 +27,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "smi.h"
 
+#if SMI501_CLI_DEBUG
+# include "smi_501.h"
+# undef WRITE_DPR /* queue DPR writes in the command list while a batch is active */
+# define WRITE_DPR(pSmi, dpr, data) \
+    do { \
+        if ((pSmi)->batch_active) \
+            BATCH_LOAD_REG(((pSmi)->DPRBase - (pSmi)->MapBase) + \
+                           (dpr), (data)); \
+        else \
+            MMIO_OUT32((pSmi)->DPRBase, (dpr), (data)); \
+        DEBUG("DPR%02X = %08X\n", (dpr), (data)); \
+    } while (0)
+#endif
 
 static void
 SMI_EXASync(ScreenPtr pScreen, int marker);
@@ -241,13 +254,16 @@ SMI_PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir,
         pSmi->AccelCmd |= SMI_RIGHT_TO_LEFT;
     }
 
-    WaitQueue();
-
     if (pDstPixmap->drawable.bitsPerPixel == 24) {
         src_pitch *= 3;
         dst_pitch *= 3;
     }
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(7);
+#else
+    WaitQueue();
+#endif
     /* Destination and Source Window Widths */
     WRITE_DPR(pSmi, 0x3C, (dst_pitch << 16) | (src_pitch & 0xFFFF));
     /* Destination and Source Row Pitch */
@@ -266,6 +282,9 @@ SMI_PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir,
     WRITE_DPR(pSmi, 0x44, dst_offset);
 
     WRITE_DPR(pSmi, 0x0C, pSmi->AccelCmd);
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     LEAVE(TRUE);
 }
@@ -304,10 +323,17 @@ SMI_Copy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX,
         }
     }
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(3);
+#else
     WaitQueue();
+#endif
     WRITE_DPR(pSmi, 0x00, (srcX << 16) + (srcY & 0xFFFF));
     WRITE_DPR(pSmi, 0x04, (dstX << 16) + (dstY & 0xFFFF));
     WRITE_DPR(pSmi, 0x08, (width << 16) + (height & 0xFFFF));
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     LEAVE();
 }
@@ -371,12 +397,16 @@ SMI_PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
                    | SMI_BITBLT
                    | SMI_QUICK_START;
 
-    WaitQueue();
-
     if (pPixmap->drawable.bitsPerPixel == 24) {
         dst_pitch *= 3;
     }
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(10);
+#else
+    WaitQueue();
+#endif
+
     /* Destination Window Width */
     WRITE_DPR(pSmi, 0x3C, (dst_pitch << 16) | (dst_pitch & 0xFFFF));
     /* Destination Row Pitch */
@@ -401,6 +431,9 @@ SMI_PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
     WRITE_DPR(pSmi, 0x38, 0xFFFFFFFF);
 
     WRITE_DPR(pSmi, 0x0C, pSmi->AccelCmd);
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     LEAVE(TRUE);
 }
@@ -427,9 +460,16 @@ SMI_Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
         }
     }
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(2);
+#else
     WaitQueue();
+#endif
     WRITE_DPR(pSmi, 0x04, (x1 << 16) | (y1 & 0xFFFF));
     WRITE_DPR(pSmi, 0x08, (w << 16) | (h & 0xFFFF));
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     LEAVE();
 }
@@ -503,7 +543,11 @@ SMI_UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     /* set clipping */
     SMI_SetClippingRectangle(pScrn, x, y, x+w, y+h);
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(9);
+#else
     WaitQueue();
+#endif
 
     /* Destination and Source Window Widths */
     WRITE_DPR(pSmi, 0x3C, (dst_pixelpitch << 16) | (src_pixelpitch & 0xFFFF));
@@ -528,6 +572,9 @@ SMI_UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     WRITE_DPR(pSmi, 0x00, 0);
     WRITE_DPR(pSmi, 0x04, (x << 16) | (y & 0xFFFF));
     WRITE_DPR(pSmi, 0x08, (w << 16) | (h & 0xFFFF));
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     while (h--) {
         memcpy(pSmi->DataPortBase, src, aligned_pitch);
@@ -585,7 +632,11 @@ SMI_PrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, Pi
 
     ENTER();
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(7);
+#else
     WaitQueue();
+#endif
 
     /* Destination and Source Window Widths */
     WRITE_DPR(pSmi, 0x3C, (dst_pitch << 16) | (src_pitch & 0xFFFF));
@@ -611,6 +662,10 @@ SMI_PrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, Pi
     WRITE_DPR(pSmi, 0x0C, 0xCC /*GXCopy*/ |
               SMI_ROTATE_BLT | SMI_ROTATE_CCW | SMI_QUICK_START);
 
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
+
     pSmi->renderTransform = pSrcPicture->transform;
 
     LEAVE(TRUE);
@@ -640,11 +695,18 @@ SMI_Composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     v.vector[2] = xFixed1;
     PictureTransformPoint(t, &v);
 
+#if SMI501_CLI_DEBUG
+    BATCH_BEGIN(3);
+#else
     WaitQueue();
+#endif
     WRITE_DPR(pSmi, 0x00, (xFixedToInt(v.vector[0]) << 16) +
               (xFixedToInt(v.vector[1]) & 0xFFFF));
     WRITE_DPR(pSmi, 0x04, (dstX << 16) + (dstY & 0xFFFF));
     WRITE_DPR(pSmi, 0x08, (height << 16) + (width & 0xFFFF));
+#if SMI501_CLI_DEBUG
+    BATCH_END();
+#endif
 
     LEAVE();
 }
-- 
cgit v1.2.3