diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2010-12-31 21:16:32 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2010-12-31 21:16:32 +0000 |
commit | 33197356cbeb38b4af528365cdc86fbe2d977baa (patch) | |
tree | 632577701ecce814cb7260b410742d74e2606e14 | |
parent | cb9d6cf2e50dbd1a414334f0849662d34214613d (diff) |
Yet another rework of the cache flushing routines. Fixes some bugs, probably
introduces new ones as well. Main highlights are:
- 88200 cache lines which got marked as unusable by the BUG selftests will not be
reenabled at CMMU initialization time.
- better granularity in the 88110/88410 routines, to operate on ranges closer
to the actual requested area, errata permitting.
-rw-r--r-- | sys/arch/m88k/m88k/m8820x_machdep.c | 265 | ||||
-rw-r--r-- | sys/arch/mvme88k/mvme88k/m88110.c | 297 |
2 files changed, 351 insertions, 211 deletions
diff --git a/sys/arch/m88k/m88k/m8820x_machdep.c b/sys/arch/m88k/m88k/m8820x_machdep.c index e9e2cb0b4c2..96a1a799a25 100644 --- a/sys/arch/m88k/m88k/m8820x_machdep.c +++ b/sys/arch/m88k/m88k/m8820x_machdep.c @@ -1,27 +1,18 @@ -/* $OpenBSD: m8820x_machdep.c,v 1.43 2010/12/31 21:12:16 miod Exp $ */ +/* $OpenBSD: m8820x_machdep.c,v 1.44 2010/12/31 21:16:31 miod Exp $ */ /* - * Copyright (c) 2004, 2007, Miodrag Vallat. + * Copyright (c) 2004, 2007, 2010, Miodrag Vallat. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Copyright (c) 2001 Steve Murphree, Jr. @@ -126,7 +117,8 @@ struct cmmu_p cmmu8820x = { }; /* - * Systems with more than 2 CMMUs per CPU use programmable split schemes. + * Systems with more than 2 CMMUs per CPU use split schemes, which sometimes + * are programmable (well, no more than having a few hardwired choices). * * The following schemes are available on MVME188 boards: * - split on A12 address bit (A14 for 88204) @@ -142,8 +134,8 @@ struct cmmu_p cmmu8820x = { * splits seem less efficient. * * The really nasty part of this choice is in the exception handling code, - * when it needs to get error information from up to 4 CMMUs. See eh.S on - * mvme88k for the gory details, luna88k is more sane. + * when it needs to get error information from up to 4 CMMUs. See eh.S for + * the gory details. 
*/ struct m8820x_cmmu m8820x_cmmu[MAX_CMMUS]; @@ -154,11 +146,11 @@ u_int cmmu_shift; void m8820x_cmmu_set_reg(int, u_int, int, int, int); void m8820x_cmmu_set_cmd(u_int, int, int, int, vaddr_t); void m8820x_cmmu_wait(int); -void m8820x_cmmu_sync_cache(int, paddr_t, psize_t); -void m8820x_cmmu_sync_inval_cache(int, paddr_t, psize_t); -void m8820x_cmmu_inval_cache(int, paddr_t, psize_t); +void m8820x_cmmu_wb_locked(int, paddr_t, psize_t); +void m8820x_cmmu_wbinv_locked(int, paddr_t, psize_t); +void m8820x_cmmu_inv_locked(int, paddr_t, psize_t); -/* Flags passed to m8820x_cmmu_set() */ +/* Flags passed to m8820x_cmmu_set_*() */ #define MODE_VAL 0x01 #define ADDR_VAL 0x02 @@ -252,9 +244,7 @@ m8820x_cmmu_wait(int cpu) panic("cache flush failed!"); } #else - /* force the read access, but do not issue this statement... */ - __asm__ __volatile__ ("|or r0, r0, %0" :: - "r" (cmmu->cmmu_regs[CMMU_SSR])); + (void)cmmu->cmmu_regs[CMMU_SSR]; #endif } } @@ -398,7 +388,7 @@ m8820x_initialize_cpu(cpuid_t cpu) struct cpu_info *ci; struct m8820x_cmmu *cmmu; u_int line, cnt; - int cssp, sctr, type; + int cssp, type; apr_t apr; apr = ((0x00000 << PG_BITS) | CACHE_WT | CACHE_GLOBAL | CACHE_INH) & @@ -440,7 +430,20 @@ m8820x_initialize_cpu(cpuid_t cpu) for (line = 0; line <= 255; line++) { cmmu->cmmu_regs[CMMU_SAR] = line << MC88200_CACHE_SHIFT; + if (cmmu->cmmu_regs[CMMU_CSSP(cssp)] & + (CMMU_CSSP_D3 | CMMU_CSSP_D2 | + CMMU_CSSP_D1 | CMMU_CSSP_D0)) { + printf("cpu%d: CMMU@%p has disabled" + " cache lines in set 0x%03x," + " cssp %08x\n", + cpu, cmmu->cmmu_regs, + (cssp << 8) | line, + cmmu->cmmu_regs[CMMU_CSSP(cssp)]); + } cmmu->cmmu_regs[CMMU_CSSP(cssp)] = + (cmmu->cmmu_regs[CMMU_CSSP(cssp)] & + ~(CMMU_CSSP_D3 | CMMU_CSSP_D2 | + CMMU_CSSP_D1 | CMMU_CSSP_D0)) | CMMU_CSSP_L5 | CMMU_CSSP_L4 | CMMU_CSSP_L3 | CMMU_CSSP_L2 | CMMU_CSSP_L1 | CMMU_CSSP_L0 | @@ -452,18 +455,11 @@ m8820x_initialize_cpu(cpuid_t cpu) /* * Set the SCTR, SAPR, and UAPR to some known state. 
- * Snooping is enabled as soon as the system uses more than - * two CMMUs; for instruction CMMUs as well so that we can - * share breakpoints. + * Snooping is always enabled, so that we do not need to + * writeback userland code pages when they first get filled + * as data pages. */ - sctr = 0; - if (cmmu_shift > 1) - sctr |= CMMU_SCTR_SE; -#ifdef MULTIPROCESSOR - if (ncpusfound > 1) - sctr |= CMMU_SCTR_SE; -#endif - cmmu->cmmu_regs[CMMU_SCTR] = sctr; + cmmu->cmmu_regs[CMMU_SCTR] = CMMU_SCTR_SE; cmmu->cmmu_regs[CMMU_SAPR] = cmmu->cmmu_regs[CMMU_UAPR] = apr; @@ -472,8 +468,7 @@ m8820x_initialize_cpu(cpuid_t cpu) cmmu->cmmu_regs[CMMU_BWP4] = cmmu->cmmu_regs[CMMU_BWP5] = cmmu->cmmu_regs[CMMU_BWP6] = cmmu->cmmu_regs[CMMU_BWP7] = 0; cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_CACHE_INV_ALL; - __asm__ __volatile__ ("|or r0, r0, %0" :: - "r" (cmmu->cmmu_regs[CMMU_SSR])); + (void)cmmu->cmmu_regs[CMMU_SSR]; cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_SUPER_ALL; cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_USER_ALL; } @@ -543,9 +538,6 @@ m8820x_set_uapr(apr_t ap) * Functions that invalidate TLB entries. */ -/* - * flush any tlb - */ void m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count) { @@ -589,11 +581,10 @@ m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count) /* * Functions that invalidate caches. * - * Cache invalidates require physical addresses. + * Cache operations require physical addresses. * - * We don't push Instruction Caches prior to invalidate because they are not - * snooped and never modified (I guess it doesn't matter then which form - * of the command we use then). + * We don't writeback instruction caches prior to invalidate because they + * are never modified. 
* * Note that on systems with more than two CMMUs per CPU, we can not benefit * from the address split - the split is done on virtual (not translated yet) @@ -604,7 +595,7 @@ m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count) #define round_cache_line(a) trunc_cache_line((a) + MC88200_CACHE_LINE - 1) /* - * flush both Instruction and Data caches + * invalidate I$, writeback and invalidate D$ */ void m8820x_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size) @@ -709,10 +700,10 @@ m8820x_icache_inv(cpuid_t cpu, paddr_t pa, psize_t size) } /* - * sync dcache - icache is never dirty but needs to be invalidated as well. + * writeback D$ */ void -m8820x_cmmu_sync_cache(int cpu, paddr_t pa, psize_t size) +m8820x_cmmu_wb_locked(int cpu, paddr_t pa, psize_t size) { if (size <= MC88200_CACHE_LINE) { m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CB_LINE, @@ -724,25 +715,27 @@ m8820x_cmmu_sync_cache(int cpu, paddr_t pa, psize_t size) m8820x_cmmu_wait(cpu); } +/* + * invalidate I$, writeback and invalidate D$ + */ void -m8820x_cmmu_sync_inval_cache(int cpu, paddr_t pa, psize_t size) +m8820x_cmmu_wbinv_locked(int cpu, paddr_t pa, psize_t size) { if (size <= MC88200_CACHE_LINE) { - m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_LINE, - MODE_VAL, cpu, INST_CMMU, pa); m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_LINE, - MODE_VAL, cpu, DATA_CMMU, pa); + MODE_VAL, cpu, 0, pa); } else { - m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_PAGE, - MODE_VAL, cpu, INST_CMMU, pa); m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_PAGE, - MODE_VAL, cpu, DATA_CMMU, pa); + MODE_VAL, cpu, 0, pa); } m8820x_cmmu_wait(cpu); } +/* + * invalidate I$ and D$ + */ void -m8820x_cmmu_inval_cache(int cpu, paddr_t pa, psize_t size) +m8820x_cmmu_inv_locked(int cpu, paddr_t pa, psize_t size) { if (size <= MC88200_CACHE_LINE) { m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_LINE, 0, cpu, 0, pa); @@ -771,31 +764,114 @@ m8820x_dma_cachectl(paddr_t _pa, psize_t _size, int op) paddr_t pa; psize_t size, count; void (*flusher)(int, 
paddr_t, psize_t); + struct { + paddr_t pa; + psize_t size; + void (*flusher)(int, paddr_t, psize_t); + } ops[3], *curop; + uint nops; pa = trunc_cache_line(_pa); size = round_cache_line(_pa + _size) - pa; + nops = 0; + curop = ops; switch (op) { case DMA_CACHE_SYNC: - flusher = m8820x_cmmu_sync_cache; + /* + * If the range does not span complete cache lines, + * force invalidation of the incomplete lines. The + * rationale behind this is that these incomplete lines + * will probably need to be invalidated later, and + * we do not want to risk having stale data in the way. + */ + if (pa != _pa) { + curop->pa = pa; + curop->size = MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_wbinv_locked; + curop++; + pa += MC88200_CACHE_LINE; + size -= MC88200_CACHE_LINE; + if (size == 0) + break; + } + if (pa + size == _pa + _size) { + curop->pa = pa; + curop->size = size; + curop->flusher = m8820x_cmmu_wb_locked; + curop++; + } else { + if (size != MC88200_CACHE_LINE) { + curop->pa = pa; + curop->size = size - MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_wb_locked; + pa += curop->size; + curop++; + } + curop->pa = pa; + curop->size = MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_wbinv_locked; + curop++; + } break; case DMA_CACHE_SYNC_INVAL: - flusher = m8820x_cmmu_sync_inval_cache; + curop->pa = pa; + curop->size = size; + curop->flusher = m8820x_cmmu_wbinv_locked; + curop++; break; default: - if (pa != _pa || size != _size) { - /* - * Theoretically, we should preserve the data from - * the two incomplete cache lines. - * However, callers are expected to have asked - * for a cache sync before, so we do not risk too - * much by not doing this. - */ + case DMA_CACHE_INV: +#if 0 + /* + * Preserve the data from the incomplete cache lines (up to + * two), and discard the lines in-between (if any). 
+ */ + if (pa != _pa) { + curop->pa = pa; + curop->size = MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_wbinv_locked; + curop++; + pa += MC88200_CACHE_LINE; + size -= MC88200_CACHE_LINE; + if (size == 0) + break; + } + if (pa + size == _pa + _size) { + curop->pa = pa; + curop->size = size; + curop->flusher = m8820x_cmmu_inv_locked; + curop++; + } else { + if (size != MC88200_CACHE_LINE) { + curop->pa = pa; + curop->size = size - MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_inv_locked; + pa += curop->size; + curop++; + } + curop->pa = pa; + curop->size = MC88200_CACHE_LINE; + curop->flusher = m8820x_cmmu_wbinv_locked; + curop++; } - flusher = m8820x_cmmu_inval_cache; +#else + /* + * Even if there are incomplete cache lines affected, assume + * they were evicted earlier. + * XXX We ought to save the partial cache lines, invalidate, + * XXX and put outside-the-range bytes back... + */ + curop->pa = pa; + curop->size = size; + curop->flusher = m8820x_cmmu_inv_locked; + curop++; +#endif break; } + nops = curop - ops; + #ifndef MULTIPROCESSOR cpu = cpu_number(); #endif @@ -804,31 +880,36 @@ m8820x_dma_cachectl(paddr_t _pa, psize_t _size, int op) set_psr(psr | PSR_IND); CMMU_LOCK; - while (size != 0) { - count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ? + for (curop = ops; nops != 0; curop++, nops--) { + pa = curop->pa; + size = curop->size; + flusher = curop->flusher; + while (size != 0) { + count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ? PAGE_SIZE : MC88200_CACHE_LINE; #ifdef MULTIPROCESSOR - /* writeback on a single cpu... */ - (*flusher)(ci->ci_cpuid, pa, count); - - /* invalidate on all... */ - if (flusher != m8820x_cmmu_sync_cache) { - for (cpu = 0; cpu < MAX_CPUS; cpu++) { - if (!ISSET(m88k_cpus[cpu].ci_flags, - CIF_ALIVE)) - continue; - if (cpu == ci->ci_cpuid) - continue; - m8820x_cmmu_inval_cache(cpu, pa, count); + /* writeback on a single cpu... */ + (*flusher)(ci->ci_cpuid, pa, count); + + /* invalidate on all... 
*/ + if (flusher != m8820x_cmmu_wb_locked) { + for (cpu = 0; cpu < MAX_CPUS; cpu++) { + if (!ISSET(m88k_cpus[cpu].ci_flags, + CIF_ALIVE)) + continue; + if (cpu == ci->ci_cpuid) + continue; + m8820x_cmmu_inv_locked(cpu, pa, count); + } } - } #else /* MULTIPROCESSOR */ - (*flusher)(cpu, pa, count); + (*flusher)(cpu, pa, count); #endif /* MULTIPROCESSOR */ - pa += count; - size -= count; + pa += count; + size -= count; + } } CMMU_UNLOCK; diff --git a/sys/arch/mvme88k/mvme88k/m88110.c b/sys/arch/mvme88k/mvme88k/m88110.c index ab28fc5e9e9..25ad03cf14a 100644 --- a/sys/arch/mvme88k/mvme88k/m88110.c +++ b/sys/arch/mvme88k/mvme88k/m88110.c @@ -1,4 +1,20 @@ -/* $OpenBSD: m88110.c,v 1.67 2010/12/31 21:12:16 miod Exp $ */ +/* $OpenBSD: m88110.c,v 1.68 2010/12/31 21:16:31 miod Exp $ */ + +/* + * Copyright (c) 2010 Miodrag Vallat. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ /* * Copyright (c) 1998 Steve Murphree, Jr. * All rights reserved. 
@@ -148,9 +164,9 @@ struct cmmu_p cmmu88410 = { void patc_clear(void); -void m88110_cmmu_sync_cache(paddr_t, psize_t); -void m88110_cmmu_sync_inval_cache(paddr_t, psize_t); -void m88110_cmmu_inval_cache(paddr_t, psize_t); +void m88110_cmmu_wb_locked(paddr_t, psize_t); +void m88110_cmmu_wbinv_locked(paddr_t, psize_t); +void m88110_cmmu_inv_locked(paddr_t, psize_t); void patc_clear(void) @@ -358,35 +374,7 @@ m88410_initialize_cpu(cpuid_t cpu) dctl |= CMMU_DCTL_SEN; set_dctl(dctl); CMMU_LOCK; -#if 0 - mc88410_inval(); /* clear external data cache */ -#else - /* - * We can't invalidate the 88410 cache without flushing it first; - * this is probably due to either an error in the cpu-to-88410 - * communication protocol, or to a bug in the '410 (but since I - * do not know how to get its revision, I can't tell whether this - * is the obscure v1 bug or not). - * - * Since we can't flush random data either, fill the secondary - * cache first, before flushing it. - * - * The smallest 88410 cache line is 32 bytes, and the largest size - * is 1MB. - */ - { - vaddr_t va; - uint32_t junk = 0; - - for (va = 0; va < 1024 * 1024; va += 32) - junk += *(uint32_t *)va; - - /* to make sure the above loop isn't optimized away */ - mc88110_wbinv_data_page(junk & PAGE_SIZE); - } - mc88410_wb(); - mc88410_inval(); -#endif + mc88410_inv(); /* clear external data cache */ CMMU_UNLOCK; } @@ -505,7 +493,7 @@ m88110_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count) * This really only matters to us when running a MULTIPROCESSOR kernel * (otherwise there is no snooping happening), and given the intrusive * changes it requires (see the comment about invalidates being turned - * into flushes with invalidate in m88110_cmmu_inval_cache below), as + * into flushes with invalidate in m88110_cmmu_inv_locked below), as * well as the small performance impact it has), we define a specific * symbol to enable the suggested workaround. 
* @@ -520,19 +508,44 @@ m88110_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count) #define round_cache_line(a) trunc_cache_line((a) + MC88110_CACHE_LINE - 1) /* - * Flush both Instruction and Data caches + * invalidate I$, writeback and invalidate D$ */ void m88110_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size) { u_int32_t psr; + psize_t count; + +#ifdef ENABLE_88110_ERRATA_17 + size = round_page(pa + size) - trunc_page(pa); + pa = trunc_page(pa); +#else + size = round_cache_line(pa + size) - trunc_cache_line(pa); + pa = trunc_cache_line(pa); +#endif psr = get_psr(); set_psr(psr | PSR_IND); mc88110_inval_inst(); - mc88110_wb_data(); + while (size != 0) { +#ifdef ENABLE_88110_ERRATA_17 + mc88110_wb_data_page(pa); + mc88110_wbinv_data_page(pa); + count = PAGE_SIZE; +#else + if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) { + mc88110_wbinv_data_page(pa); + count = PAGE_SIZE; + } else { + mc88110_wbinv_data_line(pa); + count = MC88110_CACHE_LINE; + } +#endif + pa += count; + size -= count; + } set_psr(psr); } @@ -541,6 +554,7 @@ void m88410_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size) { u_int32_t psr; + psize_t count; #ifdef MULTIPROCESSOR struct cpu_info *ci = curcpu(); @@ -550,12 +564,36 @@ m88410_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size) } #endif +#ifdef ENABLE_88110_ERRATA_17 + size = round_page(pa + size) - trunc_page(pa); + pa = trunc_page(pa); +#else + size = round_cache_line(pa + size) - trunc_cache_line(pa); + pa = trunc_cache_line(pa); +#endif + psr = get_psr(); set_psr(psr | PSR_IND); mc88110_inval_inst(); - /* flush all data to avoid errata invalidate */ - mc88110_wb_data(); + while (size != 0) { +#ifdef ENABLE_88110_ERRATA_17 + mc88110_wb_data_page(pa); + mc88110_wbinv_data_page(pa); + count = PAGE_SIZE; +#else + if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) { + mc88110_wbinv_data_page(pa); + count = PAGE_SIZE; + } else { + mc88110_wbinv_data_line(pa); + count = MC88110_CACHE_LINE; + } +#endif + pa += count; + size -= count; 
+ } + CMMU_LOCK; mc88410_wb(); CMMU_UNLOCK; @@ -669,24 +707,22 @@ m88410_icache_inv(cpuid_t cpu, paddr_t pa, psize_t size) } /* - * Sync dcache - icache is never dirty but needs to be invalidated as well. + * writeback D$ */ - void -m88110_cmmu_sync_cache(paddr_t pa, psize_t size) +m88110_cmmu_wb_locked(paddr_t pa, psize_t size) { -#ifdef ENABLE_88110_ERRATA_17 - mc88110_wb_data_page(pa); -#else if (size <= MC88110_CACHE_LINE) mc88110_wb_data_line(pa); else mc88110_wb_data_page(pa); -#endif } +/* + * writeback and invalidate D$ + */ void -m88110_cmmu_sync_inval_cache(paddr_t pa, psize_t size) +m88110_cmmu_wbinv_locked(paddr_t pa, psize_t size) { #ifdef ENABLE_88110_ERRATA_17 mc88110_wb_data_page(pa); @@ -699,8 +735,11 @@ m88110_cmmu_sync_inval_cache(paddr_t pa, psize_t size) #endif } +/* + * invalidate D$ + */ void -m88110_cmmu_inval_cache(paddr_t pa, psize_t size) +m88110_cmmu_inv_locked(paddr_t pa, psize_t size) { /* * I'd love to do this... @@ -711,8 +750,7 @@ m88110_cmmu_inval_cache(paddr_t pa, psize_t size) mc88110_inval_data_page(pa); * ... but there is no mc88110_inval_data_page(). Callers know - * this and turn invalidates into syncs with invalidate for page - * or larger areas. + * this and always do this line-by-line. */ mc88110_inval_data_line(pa); } @@ -742,134 +780,155 @@ m88110_dma_cachectl(paddr_t _pa, psize_t _size, int op) switch (op) { case DMA_CACHE_SYNC: - flusher = m88110_cmmu_sync_cache; + /* + * If the range does not span complete cache lines, + * force invalidation of the incomplete lines. The + * rationale behind this is that these incomplete lines + * will probably need to be invalidated later, and + * we do not want to risk having stale data in the way. 
+ */ + if (pa != _pa || size != _size || size >= PAGE_SIZE) + flusher = m88110_cmmu_wbinv_locked; + else + flusher = m88110_cmmu_wb_locked; break; case DMA_CACHE_SYNC_INVAL: - flusher = m88110_cmmu_sync_inval_cache; + flusher = m88110_cmmu_wbinv_locked; break; default: - if (pa != _pa || size != _size || size >= PAGE_SIZE) - flusher = m88110_cmmu_sync_inval_cache; - else - flusher = m88110_cmmu_inval_cache; + flusher = m88110_cmmu_inv_locked; break; } +#ifdef ENABLE_88110_ERRATA_17 + if (flusher == m88110_cmmu_wbinv_locked) { + pa = trunc_page(_pa); + size = trunc_page(_pa + _size) - pa; + } +#endif + psr = get_psr(); set_psr(psr | PSR_IND); if (op != DMA_CACHE_SYNC) mc88110_inval_inst(); - while (size != 0) { - count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ? - PAGE_SIZE : MC88110_CACHE_LINE; - - (*flusher)(pa, count); - - pa += count; - size -= count; + if (flusher == m88110_cmmu_inv_locked) { + while (size != 0) { + count = MC88110_CACHE_LINE; + (*flusher)(pa, count); + pa += count; + size -= count; + } + } else { + while (size != 0) { + count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ? + PAGE_SIZE : MC88110_CACHE_LINE; + (*flusher)(pa, count); + pa += count; + size -= count; + } } set_psr(psr); } void -m88410_dma_cachectl_local(paddr_t pa, psize_t size, int op) +m88410_dma_cachectl_local(paddr_t _pa, psize_t _size, int op) { u_int32_t psr; - psize_t count; + paddr_t pa; + psize_t size, count; void (*flusher)(paddr_t, psize_t); void (*ext_flusher)(void); + if (op == DMA_CACHE_SYNC) { + /* + * Enlarge the range to integral pages, to match the + * 88410 operation granularity. + */ + pa = trunc_page(_pa); + size = trunc_page(_pa + _size) - pa; + } else { + pa = trunc_cache_line(_pa); + size = round_cache_line(_pa + _size) - pa; + } + switch (op) { case DMA_CACHE_SYNC: -#if 0 - flusher = m88110_cmmu_sync_cache; - ext_flusher = mc88410_wb; -#endif + /* + * If the range does not span complete cache lines, + * force invalidation of the incomplete lines. 
The + * rationale behind this is that these incomplete lines + * will probably need to be invalidated later, and + * we do not want to risk having stale data in the way. + */ + if (pa != _pa || size != _size || size >= PAGE_SIZE) + flusher = m88110_cmmu_wbinv_locked; + else + flusher = m88110_cmmu_wb_locked; break; case DMA_CACHE_SYNC_INVAL: - flusher = m88110_cmmu_sync_inval_cache; + flusher = m88110_cmmu_wbinv_locked; ext_flusher = mc88410_wbinv; break; default: -#ifdef ENABLE_88110_ERRATA_17 - flusher = m88110_cmmu_sync_inval_cache; -#else - flusher = m88110_cmmu_inval_cache; -#endif + flusher = m88110_cmmu_inv_locked; #ifdef notyet - ext_flusher = mc88410_inval; + ext_flusher = mc88410_inv; #else ext_flusher = mc88410_wbinv; #endif break; } +#ifdef ENABLE_88110_ERRATA_17 + if (flusher == m88110_cmmu_wbinv_locked) { + pa = trunc_page(_pa); + size = trunc_page(_pa + _size) - pa; + } +#endif + psr = get_psr(); set_psr(psr | PSR_IND); - if (op == DMA_CACHE_SYNC) { - CMMU_LOCK; + if (op != DMA_CACHE_SYNC) + mc88110_inval_inst(); + if (flusher == m88110_cmmu_inv_locked) { while (size != 0) { - m88110_cmmu_sync_cache(pa, PAGE_SIZE); - mc88410_wb_page(pa); - pa += PAGE_SIZE; - size -= PAGE_SIZE; + count = MC88110_CACHE_LINE; + (*flusher)(pa, count); + pa += count; + size -= count; } - CMMU_UNLOCK; } else { - mc88110_inval_inst(); while (size != 0) { -#ifdef ENABLE_88110_ERRATA_17 - count = PAGE_SIZE; -#else count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ? 
PAGE_SIZE : MC88110_CACHE_LINE; -#endif - (*flusher)(pa, count); - pa += count; size -= count; } - CMMU_LOCK; + } + + + CMMU_LOCK; + if (op == DMA_CACHE_SYNC) { + while (size != 0) { + mc88410_wb_page(pa); + pa += PAGE_SIZE; + size -= PAGE_SIZE; + } + } else { (*ext_flusher)(); - CMMU_UNLOCK; } + CMMU_UNLOCK; set_psr(psr); } void -m88410_dma_cachectl(paddr_t _pa, psize_t _size, int op) +m88410_dma_cachectl(paddr_t pa, psize_t size, int op) { - paddr_t pa; - psize_t size; - -#ifdef ENABLE_88110_ERRATA_17 - pa = trunc_page(_pa); - size = round_page(_pa + _size) - pa; - -#if 0 /* not required since m88410_dma_cachectl_local() behaves identically */ - if (op == DMA_CACHE_INV) - op = DMA_CACHE_SYNC_INVAL; -#endif -#else - if (op == DMA_CACHE_SYNC) { - pa = trunc_page(_pa); - size = round_page(_pa + _size) - pa; - } else { - pa = trunc_cache_line(_pa); - size = round_cache_line(_pa + _size) - pa; - - if (op == DMA_CACHE_INV) { - if (pa != _pa || size != _size || size >= PAGE_SIZE) - op = DMA_CACHE_SYNC_INVAL; - } - } -#endif - m88410_dma_cachectl_local(pa, size, op); #ifdef MULTIPROCESSOR /* |