summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Miod Vallat <miod@cvs.openbsd.org> 2010-12-31 21:16:32 +0000
committer: Miod Vallat <miod@cvs.openbsd.org> 2010-12-31 21:16:32 +0000
commit: 33197356cbeb38b4af528365cdc86fbe2d977baa (patch)
tree: 632577701ecce814cb7260b410742d74e2606e14
parent: cb9d6cf2e50dbd1a414334f0849662d34214613d (diff)
Yet another rework of the cache flushing routines. Fixes some bugs, probably
introduces new ones as well. Main highlights are:
- 88200 lines which got marked as unusable by the BUG selftests will not be
  reenabled at CMMU initialization time.
- better granularity in the 88110/88410 routines, to operate on ranges closer
  to the actual requested area, errata permitting.
-rw-r--r-- sys/arch/m88k/m88k/m8820x_machdep.c 265
-rw-r--r-- sys/arch/mvme88k/mvme88k/m88110.c 297
2 files changed, 351 insertions, 211 deletions
diff --git a/sys/arch/m88k/m88k/m8820x_machdep.c b/sys/arch/m88k/m88k/m8820x_machdep.c
index e9e2cb0b4c2..96a1a799a25 100644
--- a/sys/arch/m88k/m88k/m8820x_machdep.c
+++ b/sys/arch/m88k/m88k/m8820x_machdep.c
@@ -1,27 +1,18 @@
-/* $OpenBSD: m8820x_machdep.c,v 1.43 2010/12/31 21:12:16 miod Exp $ */
+/* $OpenBSD: m8820x_machdep.c,v 1.44 2010/12/31 21:16:31 miod Exp $ */
/*
- * Copyright (c) 2004, 2007, Miodrag Vallat.
+ * Copyright (c) 2004, 2007, 2010, Miodrag Vallat.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Copyright (c) 2001 Steve Murphree, Jr.
@@ -126,7 +117,8 @@ struct cmmu_p cmmu8820x = {
};
/*
- * Systems with more than 2 CMMUs per CPU use programmable split schemes.
+ * Systems with more than 2 CMMUs per CPU use split schemes, which sometimes
+ * are programmable (well, no more than having a few hardwired choices).
*
* The following schemes are available on MVME188 boards:
* - split on A12 address bit (A14 for 88204)
@@ -142,8 +134,8 @@ struct cmmu_p cmmu8820x = {
* splits seem less efficient.
*
* The really nasty part of this choice is in the exception handling code,
- * when it needs to get error information from up to 4 CMMUs. See eh.S on
- * mvme88k for the gory details, luna88k is more sane.
+ * when it needs to get error information from up to 4 CMMUs. See eh.S for
+ * the gory details.
*/
struct m8820x_cmmu m8820x_cmmu[MAX_CMMUS];
@@ -154,11 +146,11 @@ u_int cmmu_shift;
void m8820x_cmmu_set_reg(int, u_int, int, int, int);
void m8820x_cmmu_set_cmd(u_int, int, int, int, vaddr_t);
void m8820x_cmmu_wait(int);
-void m8820x_cmmu_sync_cache(int, paddr_t, psize_t);
-void m8820x_cmmu_sync_inval_cache(int, paddr_t, psize_t);
-void m8820x_cmmu_inval_cache(int, paddr_t, psize_t);
+void m8820x_cmmu_wb_locked(int, paddr_t, psize_t);
+void m8820x_cmmu_wbinv_locked(int, paddr_t, psize_t);
+void m8820x_cmmu_inv_locked(int, paddr_t, psize_t);
-/* Flags passed to m8820x_cmmu_set() */
+/* Flags passed to m8820x_cmmu_set_*() */
#define MODE_VAL 0x01
#define ADDR_VAL 0x02
@@ -252,9 +244,7 @@ m8820x_cmmu_wait(int cpu)
panic("cache flush failed!");
}
#else
- /* force the read access, but do not issue this statement... */
- __asm__ __volatile__ ("|or r0, r0, %0" ::
- "r" (cmmu->cmmu_regs[CMMU_SSR]));
+ (void)cmmu->cmmu_regs[CMMU_SSR];
#endif
}
}
@@ -398,7 +388,7 @@ m8820x_initialize_cpu(cpuid_t cpu)
struct cpu_info *ci;
struct m8820x_cmmu *cmmu;
u_int line, cnt;
- int cssp, sctr, type;
+ int cssp, type;
apr_t apr;
apr = ((0x00000 << PG_BITS) | CACHE_WT | CACHE_GLOBAL | CACHE_INH) &
@@ -440,7 +430,20 @@ m8820x_initialize_cpu(cpuid_t cpu)
for (line = 0; line <= 255; line++) {
cmmu->cmmu_regs[CMMU_SAR] =
line << MC88200_CACHE_SHIFT;
+ if (cmmu->cmmu_regs[CMMU_CSSP(cssp)] &
+ (CMMU_CSSP_D3 | CMMU_CSSP_D2 |
+ CMMU_CSSP_D1 | CMMU_CSSP_D0)) {
+ printf("cpu%d: CMMU@%p has disabled"
+ " cache lines in set 0x%03x,"
+ " cssp %08x\n",
+ cpu, cmmu->cmmu_regs,
+ (cssp << 8) | line,
+ cmmu->cmmu_regs[CMMU_CSSP(cssp)]);
+ }
cmmu->cmmu_regs[CMMU_CSSP(cssp)] =
+ (cmmu->cmmu_regs[CMMU_CSSP(cssp)] &
+ ~(CMMU_CSSP_D3 | CMMU_CSSP_D2 |
+ CMMU_CSSP_D1 | CMMU_CSSP_D0)) |
CMMU_CSSP_L5 | CMMU_CSSP_L4 |
CMMU_CSSP_L3 | CMMU_CSSP_L2 |
CMMU_CSSP_L1 | CMMU_CSSP_L0 |
@@ -452,18 +455,11 @@ m8820x_initialize_cpu(cpuid_t cpu)
/*
* Set the SCTR, SAPR, and UAPR to some known state.
- * Snooping is enabled as soon as the system uses more than
- * two CMMUs; for instruction CMMUs as well so that we can
- * share breakpoints.
+ * Snooping is always enabled, so that we do not need to
+ * writeback userland code pages when they first get filled
+ * as data pages.
*/
- sctr = 0;
- if (cmmu_shift > 1)
- sctr |= CMMU_SCTR_SE;
-#ifdef MULTIPROCESSOR
- if (ncpusfound > 1)
- sctr |= CMMU_SCTR_SE;
-#endif
- cmmu->cmmu_regs[CMMU_SCTR] = sctr;
+ cmmu->cmmu_regs[CMMU_SCTR] = CMMU_SCTR_SE;
cmmu->cmmu_regs[CMMU_SAPR] = cmmu->cmmu_regs[CMMU_UAPR] = apr;
@@ -472,8 +468,7 @@ m8820x_initialize_cpu(cpuid_t cpu)
cmmu->cmmu_regs[CMMU_BWP4] = cmmu->cmmu_regs[CMMU_BWP5] =
cmmu->cmmu_regs[CMMU_BWP6] = cmmu->cmmu_regs[CMMU_BWP7] = 0;
cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_CACHE_INV_ALL;
- __asm__ __volatile__ ("|or r0, r0, %0" ::
- "r" (cmmu->cmmu_regs[CMMU_SSR]));
+ (void)cmmu->cmmu_regs[CMMU_SSR];
cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_SUPER_ALL;
cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_USER_ALL;
}
@@ -543,9 +538,6 @@ m8820x_set_uapr(apr_t ap)
* Functions that invalidate TLB entries.
*/
-/*
- * flush any tlb
- */
void
m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count)
{
@@ -589,11 +581,10 @@ m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count)
/*
* Functions that invalidate caches.
*
- * Cache invalidates require physical addresses.
+ * Cache operations require physical addresses.
*
- * We don't push Instruction Caches prior to invalidate because they are not
- * snooped and never modified (I guess it doesn't matter then which form
- * of the command we use then).
+ * We don't writeback instruction caches prior to invalidate because they
+ * are never modified.
*
* Note that on systems with more than two CMMUs per CPU, we can not benefit
* from the address split - the split is done on virtual (not translated yet)
@@ -604,7 +595,7 @@ m8820x_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count)
#define round_cache_line(a) trunc_cache_line((a) + MC88200_CACHE_LINE - 1)
/*
- * flush both Instruction and Data caches
+ * invalidate I$, writeback and invalidate D$
*/
void
m8820x_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size)
@@ -709,10 +700,10 @@ m8820x_icache_inv(cpuid_t cpu, paddr_t pa, psize_t size)
}
/*
- * sync dcache - icache is never dirty but needs to be invalidated as well.
+ * writeback D$
*/
void
-m8820x_cmmu_sync_cache(int cpu, paddr_t pa, psize_t size)
+m8820x_cmmu_wb_locked(int cpu, paddr_t pa, psize_t size)
{
if (size <= MC88200_CACHE_LINE) {
m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CB_LINE,
@@ -724,25 +715,27 @@ m8820x_cmmu_sync_cache(int cpu, paddr_t pa, psize_t size)
m8820x_cmmu_wait(cpu);
}
+/*
+ * invalidate I$, writeback and invalidate D$
+ */
void
-m8820x_cmmu_sync_inval_cache(int cpu, paddr_t pa, psize_t size)
+m8820x_cmmu_wbinv_locked(int cpu, paddr_t pa, psize_t size)
{
if (size <= MC88200_CACHE_LINE) {
- m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_LINE,
- MODE_VAL, cpu, INST_CMMU, pa);
m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_LINE,
- MODE_VAL, cpu, DATA_CMMU, pa);
+ MODE_VAL, cpu, 0, pa);
} else {
- m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_PAGE,
- MODE_VAL, cpu, INST_CMMU, pa);
m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_PAGE,
- MODE_VAL, cpu, DATA_CMMU, pa);
+ MODE_VAL, cpu, 0, pa);
}
m8820x_cmmu_wait(cpu);
}
+/*
+ * invalidate I$ and D$
+ */
void
-m8820x_cmmu_inval_cache(int cpu, paddr_t pa, psize_t size)
+m8820x_cmmu_inv_locked(int cpu, paddr_t pa, psize_t size)
{
if (size <= MC88200_CACHE_LINE) {
m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_LINE, 0, cpu, 0, pa);
@@ -771,31 +764,114 @@ m8820x_dma_cachectl(paddr_t _pa, psize_t _size, int op)
paddr_t pa;
psize_t size, count;
void (*flusher)(int, paddr_t, psize_t);
+ struct {
+ paddr_t pa;
+ psize_t size;
+ void (*flusher)(int, paddr_t, psize_t);
+ } ops[3], *curop;
+ uint nops;
pa = trunc_cache_line(_pa);
size = round_cache_line(_pa + _size) - pa;
+ nops = 0;
+ curop = ops;
switch (op) {
case DMA_CACHE_SYNC:
- flusher = m8820x_cmmu_sync_cache;
+ /*
+ * If the range does not span complete cache lines,
+ * force invalidation of the incomplete lines. The
+ * rationale behind this is that these incomplete lines
+ * will probably need to be invalidated later, and
+ * we do not want to risk having stale data in the way.
+ */
+ if (pa != _pa) {
+ curop->pa = pa;
+ curop->size = MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_wbinv_locked;
+ curop++;
+ pa += MC88200_CACHE_LINE;
+ size -= MC88200_CACHE_LINE;
+ if (size == 0)
+ break;
+ }
+ if (pa + size == _pa + _size) {
+ curop->pa = pa;
+ curop->size = size;
+ curop->flusher = m8820x_cmmu_wb_locked;
+ curop++;
+ } else {
+ if (size != MC88200_CACHE_LINE) {
+ curop->pa = pa;
+ curop->size = size - MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_wb_locked;
+ pa += curop->size;
+ curop++;
+ }
+ curop->pa = pa;
+ curop->size = MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_wbinv_locked;
+ curop++;
+ }
break;
case DMA_CACHE_SYNC_INVAL:
- flusher = m8820x_cmmu_sync_inval_cache;
+ curop->pa = pa;
+ curop->size = size;
+ curop->flusher = m8820x_cmmu_wbinv_locked;
+ curop++;
break;
default:
- if (pa != _pa || size != _size) {
- /*
- * Theoretically, we should preserve the data from
- * the two incomplete cache lines.
- * However, callers are expected to have asked
- * for a cache sync before, so we do not risk too
- * much by not doing this.
- */
+ case DMA_CACHE_INV:
+#if 0
+ /*
+ * Preserve the data from the incomplete cache lines (up to
+ * two), and discard the lines in-between (if any).
+ */
+ if (pa != _pa) {
+ curop->pa = pa;
+ curop->size = MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_wbinv_locked;
+ curop++;
+ pa += MC88200_CACHE_LINE;
+ size -= MC88200_CACHE_LINE;
+ if (size == 0)
+ break;
+ }
+ if (pa + size == _pa + _size) {
+ curop->pa = pa;
+ curop->size = size;
+ curop->flusher = m8820x_cmmu_inv_locked;
+ curop++;
+ } else {
+ if (size != MC88200_CACHE_LINE) {
+ curop->pa = pa;
+ curop->size = size - MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_inv_locked;
+ pa += curop->size;
+ curop++;
+ }
+ curop->pa = pa;
+ curop->size = MC88200_CACHE_LINE;
+ curop->flusher = m8820x_cmmu_wbinv_locked;
+ curop++;
}
- flusher = m8820x_cmmu_inval_cache;
+#else
+ /*
+ * Even if there are incomplete cache lines affected, assume
+ * they were evicted earlier.
+ * XXX We ought to save the partial cache lines, invalidate,
+ * XXX and put outside-the-range bytes back...
+ */
+ curop->pa = pa;
+ curop->size = size;
+ curop->flusher = m8820x_cmmu_inv_locked;
+ curop++;
+#endif
break;
}
+ nops = curop - ops;
+
#ifndef MULTIPROCESSOR
cpu = cpu_number();
#endif
@@ -804,31 +880,36 @@ m8820x_dma_cachectl(paddr_t _pa, psize_t _size, int op)
set_psr(psr | PSR_IND);
CMMU_LOCK;
- while (size != 0) {
- count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
+ for (curop = ops; nops != 0; curop++, nops--) {
+ pa = curop->pa;
+ size = curop->size;
+ flusher = curop->flusher;
+ while (size != 0) {
+ count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
PAGE_SIZE : MC88200_CACHE_LINE;
#ifdef MULTIPROCESSOR
- /* writeback on a single cpu... */
- (*flusher)(ci->ci_cpuid, pa, count);
-
- /* invalidate on all... */
- if (flusher != m8820x_cmmu_sync_cache) {
- for (cpu = 0; cpu < MAX_CPUS; cpu++) {
- if (!ISSET(m88k_cpus[cpu].ci_flags,
- CIF_ALIVE))
- continue;
- if (cpu == ci->ci_cpuid)
- continue;
- m8820x_cmmu_inval_cache(cpu, pa, count);
+ /* writeback on a single cpu... */
+ (*flusher)(ci->ci_cpuid, pa, count);
+
+ /* invalidate on all... */
+ if (flusher != m8820x_cmmu_wb_locked) {
+ for (cpu = 0; cpu < MAX_CPUS; cpu++) {
+ if (!ISSET(m88k_cpus[cpu].ci_flags,
+ CIF_ALIVE))
+ continue;
+ if (cpu == ci->ci_cpuid)
+ continue;
+ m8820x_cmmu_inv_locked(cpu, pa, count);
+ }
}
- }
#else /* MULTIPROCESSOR */
- (*flusher)(cpu, pa, count);
+ (*flusher)(cpu, pa, count);
#endif /* MULTIPROCESSOR */
- pa += count;
- size -= count;
+ pa += count;
+ size -= count;
+ }
}
CMMU_UNLOCK;
diff --git a/sys/arch/mvme88k/mvme88k/m88110.c b/sys/arch/mvme88k/mvme88k/m88110.c
index ab28fc5e9e9..25ad03cf14a 100644
--- a/sys/arch/mvme88k/mvme88k/m88110.c
+++ b/sys/arch/mvme88k/mvme88k/m88110.c
@@ -1,4 +1,20 @@
-/* $OpenBSD: m88110.c,v 1.67 2010/12/31 21:12:16 miod Exp $ */
+/* $OpenBSD: m88110.c,v 1.68 2010/12/31 21:16:31 miod Exp $ */
+
+/*
+ * Copyright (c) 2010 Miodrag Vallat.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
/*
* Copyright (c) 1998 Steve Murphree, Jr.
* All rights reserved.
@@ -148,9 +164,9 @@ struct cmmu_p cmmu88410 = {
void patc_clear(void);
-void m88110_cmmu_sync_cache(paddr_t, psize_t);
-void m88110_cmmu_sync_inval_cache(paddr_t, psize_t);
-void m88110_cmmu_inval_cache(paddr_t, psize_t);
+void m88110_cmmu_wb_locked(paddr_t, psize_t);
+void m88110_cmmu_wbinv_locked(paddr_t, psize_t);
+void m88110_cmmu_inv_locked(paddr_t, psize_t);
void
patc_clear(void)
@@ -358,35 +374,7 @@ m88410_initialize_cpu(cpuid_t cpu)
dctl |= CMMU_DCTL_SEN;
set_dctl(dctl);
CMMU_LOCK;
-#if 0
- mc88410_inval(); /* clear external data cache */
-#else
- /*
- * We can't invalidate the 88410 cache without flushing it first;
- * this is probably due to either an error in the cpu-to-88410
- * communication protocol, or to a bug in the '410 (but since I
- * do not know how to get its revision, I can't tell whether this
- * is the obscure v1 bug or not).
- *
- * Since we can't flush random data either, fill the secondary
- * cache first, before flushing it.
- *
- * The smallest 88410 cache line is 32 bytes, and the largest size
- * is 1MB.
- */
- {
- vaddr_t va;
- uint32_t junk = 0;
-
- for (va = 0; va < 1024 * 1024; va += 32)
- junk += *(uint32_t *)va;
-
- /* to make sure the above loop isn't optimized away */
- mc88110_wbinv_data_page(junk & PAGE_SIZE);
- }
- mc88410_wb();
- mc88410_inval();
-#endif
+ mc88410_inv(); /* clear external data cache */
CMMU_UNLOCK;
}
@@ -505,7 +493,7 @@ m88110_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count)
* This really only matters to us when running a MULTIPROCESSOR kernel
* (otherwise there is no snooping happening), and given the intrusive
* changes it requires (see the comment about invalidates being turned
- * into flushes with invalidate in m88110_cmmu_inval_cache below), as
+ * into flushes with invalidate in m88110_cmmu_inv_locked below), as
* well as the small performance impact it has), we define a specific
* symbol to enable the suggested workaround.
*
@@ -520,19 +508,44 @@ m88110_tlb_inv(cpuid_t cpu, u_int kernel, vaddr_t vaddr, u_int count)
#define round_cache_line(a) trunc_cache_line((a) + MC88110_CACHE_LINE - 1)
/*
- * Flush both Instruction and Data caches
+ * invalidate I$, writeback and invalidate D$
*/
void
m88110_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size)
{
u_int32_t psr;
+ psize_t count;
+
+#ifdef ENABLE_88110_ERRATA_17
+ size = round_page(pa + size) - trunc_page(pa);
+ pa = trunc_page(pa);
+#else
+ size = round_cache_line(pa + size) - trunc_cache_line(pa);
+ pa = trunc_cache_line(pa);
+#endif
psr = get_psr();
set_psr(psr | PSR_IND);
mc88110_inval_inst();
- mc88110_wb_data();
+ while (size != 0) {
+#ifdef ENABLE_88110_ERRATA_17
+ mc88110_wb_data_page(pa);
+ mc88110_wbinv_data_page(pa);
+ count = PAGE_SIZE;
+#else
+ if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) {
+ mc88110_wbinv_data_page(pa);
+ count = PAGE_SIZE;
+ } else {
+ mc88110_wbinv_data_line(pa);
+ count = MC88110_CACHE_LINE;
+ }
+#endif
+ pa += count;
+ size -= count;
+ }
set_psr(psr);
}
@@ -541,6 +554,7 @@ void
m88410_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size)
{
u_int32_t psr;
+ psize_t count;
#ifdef MULTIPROCESSOR
struct cpu_info *ci = curcpu();
@@ -550,12 +564,36 @@ m88410_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size)
}
#endif
+#ifdef ENABLE_88110_ERRATA_17
+ size = round_page(pa + size) - trunc_page(pa);
+ pa = trunc_page(pa);
+#else
+ size = round_cache_line(pa + size) - trunc_cache_line(pa);
+ pa = trunc_cache_line(pa);
+#endif
+
psr = get_psr();
set_psr(psr | PSR_IND);
mc88110_inval_inst();
- /* flush all data to avoid errata invalidate */
- mc88110_wb_data();
+ while (size != 0) {
+#ifdef ENABLE_88110_ERRATA_17
+ mc88110_wb_data_page(pa);
+ mc88110_wbinv_data_page(pa);
+ count = PAGE_SIZE;
+#else
+ if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) {
+ mc88110_wbinv_data_page(pa);
+ count = PAGE_SIZE;
+ } else {
+ mc88110_wbinv_data_line(pa);
+ count = MC88110_CACHE_LINE;
+ }
+#endif
+ pa += count;
+ size -= count;
+ }
+
CMMU_LOCK;
mc88410_wb();
CMMU_UNLOCK;
@@ -669,24 +707,22 @@ m88410_icache_inv(cpuid_t cpu, paddr_t pa, psize_t size)
}
/*
- * Sync dcache - icache is never dirty but needs to be invalidated as well.
+ * writeback D$
*/
-
void
-m88110_cmmu_sync_cache(paddr_t pa, psize_t size)
+m88110_cmmu_wb_locked(paddr_t pa, psize_t size)
{
-#ifdef ENABLE_88110_ERRATA_17
- mc88110_wb_data_page(pa);
-#else
if (size <= MC88110_CACHE_LINE)
mc88110_wb_data_line(pa);
else
mc88110_wb_data_page(pa);
-#endif
}
+/*
+ * writeback and invalidate D$
+ */
void
-m88110_cmmu_sync_inval_cache(paddr_t pa, psize_t size)
+m88110_cmmu_wbinv_locked(paddr_t pa, psize_t size)
{
#ifdef ENABLE_88110_ERRATA_17
mc88110_wb_data_page(pa);
@@ -699,8 +735,11 @@ m88110_cmmu_sync_inval_cache(paddr_t pa, psize_t size)
#endif
}
+/*
+ * invalidate D$
+ */
void
-m88110_cmmu_inval_cache(paddr_t pa, psize_t size)
+m88110_cmmu_inv_locked(paddr_t pa, psize_t size)
{
/*
* I'd love to do this...
@@ -711,8 +750,7 @@ m88110_cmmu_inval_cache(paddr_t pa, psize_t size)
mc88110_inval_data_page(pa);
* ... but there is no mc88110_inval_data_page(). Callers know
- * this and turn invalidates into syncs with invalidate for page
- * or larger areas.
+ * this and always do this line-by-line.
*/
mc88110_inval_data_line(pa);
}
@@ -742,134 +780,155 @@ m88110_dma_cachectl(paddr_t _pa, psize_t _size, int op)
switch (op) {
case DMA_CACHE_SYNC:
- flusher = m88110_cmmu_sync_cache;
+ /*
+ * If the range does not span complete cache lines,
+ * force invalidation of the incomplete lines. The
+ * rationale behind this is that these incomplete lines
+ * will probably need to be invalidated later, and
+ * we do not want to risk having stale data in the way.
+ */
+ if (pa != _pa || size != _size || size >= PAGE_SIZE)
+ flusher = m88110_cmmu_wbinv_locked;
+ else
+ flusher = m88110_cmmu_wb_locked;
break;
case DMA_CACHE_SYNC_INVAL:
- flusher = m88110_cmmu_sync_inval_cache;
+ flusher = m88110_cmmu_wbinv_locked;
break;
default:
- if (pa != _pa || size != _size || size >= PAGE_SIZE)
- flusher = m88110_cmmu_sync_inval_cache;
- else
- flusher = m88110_cmmu_inval_cache;
+ flusher = m88110_cmmu_inv_locked;
break;
}
+#ifdef ENABLE_88110_ERRATA_17
+ if (flusher == m88110_cmmu_wbinv_locked) {
+ pa = trunc_page(_pa);
+ size = trunc_page(_pa + _size) - pa;
+ }
+#endif
+
psr = get_psr();
set_psr(psr | PSR_IND);
if (op != DMA_CACHE_SYNC)
mc88110_inval_inst();
- while (size != 0) {
- count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
- PAGE_SIZE : MC88110_CACHE_LINE;
-
- (*flusher)(pa, count);
-
- pa += count;
- size -= count;
+ if (flusher == m88110_cmmu_inv_locked) {
+ while (size != 0) {
+ count = MC88110_CACHE_LINE;
+ (*flusher)(pa, count);
+ pa += count;
+ size -= count;
+ }
+ } else {
+ while (size != 0) {
+ count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
+ PAGE_SIZE : MC88110_CACHE_LINE;
+ (*flusher)(pa, count);
+ pa += count;
+ size -= count;
+ }
}
set_psr(psr);
}
void
-m88410_dma_cachectl_local(paddr_t pa, psize_t size, int op)
+m88410_dma_cachectl_local(paddr_t _pa, psize_t _size, int op)
{
u_int32_t psr;
- psize_t count;
+ paddr_t pa;
+ psize_t size, count;
void (*flusher)(paddr_t, psize_t);
void (*ext_flusher)(void);
+ if (op == DMA_CACHE_SYNC) {
+ /*
+ * Enlarge the range to integral pages, to match the
+ * 88410 operation granularity.
+ */
+ pa = trunc_page(_pa);
+ size = trunc_page(_pa + _size) - pa;
+ } else {
+ pa = trunc_cache_line(_pa);
+ size = round_cache_line(_pa + _size) - pa;
+ }
+
switch (op) {
case DMA_CACHE_SYNC:
-#if 0
- flusher = m88110_cmmu_sync_cache;
- ext_flusher = mc88410_wb;
-#endif
+ /*
+ * If the range does not span complete cache lines,
+ * force invalidation of the incomplete lines. The
+ * rationale behind this is that these incomplete lines
+ * will probably need to be invalidated later, and
+ * we do not want to risk having stale data in the way.
+ */
+ if (pa != _pa || size != _size || size >= PAGE_SIZE)
+ flusher = m88110_cmmu_wbinv_locked;
+ else
+ flusher = m88110_cmmu_wb_locked;
break;
case DMA_CACHE_SYNC_INVAL:
- flusher = m88110_cmmu_sync_inval_cache;
+ flusher = m88110_cmmu_wbinv_locked;
ext_flusher = mc88410_wbinv;
break;
default:
-#ifdef ENABLE_88110_ERRATA_17
- flusher = m88110_cmmu_sync_inval_cache;
-#else
- flusher = m88110_cmmu_inval_cache;
-#endif
+ flusher = m88110_cmmu_inv_locked;
#ifdef notyet
- ext_flusher = mc88410_inval;
+ ext_flusher = mc88410_inv;
#else
ext_flusher = mc88410_wbinv;
#endif
break;
}
+#ifdef ENABLE_88110_ERRATA_17
+ if (flusher == m88110_cmmu_wbinv_locked) {
+ pa = trunc_page(_pa);
+ size = trunc_page(_pa + _size) - pa;
+ }
+#endif
+
psr = get_psr();
set_psr(psr | PSR_IND);
- if (op == DMA_CACHE_SYNC) {
- CMMU_LOCK;
+ if (op != DMA_CACHE_SYNC)
+ mc88110_inval_inst();
+ if (flusher == m88110_cmmu_inv_locked) {
while (size != 0) {
- m88110_cmmu_sync_cache(pa, PAGE_SIZE);
- mc88410_wb_page(pa);
- pa += PAGE_SIZE;
- size -= PAGE_SIZE;
+ count = MC88110_CACHE_LINE;
+ (*flusher)(pa, count);
+ pa += count;
+ size -= count;
}
- CMMU_UNLOCK;
} else {
- mc88110_inval_inst();
while (size != 0) {
-#ifdef ENABLE_88110_ERRATA_17
- count = PAGE_SIZE;
-#else
count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
PAGE_SIZE : MC88110_CACHE_LINE;
-#endif
-
(*flusher)(pa, count);
-
pa += count;
size -= count;
}
- CMMU_LOCK;
+ }
+
+
+ CMMU_LOCK;
+ if (op == DMA_CACHE_SYNC) {
+ while (size != 0) {
+ mc88410_wb_page(pa);
+ pa += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ } else {
(*ext_flusher)();
- CMMU_UNLOCK;
}
+ CMMU_UNLOCK;
set_psr(psr);
}
void
-m88410_dma_cachectl(paddr_t _pa, psize_t _size, int op)
+m88410_dma_cachectl(paddr_t pa, psize_t size, int op)
{
- paddr_t pa;
- psize_t size;
-
-#ifdef ENABLE_88110_ERRATA_17
- pa = trunc_page(_pa);
- size = round_page(_pa + _size) - pa;
-
-#if 0 /* not required since m88410_dma_cachectl_local() behaves identically */
- if (op == DMA_CACHE_INV)
- op = DMA_CACHE_SYNC_INVAL;
-#endif
-#else
- if (op == DMA_CACHE_SYNC) {
- pa = trunc_page(_pa);
- size = round_page(_pa + _size) - pa;
- } else {
- pa = trunc_cache_line(_pa);
- size = round_cache_line(_pa + _size) - pa;
-
- if (op == DMA_CACHE_INV) {
- if (pa != _pa || size != _size || size >= PAGE_SIZE)
- op = DMA_CACHE_SYNC_INVAL;
- }
- }
-#endif
-
m88410_dma_cachectl_local(pa, size, op);
#ifdef MULTIPROCESSOR
/*