author     Miod Vallat <miod@cvs.openbsd.org>  2012-05-27 14:32:06 +0000
committer  Miod Vallat <miod@cvs.openbsd.org>  2012-05-27 14:32:06 +0000
commit     54d101cc489130b36069c6585063b205e9d300e8
tree       8934be9bd4df63800035de8bc2b5fb3278aed9c3
parent     94c758cad85dd5534431b713c81ce0ce8d74c4cf
Replace Loongson2F assembly cache routines with equivalent C code. This will
make future maintenance easier.
-rw-r--r--  sys/arch/mips64/conf/files.mips64           4
-rw-r--r--  sys/arch/mips64/mips64/cache_loongson2.S  431
-rw-r--r--  sys/arch/mips64/mips64/cache_loongson2.c  315
3 files changed, 317 insertions, 433 deletions
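
The whole change hinges on a single idiom: wrap the MIPS `cache'
instruction in an inline-asm macro, then write the former unrolled
assembly loops as plain C loops over cache lines. Below is a condensed,
self-contained sketch of that idiom; the macro and the two constants are
lifted from the new file further down, while the wrapper function
l1_wbinv_range() is illustrative only, and the code is of course only
meaningful when built for a mips64 target.

#define HitWBInvalidate_D	0x15	/* write back + invalidate one L1 D$ line */
#define LS2F_CACHE_LINE		32UL	/* Loongson 2E/2F cache line size */

#define cache(op, set, addr) \
	__asm__ __volatile__ \
	("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")

/* Write back and invalidate every L1 D$ line in [va, va + sz). */
static inline void
l1_wbinv_range(unsigned long va, unsigned long sz)
{
	unsigned long eva = va + sz;	/* caller pre-aligns va and sz */

	while (va != eva) {
		cache(HitWBInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}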
diff --git a/sys/arch/mips64/conf/files.mips64 b/sys/arch/mips64/conf/files.mips64
index 9383b21aadd..0e0d5801095 100644
--- a/sys/arch/mips64/conf/files.mips64
+++ b/sys/arch/mips64/conf/files.mips64
@@ -1,4 +1,4 @@
-# $OpenBSD: files.mips64,v 1.18 2012/03/28 20:44:23 miod Exp $
+# $OpenBSD: files.mips64,v 1.19 2012/05/27 14:32:04 miod Exp $

file arch/mips64/mips64/arcbios.c arcbios
file arch/mips64/mips64/clock.c
@@ -14,7 +14,7 @@ file arch/mips64/mips64/sys_machdep.c
file arch/mips64/mips64/trap.c
file arch/mips64/mips64/vm_machdep.c

-file arch/mips64/mips64/cache_loongson2.S cpu_loongson2
+file arch/mips64/mips64/cache_loongson2.c cpu_loongson2
file arch/mips64/mips64/cache_r4k.c cpu_r4000
file arch/mips64/mips64/cache_r5k.S cpu_r5000 | cpu_rm7000
file arch/mips64/mips64/cache_r10k.S cpu_r10000
diff --git a/sys/arch/mips64/mips64/cache_loongson2.S b/sys/arch/mips64/mips64/cache_loongson2.S
deleted file mode 100644
index 480e37a291e..00000000000
--- a/sys/arch/mips64/mips64/cache_loongson2.S
+++ /dev/null
@@ -1,431 +0,0 @@
-/* $OpenBSD: cache_loongson2.S,v 1.8 2012/04/21 12:20:30 miod Exp $ */
-
-/*
- * Copyright (c) 2009 Miodrag Vallat.
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-/*
- * Copyright (c) 1998-2004 Opsycon AB (www.opsycon.se)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-/*
- * Processors supported:
- * Loongson 2E/2F (code could be modified to work on 2C by not hardcoding
- * the number of ways).
- * The L1 virtual index bits, as well as the cache line size (32 bytes),
- * are hardcoded.
- */
-
-#include <machine/param.h>
-#include <machine/asm.h>
-#include <machine/cpu.h>
-#include <machine/regnum.h>
-
-#include "assym.h"
-
- .set mips3
-
-/* L1 cache operations */
-#define IndexInvalidate_I 0x00
-#define IndexWBInvalidate_D 0x01
-#define IndexLoadTag_D 0x05
-#define IndexStoreTag_D 0x09
-#define HitInvalidate_D 0x11
-#define HitWBInvalidate_D 0x15
-#define IndexLoadData_D 0x19
-#define IndexStoreData_D 0x1d
-
-/* L2 cache operations */
-#define IndexWBInvalidate_S 0x03
-#define IndexLoadTag_S 0x07
-#define IndexStoreTag_S 0x0b
-#define HitInvalidate_S 0x13
-#define HitWBInvalidate_S 0x17
-#define IndexLoadData_S 0x1b
-#define IndexStoreData_S 0x1f
-
-/*
- * Define cache type definition bits. NOTE! the 3 lsb may NOT change!
- */
-#define CTYPE_DIR 0x0001 /* Cache is direct mapped */
-#define CTYPE_2WAY 0x0002 /* Cache is TWO way */
-#define CTYPE_4WAY 0x0004 /* Cache is FOUR way */
-#define CTYPE_WAYMASK 0x0007
-
-#define CTYPE_HAS_IL2 0x0100 /* Internal L2 Cache present */
-#define CTYPE_HAS_XL2 0x0200 /* External L2 Cache present */
-#define CTYPE_HAS_XL3 0x0400 /* External L3 Cache present */
-
- .set noreorder # Noreorder is default style!
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_ConfigCache(struct cpu_info *ci) --
- *
- * Setup various cache-dependent variables:
- * The size of the data cache is stored into ci_l1datacachesize.
- * The size of instruction cache is stored into ci_l1instcachesize.
- * Alignment mask for cache aliasing test is stored in cache_valias_mask.
- * ci_l2size is set to the size of the secondary cache.
- * ci_l3size is set to the size of the tertiary cache.
- * ci_cacheways is set to 0 for direct mapped caches, 2 for two way
- * caches and 4 for four way caches. This primarily indicates the
- * primary cache associativity.
- *
- * Allocation:
- *	ta0, ta1, ta2 used to hold I and D set size and alias mask.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_ConfigCache, 0)
- mfc0 v0, COP_0_CONFIG # Get configuration register
-
- srl t1, v0, 9 # Get I cache size.
- and t1, 7
- li t2, 4096
- sllv ta0, t2, t1 # ta0 = Initial I set size.
-
- and t2, v0, 0x20
- srl t2, t2, 1 # Get I cache line size.
- addu t2, t2, 16
- sw t2, CI_L1INSTCACHELINE(a0)
-
- srl t1, v0, 6 # Get D cache size.
- and t1, 7
- li t2, 4096 # Fixed page size.
- sllv ta1, t2, t1
-
- and t2, v0, 0x10
- addu t2, t2, 16 # Get D cache line size.
- sw t2, CI_L1DATACACHELINE(a0)
-
- li ta3, 0 # Tertiary size 0.
-
- li ta2, 512 * 1024 # fixed L2 size...
- li t2, (CTYPE_4WAY|CTYPE_HAS_IL2) # caches are 4-way, internal L2
-
-/*
- * Get here with t2 = Cache type, ta0 = L1 I size, ta1 = L1 D size.
- * ta2 = secondary size, ta3 = tertiary size.
- */
-ConfResult:
- sw t2, CI_CACHECONFIGURATION(a0) # Save cache attributes
- and t2, CTYPE_WAYMASK # isolate number of sets.
- sw t2, CI_CACHEWAYS(a0)
- srl t2, 1 # get div shift for set size.
-
- sw ta2, CI_L2SIZE(a0)
- sw ta3, CI_L3SIZE(a0)
-
- addu t1, ta0, -1 # Use icache for alias mask
- srl t1, t2
- and t1, ~(PAGE_SIZE - 1)
- beqz t1, 1f
- nop
- or t1, (PAGE_SIZE - 1)
-1:
- PTR_S t1, cache_valias_mask
- PTR_S t1, pmap_prefer_mask
-
- sw ta0, CI_L1INSTCACHESIZE(a0) # store cache size.
- sw ta1, CI_L1DATACACHESIZE(a0) # store cache size.
-
- /*
- * Cache way number encoding is done in the lowest bits, and
- * these variables are not used. We make them nonzero so
- * that `mi' code can divide by them if necessary.
- */
- li ta1, 1
- sw ta1, CI_L1INSTCACHESET(a0)
- sw ta1, CI_L1DATACACHESET(a0)
-
- j ra
- nop
-END(Loongson2_ConfigCache)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_SyncCache(struct cpu_info *ci) --
- *
- * Sync ALL caches.
- * No need to look at number of sets since we are cleaning out
- * the entire cache and thus will address all sets anyway.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_SyncCache, 0)
- sync
-
- lw t1, CI_L1INSTCACHESIZE(a0)
- srl t1, t1, 2 # / 4ways
- lw t2, CI_L1DATACACHESIZE(a0)
- srl t2, t2, 2 # / 4ways
-
- /* L1 I$ */
-
- LOAD_XKPHYS(t0, CCA_CACHED)
- PTR_ADDU t1, t0, t1 # Compute end address
- PTR_SUBU t1, 32
-1:
- cache IndexInvalidate_I, 0(t0)
- bne t0, t1, 1b
- PTR_ADDU t0, 32
-
- /* L1 D$ */
-
- LOAD_XKPHYS(t0, CCA_CACHED)
- PTR_ADDU t1, t0, t2 # End address
- PTR_SUBU t1, 32
-1:
- cache IndexWBInvalidate_D, 0(t0)
- cache IndexWBInvalidate_D, 1(t0)
- cache IndexWBInvalidate_D, 2(t0)
- cache IndexWBInvalidate_D, 3(t0)
- bne t0, t1, 1b
- PTR_ADDU t0, 32
-
- /* L2 */
-
- LOAD_XKPHYS(t0, CCA_CACHED)
- lw t2, CI_L2SIZE(a0)
- srl t2, 2 # because cache is 4 way
- PTR_ADDU t1, t0, t2
- PTR_SUBU t1, 32
-1:
- cache IndexWBInvalidate_S, 0(t0)
- cache IndexWBInvalidate_S, 1(t0)
- cache IndexWBInvalidate_S, 2(t0)
- cache IndexWBInvalidate_S, 3(t0)
- bne t0, t1, 1b
- PTR_ADDU t0, 32
-
- j ra
- nop
-END(Loongson2_SyncCache)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t va, size_t len)
- *
- * Invalidate the L1 instruction cache for at least the range
- * of va to va + len - 1.
- * The address is reduced to an XKPHYS index to avoid TLB faults.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_InvalidateICache, 0)
- andi a1, ((1 << 14) - 1) # only keep index bits
- PTR_ADDU a2, 31 # Round up size
- LOAD_XKPHYS(a3, CCA_CACHED)
- PTR_ADDU a2, a1 # Add extra from address
- dsrl a1, a1, 5
- dsll a1, a1, 5 # align address
- PTR_SUBU a2, a2, a1
- PTR_ADDU a1, a3 # a1 now new XKPHYS address
- dsrl a2, a2, 5 # Number of unrolled loops
-1:
- PTR_ADDU a2, -1
- cache IndexInvalidate_I, 0(a1)
- bne a2, zero, 1b
- PTR_ADDU a1, 32
-
- j ra
- nop
-END(Loongson2_InvalidateICache)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
- *
- * Sync the L1 and L2 data cache page for address pa.
- * The virtual address is not used.
- *
- * The index for L1 is the low 14 bits of the virtual address. Since
- * the page size is 2**14 bytes, it is possible to access the page
- * through any valid address.
- * The index for L2 is the low 17 bits of the physical address.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_SyncDCachePage, 0)
- sync
-
- LOAD_XKPHYS(a1, CCA_CACHED)
- or a1, a2 # a1 now new L1 address
- dsrl a1, a1, PAGE_SHIFT
- dsll a1, a1, PAGE_SHIFT # page align pa
- move a2, a1 # save for L2
-
- /* L1 */
- PTR_ADDU a3, a1, PAGE_SIZE-32
-1:
- cache HitWBInvalidate_D, 0(a1)
- bne a3, a1, 1b
- PTR_ADDU a1, 32
-
- /* L2 */
- PTR_ADDU a3, a2, PAGE_SIZE-32
-2:
- cache HitWBInvalidate_S, 0(a2)
- bne a3, a2, 2b
- PTR_ADDU a2, 32
-
- j ra
- nop
-END(Loongson2_SyncDCachePage)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t va, size_t len)
- *
- * Sync L1 and L2 data caches for range of va to va + len - 1.
- * Since L2 is writeback, we need to operate on L1 first, to make sure
- * L1 is clean. The usual mips strategy of doing L2 first, and then
- * the L1 orphans, will not work as the orphans would only be pushed
- * to L2, and not to physical memory.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_HitSyncDCache, 0)
- sync
-
- beq a2, zero, 3f # size is zero!
- PTR_ADDU a2, 31 # Round up
- PTR_ADDU a2, a2, a1 # Add extra from address
- dsrl a1, a1, 5
- dsll a1, a1, 5 # align to cacheline boundary
- PTR_SUBU a2, a2, a1
- dsrl a2, a2, 5 # Compute number of cache lines
-
- move a4, a2 # save for L2
- move a3, a1
-
- /* L1 */
-1:
- PTR_ADDU a2, -1
- cache HitWBInvalidate_D, 0(a1)
- bne a2, zero, 1b
- PTR_ADDU a1, 32
-
- /* L2 */
-2:
- PTR_ADDU a4, -1
- cache HitWBInvalidate_S, 0(a3)
- bne a4, zero, 2b
- PTR_ADDU a3, 32
-
-3:
- j ra
- nop
-END(Loongson2_HitSyncDCache)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t va, size_t len)
- *
- * Invalidate L1 and L2 data caches for range of va to va + len - 1.
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_HitInvalidateDCache, 0)
- sync
-
- beq a2, zero, 3f # size is zero!
- PTR_ADDU a2, 31 # Round up
- PTR_ADDU a2, a2, a1 # Add extra from address
- dsrl a1, a1, 5
- dsll a1, a1, 5 # align to cacheline boundary
- PTR_SUBU a2, a2, a1
- dsrl a2, a2, 5 # Compute number of cache lines
-
- move a4, a2 # save for L2
- move a3, a1
-
- /* L1 */
-1:
- PTR_ADDU a2, -1
- cache HitInvalidate_D, 0(a1)
- bne a2, zero, 1b
- PTR_ADDU a1, 32
-
- /* L2 */
-2:
- PTR_ADDU a4, -1
- cache HitInvalidate_S, 0(a3)
- bne a4, zero, 2b
- PTR_ADDU a3, 32
-
-3:
- j ra
- nop
-END(Loongson2_HitInvalidateDCache)
-
-/*----------------------------------------------------------------------------
- *
- * Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t va, size_t len, int how)
- *
- * Invalidate or flush L1 and L2 data caches for range of va to
- * va + len - 1.
- *
- * If how == 0 (invalidate):
- * L1 and L2 caches are invalidated or flushed if the area
- * does not match the alignment requirements.
- * If how == 1 (writeback):
- * L1 and L2 are written back.
- * If how == 2 (writeback and invalidate):
- * L1 and L2 are written back to memory and invalidated (flushed).
- *
- *----------------------------------------------------------------------------
- */
-LEAF(Loongson2_IOSyncDCache, 0)
- sync
-
- beqz a3, SyncInv # Sync PREREAD
- nop
-
-SyncWBInv:
- j Loongson2_HitSyncDCache
- nop
-
-SyncInv:
- or t0, a1, a2 # check if invalidate possible
- and t0, t0, 31 # both address and size must
- bnez t0, SyncWBInv # be aligned to the cache size
- nop
-
- j Loongson2_HitInvalidateDCache
- nop
-END(Loongson2_IOSyncDCache)
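
Both the assembly just removed and the C replacement that follows lean
on the same addressing trick: only the low 14 bits of an address act as
the L1 index (64KB of I$ split across 4 ways leaves one 16KB way), so
the incoming address is rebased into the cached XKPHYS segment, which is
direct-mapped and bypasses the TLB, guaranteeing that the cache op can
never take a TLB fault. The immediate offsets 0..3 used in the D$ and L2
Index loops then set the low address bits, which select the way on these
4-way caches. A minimal sketch of the rebasing follows; the helper name
is hypothetical, and the 0x98... constant encodes XKPHYS with the
cacheable CCA, matching what PHYS_TO_XKPHYS(0, CCA_CACHED) produces in
the patch.

#define XKPHYS_CACHED	0x9800000000000000UL	/* XKPHYS segment, CCA 3 (cached) */
#define L1_INDEX_MASK	((1UL << 14) - 1)	/* one 16KB way of the 64KB L1 */

/* Fold any virtual address onto a fault-proof XKPHYS address that has
   the same L1 index bits. */
static inline unsigned long
icache_index_address(unsigned long va)
{
	return XKPHYS_CACHED | (va & L1_INDEX_MASK);
}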
diff --git a/sys/arch/mips64/mips64/cache_loongson2.c b/sys/arch/mips64/mips64/cache_loongson2.c
new file mode 100644
index 00000000000..49370632582
--- /dev/null
+++ b/sys/arch/mips64/mips64/cache_loongson2.c
@@ -0,0 +1,315 @@
+/* $OpenBSD: cache_loongson2.c,v 1.1 2012/05/27 14:32:05 miod Exp $ */
+
+/*
+ * Copyright (c) 2009, 2012 Miodrag Vallat.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Cache handling code for Loongson 2E and 2F processors.
+ * This code could be made to work on 2C by not hardcoding the number of
+ * cache ways.
+ *
+ * 2E and 2F caches are:
+ * - L1 I$ is 4-way, VIPT, 32 bytes/line, 64KB total
+ * - L1 D$ is 4-way, VIPT, write-back, 32 bytes/line, 64KB total
+ * - L2 is 4-way, PIPT, write-back, 32 bytes/line, 512KB total
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <mips64/cache.h>
+#include <machine/cpu.h>
+
+#include <uvm/uvm_extern.h>
+
+/* L1 cache operations */
+#define IndexInvalidate_I 0x00
+#define IndexWBInvalidate_D 0x01
+#define IndexLoadTag_D 0x05
+#define IndexStoreTag_D 0x09
+#define HitInvalidate_D 0x11
+#define HitWBInvalidate_D 0x15
+#define IndexLoadData_D 0x19
+#define IndexStoreData_D 0x1d
+
+/* L2 cache operations */
+#define IndexWBInvalidate_S 0x03
+#define IndexLoadTag_S 0x07
+#define IndexStoreTag_S 0x0b
+#define HitInvalidate_S 0x13
+#define HitWBInvalidate_S 0x17
+#define IndexLoadData_S 0x1b
+#define IndexStoreData_S 0x1f
+
+#define cache(op,set,addr) \
+ __asm__ __volatile__ \
+ ("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")
+#define sync() \
+ __asm__ __volatile__ ("sync" ::: "memory");
+
+static __inline__ void ls2f_hitinv_primary(vaddr_t, vsize_t);
+static __inline__ void ls2f_hitinv_secondary(vaddr_t, vsize_t);
+static __inline__ void ls2f_hitwbinv_primary(vaddr_t, vsize_t);
+static __inline__ void ls2f_hitwbinv_secondary(vaddr_t, vsize_t);
+
+#define LS2F_CACHE_LINE 32UL
+#define LS2F_CACHE_WAYS 4UL
+#define LS2F_L1_SIZE (64UL * 1024UL)
+#define LS2F_L2_SIZE (512UL * 1024UL)
+
+void
+Loongson2_ConfigCache(struct cpu_info *ci)
+{
+ ci->ci_l1instcacheline = LS2F_CACHE_LINE;
+ ci->ci_l1instcachesize = LS2F_L1_SIZE;
+ ci->ci_l1datacacheline = LS2F_CACHE_LINE;
+ ci->ci_l1datacachesize = LS2F_L1_SIZE;
+ ci->ci_cacheways = LS2F_CACHE_WAYS;
+ ci->ci_l1instcacheset = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
+ ci->ci_l1datacacheset = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
+ ci->ci_l2size = LS2F_L2_SIZE;
+ ci->ci_l3size = 0;
+
+ cache_valias_mask = ci->ci_l1instcacheset & ~PAGE_MASK;
+
+ /* should not happen as we use 16KB pages */
+ if (cache_valias_mask != 0) {
+ cache_valias_mask |= PAGE_MASK;
+ pmap_prefer_mask |= cache_valias_mask;
+ }
+}
+
+/*
+ * Writeback and invalidate all caches.
+ */
+void
+Loongson2_SyncCache(struct cpu_info *ci)
+{
+ vaddr_t sva, eva;
+
+ sync();
+
+ sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
+ eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
+ while (sva != eva) {
+ cache(IndexInvalidate_I, 0, sva);
+ sva += LS2F_CACHE_LINE;
+ }
+
+ sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
+ eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
+ while (sva != eva) {
+ cache(IndexWBInvalidate_D, 0, sva);
+ cache(IndexWBInvalidate_D, 1, sva);
+ cache(IndexWBInvalidate_D, 2, sva);
+ cache(IndexWBInvalidate_D, 3, sva);
+ sva += LS2F_CACHE_LINE;
+ }
+
+ sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
+ eva = sva + LS2F_L2_SIZE / LS2F_CACHE_WAYS;
+ while (sva != eva) {
+ cache(IndexWBInvalidate_S, 0, sva);
+ cache(IndexWBInvalidate_S, 1, sva);
+ cache(IndexWBInvalidate_S, 2, sva);
+ cache(IndexWBInvalidate_S, 3, sva);
+ sva += LS2F_CACHE_LINE;
+ }
+}
+
+/*
+ * Invalidate I$ for the given range.
+ */
+void
+Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
+{
+ vaddr_t va, sva, eva;
+ vsize_t sz;
+
+ /* extend the range to integral cache lines */
+ va = _va & ~(LS2F_CACHE_LINE - 1);
+ sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;
+
+ sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
+ /* keep only the index bits */
+ sva |= va & ((1UL << 14) - 1);
+ eva = sva + sz;
+ while (sva != eva) {
+ cache(IndexInvalidate_I, 0, sva);
+ sva += LS2F_CACHE_LINE;
+ }
+}
+
+/*
+ * Writeback D$ for the given page.
+ *
+ * The index for L1 is the low 14 bits of the virtual address. Since the
+ * page size is 2**14 bytes, it is possible to access the page through
+ * any valid address.
+ */
+void
+Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
+{
+ vaddr_t sva, eva;
+
+ sync();
+
+ sva = PHYS_TO_XKPHYS(pa, CCA_CACHED);
+ eva = sva + PAGE_SIZE;
+ for (va = sva; va != eva; va += LS2F_CACHE_LINE)
+ cache(HitWBInvalidate_D, 0, va);
+ for (va = sva; va != eva; va += LS2F_CACHE_LINE)
+ cache(HitWBInvalidate_S, 0, va);
+}
+
+/*
+ * Writeback D$ for the given range. Range is expected to be currently
+ * mapped, allowing the use of `Hit' operations. This is less aggressive
+ * than using `Index' operations.
+ */
+
+static __inline__ void
+ls2f_hitwbinv_primary(vaddr_t va, vsize_t sz)
+{
+ vaddr_t eva;
+
+ eva = va + sz;
+ while (va != eva) {
+ cache(HitWBInvalidate_D, 0, va);
+ va += LS2F_CACHE_LINE;
+ }
+}
+
+static __inline__ void
+ls2f_hitwbinv_secondary(vaddr_t va, vsize_t sz)
+{
+ vaddr_t eva;
+
+ eva = va + sz;
+ while (va != eva) {
+ cache(HitWBInvalidate_S, 0, va);
+ va += LS2F_CACHE_LINE;
+ }
+}
+
+void
+Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
+{
+ vaddr_t va;
+ vsize_t sz;
+
+ sync();
+
+ /* extend the range to integral cache lines */
+ va = _va & ~(LS2F_CACHE_LINE - 1);
+ sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;
+
+ ls2f_hitwbinv_primary(va, sz);
+ ls2f_hitwbinv_secondary(va, sz);
+}
+
+/*
+ * Invalidate D$ for the given range. Range is expected to be currently
+ * mapped, allowing the use of `Hit' operations. This is less aggressive
+ * than using `Index' operations.
+ */
+
+static __inline__ void
+ls2f_hitinv_primary(vaddr_t va, vsize_t sz)
+{
+ vaddr_t eva;
+
+ eva = va + sz;
+ while (va != eva) {
+ cache(HitInvalidate_D, 0, va);
+ va += LS2F_CACHE_LINE;
+ }
+}
+
+static __inline__ void
+ls2f_hitinv_secondary(vaddr_t va, vsize_t sz)
+{
+ vaddr_t eva;
+
+ eva = va + sz;
+ while (va != eva) {
+ cache(HitInvalidate_S, 0, va);
+ va += LS2F_CACHE_LINE;
+ }
+}
+
+void
+Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
+{
+ vaddr_t va;
+ vsize_t sz;
+
+ /* extend the range to integral cache lines */
+ va = _va & ~(LS2F_CACHE_LINE - 1);
+ sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;
+
+ ls2f_hitinv_primary(va, sz);
+ ls2f_hitinv_secondary(va, sz);
+
+ sync();
+}
+
+/*
+ * Backend for bus_dmamap_sync(). Enforce coherency of the given range
+ * by performing the necessary cache writeback and/or invalidate
+ * operations.
+ */
+void
+Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz, int how)
+{
+ vaddr_t va;
+ vsize_t sz;
+ int partial_start, partial_end;
+
+ /* extend the range to integral cache lines */
+ va = _va & ~(LS2F_CACHE_LINE - 1);
+ sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;
+
+ switch (how) {
+ case CACHE_SYNC_R:
+ /* writeback partial cachelines */
+ if (((_va | _sz) & (LS2F_CACHE_LINE - 1)) != 0) {
+ partial_start = va != _va;
+ partial_end = va + sz != _va + _sz;
+ } else {
+ partial_start = partial_end = 0;
+ }
+ if (partial_start) {
+ cache(HitWBInvalidate_D, 0, va);
+ cache(HitWBInvalidate_S, 0, va);
+ va += LS2F_CACHE_LINE;
+ sz -= LS2F_CACHE_LINE;
+ }
+ if (sz != 0 && partial_end) {
+ cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
+ cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
+ sz -= LS2F_CACHE_LINE;
+ }
+ ls2f_hitinv_primary(va, sz);
+ ls2f_hitinv_secondary(va, sz);
+ break;
+ case CACHE_SYNC_X:
+ case CACHE_SYNC_W:
+ ls2f_hitwbinv_primary(va, sz);
+ ls2f_hitwbinv_secondary(va, sz);
+ break;
+ }
+}
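
One subtlety in the new Loongson2_IOSyncDCache deserves a worked
example: before a DMA read (CACHE_SYNC_R), lines lying wholly inside the
buffer can simply be invalidated, but a head or tail line the buffer
only partially covers may also hold live bytes belonging to neighbouring
data, so it is written back and invalidated instead. A small
host-runnable sketch of the alignment arithmetic, using a made-up input
range; the masking and the partial_start/partial_end tests mirror the
patch.

#include <stdio.h>

#define LINE 32UL			/* LS2F_CACHE_LINE */

int
main(void)
{
	unsigned long _va = 0x1009, _sz = 0x53;	/* deliberately unaligned */
	unsigned long va, sz;
	int partial_start, partial_end;

	/* extend the range to integral cache lines, as the patch does */
	va = _va & ~(LINE - 1);
	sz = ((_va + _sz + LINE - 1) & ~(LINE - 1)) - va;

	partial_start = va != _va;		/* head line partly covered */
	partial_end = va + sz != _va + _sz;	/* tail line partly covered */

	/* prints: 0x1009..0x105c -> lines 0x1000..0x1060, partial head 1, tail 1 */
	printf("0x%lx..0x%lx -> lines 0x%lx..0x%lx, partial head %d, tail %d\n",
	    _va, _va + _sz, va, va + sz, partial_start, partial_end);
	return 0;
}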