From de9682ab698f5235a32886f9edaec5614160bfff Mon Sep 17 00:00:00 2001 From: Jason Wright Date: Sun, 21 Jul 2002 05:09:18 +0000 Subject: - Deal with the fact that nsp2k's bridge can't deal with burst read/write by detecting conditions that could generate one from a sequential access on the parent bridge and generating a harmless nop (scratch registers are just plain nice to have) - also, commit work in progress for dealing with modexp properly, one parameter still left to compute, tho. --- sys/dev/pci/noct.c | 232 ++++++++++++++++++++++++++++++++++++++------------ sys/dev/pci/noctvar.h | 14 ++- 2 files changed, 185 insertions(+), 61 deletions(-) (limited to 'sys/dev') diff --git a/sys/dev/pci/noct.c b/sys/dev/pci/noct.c index 45fddaaf247..2ea4cc509a9 100644 --- a/sys/dev/pci/noct.c +++ b/sys/dev/pci/noct.c @@ -1,4 +1,4 @@ -/* $OpenBSD: noct.c,v 1.10 2002/07/17 03:32:16 jason Exp $ */ +/* $OpenBSD: noct.c,v 1.11 2002/07/21 05:09:17 jason Exp $ */ /* * Copyright (c) 2002 Jason L. Wright (jason@thought.net) @@ -103,6 +103,8 @@ int noct_newsession(u_int32_t *, struct cryptoini *); int noct_freesession(u_int64_t); int noct_process(struct cryptop *); +u_int32_t noct_read_4(struct noct_softc *, bus_size_t); +void noct_write_4(struct noct_softc *, bus_size_t, u_int32_t); u_int64_t noct_read_8(struct noct_softc *, u_int32_t); void noct_write_8(struct noct_softc *, u_int32_t, u_int64_t); @@ -166,7 +168,8 @@ noct_attach(parent, self, aux) /* Before we do anything else, put the chip in little endian mode */ NOCT_WRITE_4(sc, NOCT_BRDG_ENDIAN, 0); - + sc->sc_rar_last = 0xffffffff; + sc->sc_waw_last = 0xffffffff; sc->sc_dmat = pa->pa_dmat; sc->sc_cid = crypto_get_driverid(0); @@ -1353,6 +1356,40 @@ noct_read_8(sc, reg) return (ret); } +/* + * NSP2000 is has a nifty bug, writes or reads to consecutive addresses + * can be coalesced by a PCI bridge and executed as a burst read or write + * which NSP2000's AMBA bridge doesn't grok. Avoid the hazard. + */ +u_int32_t +noct_read_4(sc, off) + struct noct_softc *sc; + bus_size_t off; +{ + if (sc->sc_rar_last == off - 4 || + sc->sc_rar_last == off + 4) { + bus_space_write_4(sc->sc_st, sc->sc_sh, NOCT_BRDG_TEST, 0); + sc->sc_rar_last = off; + sc->sc_waw_last = 0xffffffff; + } + return (bus_space_read_4(sc->sc_st, sc->sc_sh, off)); +} + +void +noct_write_4(sc, off, val) + struct noct_softc *sc; + bus_size_t off; + u_int32_t val; +{ + if (sc->sc_waw_last == off - 4 || + sc->sc_waw_last == off + 4) { + bus_space_read_4(sc->sc_st, sc->sc_sh, NOCT_BRDG_TEST); + sc->sc_waw_last = off; + sc->sc_rar_last = 0xffffffff; + } + bus_space_write_4(sc->sc_st, sc->sc_sh, off, val); +} + struct noct_softc * noct_kfind(krp) struct cryptkop *krp; @@ -1415,84 +1452,171 @@ noct_kprocess_modexp(sc, krp) struct cryptkop *krp; { int s, err; - u_long roff; u_int32_t wp, aidx, bidx, midx; u_int64_t adr; union noct_pkh_cmd *cmd; - int i; + int i, bits, mbits, digits, rmodidx, mmulidx; s = splnet(); - if (noct_pkh_nfree(sc) < 5) { - /* Need 5 entries: 3 loads, 1 store, and an op */ + if (noct_pkh_nfree(sc) < 7) { + /* Need 7 entries: 3 loads, 1 store, 3 ops */ splx(s); return (ENOMEM); } - wp = sc->sc_pkhwp; - - aidx = wp; - if (noct_kload(sc, &krp->krp_param[0], aidx)) + /* Load M */ + midx = wp = sc->sc_pkhwp; + mbits = bits = noct_ksigbits(&krp->krp_param[2]); + if (bits > 4096) { + err = ERANGE; goto errout; - if (++wp == NOCT_PKH_ENTRIES) - wp = 0; - - bidx = wp; - if (noct_kload(sc, &krp->krp_param[1], bidx)) + } + sc->sc_pkh_bnsw[midx].bn_siz = (bits + 127) / 128; + if (extent_alloc(sc->sc_pkh_bn, sc->sc_pkh_bnsw[midx].bn_siz, + EX_NOALIGN, 0, EX_NOBOUNDARY, EX_NOWAIT, + &sc->sc_pkh_bnsw[midx].bn_off)) { + err = ENOMEM; goto errout; + } + cmd = &sc->sc_pkhcmd[midx]; + cmd->cache.op = htole32(PKH_OP_CODE_LOAD); + cmd->cache.r = htole32(sc->sc_pkh_bnsw[midx].bn_off); + adr = sc->sc_bnmap->dm_segs[0].ds_addr + + (sc->sc_pkh_bnsw[midx].bn_off * 16); + cmd->cache.addrhi = htole32((adr >> 32) & 0xffffffff); + cmd->cache.addrlo = htole32((adr >> 0 ) & 0xffffffff); + cmd->cache.len = htole32(sc->sc_pkh_bnsw[midx].bn_siz); + cmd->cache.unused[0] = cmd->cache.unused[1] = cmd->cache.unused[2] = 0; + bus_dmamap_sync(sc->sc_dmat, sc->sc_pkhmap, + midx * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + BUS_DMASYNC_PREWRITE); + for (i = 0; i < (digits * 16); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[midx].bn_off * 16) + i] = 0; + for (i = 0; i < ((bits + 7) / 8); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[midx].bn_off * 16) + + (digits * 16) - 1 - i] = krp->krp_param[2].crp_p[i]; + bus_dmamap_sync(sc->sc_dmat, sc->sc_bnmap, + sc->sc_pkh_bnsw[midx].bn_off * 16, digits * 16, + BUS_DMASYNC_PREWRITE); if (++wp == NOCT_PKH_ENTRIES) wp = 0; - midx = wp; - if (noct_kload(sc, &krp->krp_param[2], midx)) - goto errout; + /* Store RMOD(m) -> location tmp1 */ + rmodidx = wp; + sc->sc_pkh_bnsw[rmodidx].bn_siz = sc->sc_pkh_bnsw[midx].bn_siz; + if (extent_alloc(sc->sc_pkh_bn, sc->sc_pkh_bnsw[rmodidx].bn_siz, + EX_NOALIGN, 0, EX_NOBOUNDARY, EX_NOWAIT, + &sc->sc_pkh_bnsw[rmodidx].bn_off)) { + err = ENOMEM; + goto errout_m; + } + cmd = &sc->sc_pkhcmd[rmodidx]; + cmd->arith.op = htole32(PKH_OP_CODE_RMOD); + cmd->arith.r = htole32(sc->sc_pkh_bnsw[rmodidx].bn_off); + cmd->arith.m = htole32(sc->sc_pkh_bnsw[midx].bn_off | + (sc->sc_pkh_bnsw[midx].bn_siz << 16)); + cmd->arith.a = cmd->arith.b = cmd->arith.c = cmd->arith.unused[0] = + cmd->arith.unused[1] = 0; + bus_dmamap_sync(sc->sc_dmat, sc->sc_pkhmap, + rmodidx * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + BUS_DMASYNC_PREWRITE); if (++wp == NOCT_PKH_ENTRIES) wp = 0; - /* alloc cache for result */ - if (extent_alloc(sc->sc_pkh_bn, sc->sc_pkh_bnsw[midx].bn_siz, - EX_NOALIGN, 0, EX_NOBOUNDARY, EX_NOWAIT, &roff)) { + /* Load A XXX deal with A < M padding ... */ + aidx = wp = sc->sc_pkhwp; + bits = noct_ksigbits(&krp->krp_param[0]); + if (bits > 4096 || bits > mbits) { + err = ERANGE; + goto errout_rmod; + } + sc->sc_pkh_bnsw[aidx].bn_siz = (bits + 127) / 128; + if (extent_alloc(sc->sc_pkh_bn, sc->sc_pkh_bnsw[aidx].bn_siz, + EX_NOALIGN, 0, EX_NOBOUNDARY, EX_NOWAIT, + &sc->sc_pkh_bnsw[aidx].bn_off)) { err = ENOMEM; - goto errout; + goto errout_rmod; } + cmd = &sc->sc_pkhcmd[aidx]; + cmd->cache.op = htole32(PKH_OP_CODE_LOAD); + cmd->cache.r = htole32(sc->sc_pkh_bnsw[aidx].bn_off); + adr = sc->sc_bnmap->dm_segs[0].ds_addr + + (sc->sc_pkh_bnsw[aidx].bn_off * 16); + cmd->cache.addrhi = htole32((adr >> 32) & 0xffffffff); + cmd->cache.addrlo = htole32((adr >> 0 ) & 0xffffffff); + cmd->cache.len = htole32(sc->sc_pkh_bnsw[aidx].bn_siz); + cmd->cache.unused[0] = cmd->cache.unused[1] = cmd->cache.unused[2] = 0; + bus_dmamap_sync(sc->sc_dmat, sc->sc_pkhmap, + aidx * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + BUS_DMASYNC_PREWRITE); + for (i = 0; i < (digits * 16); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[aidx].bn_off * 16) + i] = 0; + for (i = 0; i < ((bits + 7) / 8); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[aidx].bn_off * 16) + + (digits * 16) - 1 - i] = krp->krp_param[2].crp_p[i]; + bus_dmamap_sync(sc->sc_dmat, sc->sc_bnmap, + sc->sc_pkh_bnsw[aidx].bn_off * 16, digits * 16, + BUS_DMASYNC_PREWRITE); + if (++wp == NOCT_PKH_ENTRIES) + wp = 0; - cmd = &sc->sc_pkhcmd[wp]; + /* Compute (A * tmp1) mod m -> A */ + mmulidx = wp; + sc->sc_pkh_bnsw[mmulidx].bn_siz = 0; + sc->sc_pkh_bnsw[mmulidx].bn_off = 0; + cmd = &sc->sc_pkhcmd[mmulidx]; cmd->arith.op = htole32(PKH_OP_CODE_MUL); - cmd->arith.r = htole32(roff); - cmd->arith.m = htole32(((sc->sc_pkh_bnsw[midx].bn_siz) << 16) | - sc->sc_pkh_bnsw[midx].bn_off); - cmd->arith.a = htole32(((sc->sc_pkh_bnsw[aidx].bn_siz) << 16) | - sc->sc_pkh_bnsw[aidx].bn_off); - cmd->arith.b = htole32(((sc->sc_pkh_bnsw[bidx].bn_siz) << 16) | - sc->sc_pkh_bnsw[bidx].bn_off); + cmd->arith.r = htole32(sc->sc_pkh_bnsw[aidx].bn_off); + cmd->arith.m = htole32(sc->sc_pkh_bnsw[midx].bn_off | + (sc->sc_pkh_bnsw[midx].bn_siz << 16)); + cmd->arith.a = htole32(sc->sc_pkh_bnsw[aidx].bn_off | + (sc->sc_pkh_bnsw[aidx].bn_siz << 16)); + cmd->arith.b = htole32(sc->sc_pkh_bnsw[rmodidx].bn_off | + (sc->sc_pkh_bnsw[rmodidx].bn_siz << 16)); cmd->arith.c = cmd->arith.unused[0] = cmd->arith.unused[1] = 0; bus_dmamap_sync(sc->sc_dmat, sc->sc_pkhmap, - wp * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + rmodidx * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), BUS_DMASYNC_PREWRITE); - sc->sc_pkh_bnsw[wp].bn_callback = NULL; if (++wp == NOCT_PKH_ENTRIES) wp = 0; - cmd = &sc->sc_pkhcmd[wp]; - cmd->cache.op = htole32(PKH_OP_CODE_STORE | PKH_OP_SI); - cmd->cache.r = htole32(roff); - adr = sc->sc_bnmap->dm_segs[0].ds_addr + (roff * 16); + /* Load B */ + bidx = wp = sc->sc_pkhwp; + bits = noct_ksigbits(&krp->krp_param[1]); + if (bits > 4096) { + err = ERANGE; + goto errout_a; + } + sc->sc_pkh_bnsw[bidx].bn_siz = (bits + 127) / 128; + if (extent_alloc(sc->sc_pkh_bn, sc->sc_pkh_bnsw[bidx].bn_siz, + EX_NOALIGN, 0, EX_NOBOUNDARY, EX_NOWAIT, + &sc->sc_pkh_bnsw[bidx].bn_off)) { + err = ENOMEM; + goto errout_a; + } + cmd = &sc->sc_pkhcmd[bidx]; + cmd->cache.op = htole32(PKH_OP_CODE_LOAD); + cmd->cache.r = htole32(sc->sc_pkh_bnsw[bidx].bn_off); + adr = sc->sc_bnmap->dm_segs[0].ds_addr + + (sc->sc_pkh_bnsw[bidx].bn_off * 16); cmd->cache.addrhi = htole32((adr >> 32) & 0xffffffff); cmd->cache.addrlo = htole32((adr >> 0 ) & 0xffffffff); - cmd->cache.len = htole32(sc->sc_pkh_bnsw[midx].bn_siz * 16); + cmd->cache.len = htole32(sc->sc_pkh_bnsw[bidx].bn_siz); + cmd->cache.unused[0] = cmd->cache.unused[1] = cmd->cache.unused[2] = 0; bus_dmamap_sync(sc->sc_dmat, sc->sc_pkhmap, - wp * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + bidx * sizeof(union noct_pkh_cmd), sizeof(union noct_pkh_cmd), + BUS_DMASYNC_PREWRITE); + for (i = 0; i < (digits * 16); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[bidx].bn_off * 16) + i] = 0; + for (i = 0; i < ((bits + 7) / 8); i++) + sc->sc_bncache[(sc->sc_pkh_bnsw[bidx].bn_off * 16) + + (digits * 16) - 1 - i] = krp->krp_param[2].crp_p[i]; + bus_dmamap_sync(sc->sc_dmat, sc->sc_bnmap, + sc->sc_pkh_bnsw[bidx].bn_off * 16, digits * 16, BUS_DMASYNC_PREWRITE); - sc->sc_pkh_bnsw[wp].bn_callback = noct_modmul_cb; - sc->sc_pkh_bnsw[wp].bn_off = roff; - sc->sc_pkh_bnsw[wp].bn_siz = sc->sc_pkh_bnsw[midx].bn_siz; - sc->sc_pkh_bnsw[wp].bn_krp = krp; if (++wp == NOCT_PKH_ENTRIES) wp = 0; - bus_dmamap_sync(sc->sc_dmat, sc->sc_bnmap, - 0, sc->sc_bnmap->dm_mapsize, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - NOCT_WRITE_4(sc, NOCT_PKH_Q_PTR, wp); sc->sc_pkhwp = wp; @@ -1500,14 +1624,16 @@ noct_kprocess_modexp(sc, krp) return (0); +errout_a: + extent_free(sc->sc_pkh_bn, sc->sc_pkh_bnsw[aidx].bn_off, + sc->sc_pkh_bnsw[aidx].bn_siz, EX_NOWAIT); +errout_rmod: + extent_free(sc->sc_pkh_bn, sc->sc_pkh_bnsw[rmodidx].bn_off, + sc->sc_pkh_bnsw[rmodidx].bn_siz, EX_NOWAIT); +errout_m: + extent_free(sc->sc_pkh_bn, sc->sc_pkh_bnsw[midx].bn_off, + sc->sc_pkh_bnsw[midx].bn_siz, EX_NOWAIT); errout: - i = sc->sc_pkhwp; - while (i != wp) { - noct_pkh_freedesc(sc, i); - if (++i == NOCT_PKH_ENTRIES) - i = 0; - } - splx(s); krp->krp_status = err; crypto_kdone(krp); diff --git a/sys/dev/pci/noctvar.h b/sys/dev/pci/noctvar.h index 80832f6d670..863ab2bb403 100644 --- a/sys/dev/pci/noctvar.h +++ b/sys/dev/pci/noctvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: noctvar.h,v 1.5 2002/07/16 03:59:17 jason Exp $ */ +/* $OpenBSD: noctvar.h,v 1.6 2002/07/21 05:09:17 jason Exp $ */ /* * Copyright (c) 2002 Jason L. Wright (jason@thought.net) @@ -74,6 +74,7 @@ struct noct_softc { bus_space_handle_t sc_sh; bus_dma_tag_t sc_dmat; void *sc_ih; + bus_size_t sc_rar_last, sc_waw_last; u_int sc_ramsize; int32_t sc_cid; /* cryptodev id */ @@ -101,13 +102,10 @@ struct noct_softc { SIMPLEQ_HEAD(,noct_workq) sc_outq; }; -#define NOCT_READ_4(sc,r) \ - bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r)) -#define NOCT_WRITE_4(sc,r,v) \ - bus_space_write_4((sc)->sc_st, (sc)->sc_sh, (r), (v)) - -#define NOCT_READ_8(sc,r) noct_read_8(sc, r) -#define NOCT_WRITE_8(sc,r,v) noct_write_8(sc, r, v) +#define NOCT_READ_4(sc,r) noct_read_4((sc), (r)) +#define NOCT_WRITE_4(sc,r,v) noct_write_4((sc), (r), (v)) +#define NOCT_READ_8(sc,r) noct_read_8((sc), (r)) +#define NOCT_WRITE_8(sc,r,v) noct_write_8((sc), (r), (v)) #define NOCT_CARD(sid) (((sid) & 0xf0000000) >> 28) #define NOCT_SESSION(sid) ( (sid) & 0x0fffffff) -- cgit v1.2.3