summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
author Ted Unangst <tedu@cvs.openbsd.org> 2008-11-23 23:44:02 +0000
committer Ted Unangst <tedu@cvs.openbsd.org> 2008-11-23 23:44:02 +0000
commit 36d86842c62d9f315ab7ef88ca0d65df1b403787 (patch)
tree 7a95a3954994cbe36ebba8ac731d19f72aa6c278 /sys
parent 1d7e5147d3dc81330b16094ced38b06c932348c5 (diff)
softraid support for ata over ethernet (aoe). this includes a client and
part of a server. there's no configuration yet, and several other drawbacks, but it can be hammered into shape. i haven't moved the code forward in a year, and marco wants it in the tree to hack on.
Diffstat (limited to 'sys')
-rw-r--r-- sys/dev/softraid.c 67
-rw-r--r-- sys/dev/softraid_aoe.c 732
-rw-r--r-- sys/dev/softraidvar.h 19
-rw-r--r-- sys/net/if_aoe.c 70
-rw-r--r-- sys/net/if_aoe.h 112
-rw-r--r-- sys/net/if_ethersubr.c 11
6 files changed, 1008 insertions, 3 deletions
diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c
index 568c0d22f0f..3c13eebd4d1 100644
--- a/sys/dev/softraid.c
+++ b/sys/dev/softraid.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid.c,v 1.122 2008/11/23 22:06:43 deraadt Exp $ */
+/* $OpenBSD: softraid.c,v 1.123 2008/11/23 23:44:01 tedu Exp $ */
/*
* Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -37,6 +37,12 @@
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
+#include <sys/workq.h>
+
+#ifdef AOE
+#include <sys/mbuf.h>
+#include <net/if_aoe.h>
+#endif /* AOE */
#include <crypto/cryptodev.h>
@@ -1815,6 +1821,22 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
vol_size = ch_entry->src_meta.scmi.scm_coerced_size;
break;
+#ifdef AOE
+#ifdef notyet
+ case 'A':
+ /* target */
+ if (no_chunk != 1)
+ goto unwind;
+ strlcpy(sd->sd_name, "AOE TARGET", sizeof(sd->sd_name));
+ break;
+#endif /* notyet */
+ case 'a':
+ /* initiator */
+ if (no_chunk != 1)
+ goto unwind;
+ strlcpy(sd->sd_name, "AOE INITIATOR", sizeof(sd->sd_name));
+ break;
+#endif /* AOE */
#ifdef CRYPTO
case 'C':
DNPRINTF(SR_D_IOCTL,
@@ -1970,6 +1992,49 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
sd->sd_set_vol_state = sr_raid1_set_vol_state;
break;
+#ifdef AOE
+#ifdef notyet
+ case 'A':
+ /* fill out discipline members */
+ sd->sd_type = SR_MD_AOE;
+ sd->sd_max_ccb_per_wu = no_chunk;
+ sd->sd_max_wu = SR_RAIDAOE_NOWU;
+
+ /* setup discipline pointers */
+ sd->sd_alloc_resources = sr_aoe_start_server;
+ sd->sd_free_resources = sr_aoe_free_resources;
+ sd->sd_scsi_inquiry = sr_raid_inquiry;
+ sd->sd_scsi_read_cap = sr_raid_read_cap;
+ sd->sd_scsi_tur = sr_raid_tur;
+ sd->sd_scsi_req_sense = sr_raid_request_sense;
+ sd->sd_scsi_start_stop = sr_raid_start_stop;
+ sd->sd_scsi_sync = sr_raid_sync;
+ sd->sd_scsi_rw = sr_aoe_rw;
+ sd->sd_set_chunk_state = sr_raid_set_chunk_state;
+ sd->sd_set_vol_state = sr_raid_set_vol_state;
+ break;
+#endif /* notyet */
+ case 'a':
+ /* fill out discipline members */
+ sd->sd_type = SR_MD_AOE;
+ sd->sd_max_ccb_per_wu = no_chunk;
+ sd->sd_max_wu = SR_RAIDAOE_NOWU;
+
+ /* setup discipline pointers */
+ sd->sd_alloc_resources = sr_aoe_alloc_resources;
+ sd->sd_free_resources = sr_aoe_free_resources;
+ sd->sd_scsi_inquiry = sr_raid_inquiry;
+ sd->sd_scsi_read_cap = sr_raid_read_cap;
+ sd->sd_scsi_tur = sr_raid_tur;
+ sd->sd_scsi_req_sense = sr_raid_request_sense;
+ sd->sd_scsi_start_stop = sr_raid_start_stop;
+ sd->sd_scsi_sync = sr_raid_sync;
+ sd->sd_scsi_rw = sr_aoe_rw;
+ /* XXX reuse raid 1 functions for now FIXME */
+ sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
+ sd->sd_set_vol_state = sr_raid1_set_vol_state;
+ break;
+#endif
#ifdef CRYPTO
case 'C':
/* fill out discipline members */
diff --git a/sys/dev/softraid_aoe.c b/sys/dev/softraid_aoe.c
new file mode 100644
index 00000000000..f63936a8ee9
--- /dev/null
+++ b/sys/dev/softraid_aoe.c
@@ -0,0 +1,732 @@
+/* $OpenBSD: softraid_aoe.c,v 1.1 2008/11/23 23:44:01 tedu Exp $ */
+/*
+ * Copyright (c) 2008 Ted Unangst <tedu@openbsd.org>
+ * Copyright (c) 2008 Marco Peereboom <marco@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bio.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/device.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/disk.h>
+#include <sys/rwlock.h>
+#include <sys/queue.h>
+#include <sys/fcntl.h>
+#include <sys/disklabel.h>
+#include <sys/mount.h>
+#include <sys/sensors.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+
+#include <scsi/scsi_all.h>
+#include <scsi/scsiconf.h>
+#include <scsi/scsi_disk.h>
+
+#include <dev/softraidvar.h>
+#include <dev/rndvar.h>
+
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/socketvar.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <net/ethertypes.h>
+#include <netinet/if_ether.h>
+#include <net/if_aoe.h>
+
+void sr_aoe_input(struct aoe_handler *, struct mbuf *);
+void sr_aoe_setup(struct aoe_handler *, struct mbuf *);
+void sr_aoe_timeout(void *);
+
+/* AOE disk functions */
+/*
+ * Handler callback for a config-type reply (command 1, tag 0): switch the
+ * handler over to sr_aoe_input() and wake anyone sleeping on it.
+ * Presumably this answers the config query that the disabled code in
+ * sr_aoe_alloc_resources() would send -- nothing installs it yet.
+ * The mbuf is consumed in every case.
+ */
+void
+sr_aoe_setup(struct aoe_handler *ah, struct mbuf *m)
+{
+	struct aoe_packet	*pkt = mtod(m, struct aoe_packet *);
+	int			 ipl;
+
+	if (pkt->command == 1 && pkt->tag == 0) {
+		ipl = splnet();
+		ah->fn = (workq_fn)sr_aoe_input;
+		wakeup(ah);
+		splx(ipl);
+	}
+
+	m_freem(m);
+}
+
+/*
+ * Allocate the resources of the AoE initiator discipline.
+ *
+ * Sets up work units and ccbs, looks up the network interface and puts an
+ * aoe_handler for the (shelf, slot) pair on the global aoe_handlers list
+ * with sr_aoe_input() as its callback.  The target address is preset to
+ * the Ethernet broadcast address.
+ *
+ * Returns 0 on success, EINVAL if sd is NULL or the interface is missing.
+ */
+int
+sr_aoe_alloc_resources(struct sr_discipline *sd)
+{
+	struct aoe_handler	*ah;
+	struct ifnet		*ifp;
+	const char		*nic;
+	unsigned short		 shelf;
+	unsigned char		 slot;
+#if 0
+	struct mbuf		*m;
+	struct ether_header	*eh;
+	struct aoe_packet	*ap;
+	int			 rv;
+#endif
+	int			 s;
+
+	if (sd == NULL)
+		return (EINVAL);
+
+	DNPRINTF(SR_D_DIS, "%s: sr_aoe_alloc_resources\n",
+	    DEVNAME(sd->sd_sc));
+
+	sr_wu_alloc(sd);
+	sr_ccb_alloc(sd);
+
+	/* XXX hardcoded: where do these come from */
+	nic = "ne0";
+	slot = 3;
+	shelf = 4;
+
+	if ((ifp = ifunit(nic)) == NULL)
+		return EINVAL;
+
+	/* the handler keeps the shelf number in network byte order */
+	shelf = htons(shelf);
+
+	ah = malloc(sizeof(*ah), M_DEVBUF, M_WAITOK);
+	memset(ah, 0, sizeof(*ah));
+	TAILQ_INIT(&ah->reqs);
+	ah->fn = (workq_fn)sr_aoe_input;
+	ah->ifp = ifp;
+	ah->major = shelf;
+	ah->minor = slot;
+
+	s = splnet();
+	TAILQ_INSERT_TAIL(&aoe_handlers, ah, next);
+	splx(s);
+
+	sd->mds.mdd_aoe.sra_ah = ah;
+	/* ff:ff:ff:ff:ff:ff -- requests go out as broadcasts */
+	memset(sd->mds.mdd_aoe.sra_eaddr, 0xff,
+	    sizeof(sd->mds.mdd_aoe.sra_eaddr));
+
+#if 0
+	MGETHDR(m, M_WAIT, MT_HEADER);
+	eh = mtod(m, struct ether_header *);
+	memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6);
+	memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6);
+	eh->ether_type = htons(ETHERTYPE_AOE);
+	ap = (struct aoe_packet *)&eh[1];
+	ap->vers = 1;
+	ap->flags = 0;
+	ap->error = 0;
+	ap->major = shelf;
+	ap->minor = slot;
+	ap->command = 1;
+	ap->tag = 0;
+	ap->buffercnt = 0;
+	ap->firmwarevers = 0;
+	ap->configsectorcnt = 0;
+	ap->serververs = 0;
+	ap->ccmd = 0;
+	ap->configstringlen = 0;
+	m->m_pkthdr.len = m->m_len = AOE_CFGHDRLEN;
+	s = splnet();
+	IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv);
+	if ((ifp->if_flags & IFF_OACTIVE) == 0)
+		(*ifp->if_start)(ifp);
+	rv = tsleep(ah, PRIBIO|PCATCH, "aoesetup", 30 * hz);
+	splx(s);
+	if (rv) {
+		s = splnet();
+		TAILQ_REMOVE(&aoe_handlers, ah, next);
+		splx(s);
+		free(ah, M_DEVBUF);
+		return rv;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Release everything sr_aoe_alloc_resources() set up.
+ *
+ * The handler is taken off the aoe_handlers list first so no new replies
+ * are dispatched to it, then every request still in flight is cancelled
+ * (timeout stopped, entry unlinked and freed) before the handler itself
+ * is freed.  Pointers are cleared after freeing so a second call is
+ * harmless.
+ *
+ * Returns 0 on success, EINVAL if sd is NULL.
+ */
+int
+sr_aoe_free_resources(struct sr_discipline *sd)
+{
+	struct aoe_handler	*ah;
+	struct aoe_req		*ar;
+	int			 s;
+
+	if (!sd)
+		return (EINVAL);
+
+	DNPRINTF(SR_D_DIS, "%s: sr_aoe_free_resources\n",
+	    DEVNAME(sd->sd_sc));
+
+	ah = sd->mds.mdd_aoe.sra_ah;
+	if (ah) {
+		s = splnet();
+		TAILQ_REMOVE(&aoe_handlers, ah, next);
+		/*
+		 * An armed timeout would otherwise fire after the free
+		 * below and walk ah->reqs through a stale pointer.
+		 */
+		while ((ar = TAILQ_FIRST(&ah->reqs)) != NULL) {
+			timeout_del(&ar->to);
+			TAILQ_REMOVE(&ah->reqs, ar, next);
+			free(ar, M_DEVBUF);
+		}
+		splx(s);
+
+		/* XXX a workq task already queued by aoe_input() may still hold ah */
+		free(ah, M_DEVBUF);
+		sd->mds.mdd_aoe.sra_ah = NULL;
+	}
+
+	sr_wu_free(sd);
+	sr_ccb_free(sd);
+
+	if (sd->sd_meta) {
+		free(sd->sd_meta, M_DEVBUF);
+		sd->sd_meta = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * Start a SCSI read or write on the AoE volume.
+ *
+ * The transfer is cut into fragments of at most aoe_frags sectors.  For
+ * each fragment an aoe_req is put on the handler's request list, an
+ * Ethernet frame carrying an AoE ATA command (plus the data for writes)
+ * is queued on the interface, and a 10 second retry timeout is armed.
+ * Completion is driven by sr_aoe_input().
+ *
+ * Returns 0 when all fragments were queued (or the wu was deferred),
+ * 1 when the I/O is rejected (validation failed or the chunk is not
+ * usable), ENOMEM or the IFQ_ENQUEUE error when a fragment could not be
+ * sent.  XXX fragments queued before a failure stay outstanding.
+ */
+int
+sr_aoe_rw(struct sr_workunit *wu)
+{
+	struct sr_discipline	*sd = wu->swu_dis;
+	struct scsi_xfer	*xs = wu->swu_xs;
+	struct sr_workunit	*wup;
+	struct sr_chunk		*scp;
+	int			s, ios, rt;
+	daddr64_t		fragblk, blk;
+	struct mbuf		*m;
+	struct ether_header	*eh;
+	struct aoe_packet	*ap;
+	struct ifnet		*ifp;
+	struct aoe_handler	*ah;
+	struct aoe_req		*ar;
+	int			tag, rv, i;
+	int			fragsize, paylen;
+	const int		aoe_frags = 2;
+
+	DNPRINTF(SR_D_DIS, "%s: sr_aoe_rw 0x%02x\n", DEVNAME(sd->sd_sc),
+	    xs->cmd->opcode);
+
+	/* blk and scsi error will be handled by sr_validate_io */
+	if (sr_validate_io(wu, &blk, "sr_aoe_rw"))
+		goto bad;
+
+	wu->swu_blk_start = blk;
+	wu->swu_blk_end = blk + (xs->datalen >> 9) - 1;
+
+	/* add 1 to get the inclusive amount, then some more for rounding */
+	ios = (wu->swu_blk_end - wu->swu_blk_start + 1 + (aoe_frags - 1)) /
+	    aoe_frags;
+	wu->swu_io_count = ios;
+
+	if (xs->flags & SCSI_POLL)
+		panic("can't AOE poll");
+
+	/* walk queue backwards and fill in collider if we have one */
+	s = splbio();
+	if (0) /* XXX */ TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
+		if (wu->swu_blk_end < wup->swu_blk_start ||
+		    wup->swu_blk_end < wu->swu_blk_start)
+			continue;
+
+		/* we have an LBA collision, defer wu */
+		wu->swu_state = SR_WU_DEFERRED;
+		if (wup->swu_collider)
+			/* wu is on deferred queue, append to last wu */
+			while (wup->swu_collider)
+				wup = wup->swu_collider;
+
+		wup->swu_collider = wu;
+		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
+		sd->sd_wu_collisions++;
+		splx(s);
+		return (0);
+	}
+	/*
+	 * Drop splbio before the fragment loop; each fragment raises spl
+	 * itself, so no exit path below leaves the IPL raised.
+	 */
+	splx(s);
+
+	ah = sd->mds.mdd_aoe.sra_ah;
+	ifp = ah->ifp;
+
+	for (i = 0; i < ios; i++) {
+		fragblk = blk + aoe_frags * i;
+		fragsize = aoe_frags * 512;
+		/* the last fragment may be short */
+		if (fragblk + aoe_frags - 1 > wu->swu_blk_end)
+			fragsize = (wu->swu_blk_end - fragblk + 1) * 512;
+
+		if (xs->flags & SCSI_DATA_IN) {
+			rt = 0;
+ragain:
+			scp = sd->sd_vol.sv_chunks[0];
+			switch (scp->src_meta.scm_status) {
+			case BIOC_SDONLINE:
+			case BIOC_SDSCRUB:
+				break;
+
+			case BIOC_SDOFFLINE:
+			case BIOC_SDREBUILD:
+			case BIOC_SDHOTSPARE:
+				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
+					goto ragain;
+
+				/* FALLTHROUGH */
+			default:
+				/* volume offline */
+				printf("%s: is offline, can't read\n",
+				    DEVNAME(sd->sd_sc));
+				goto bad;
+			}
+		} else {
+			scp = sd->sd_vol.sv_chunks[0];
+			switch (scp->src_meta.scm_status) {
+			case BIOC_SDONLINE:
+			case BIOC_SDSCRUB:
+			case BIOC_SDREBUILD:
+				break;
+
+			case BIOC_SDHOTSPARE: /* should never happen */
+			case BIOC_SDOFFLINE:
+				wu->swu_io_count--;
+				goto bad;
+
+			default:
+				goto bad;
+			}
+		}
+
+		/* get all memory first so nothing has to be unlinked on failure */
+		ar = malloc(sizeof(*ar), M_DEVBUF, M_NOWAIT);
+		if (!ar)
+			return ENOMEM;
+
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (xs->flags & SCSI_DATA_OUT && m) {
+			/* writes carry the payload, which needs a cluster */
+			MCLGET(m, M_DONTWAIT);
+			if (!(m->m_flags & M_EXT)) {
+				m_freem(m);
+				m = NULL;
+			}
+		}
+		if (!m) {
+			free(ar, M_DEVBUF);
+			return ENOMEM;
+		}
+
+		ar->v = wu;
+		ar->len = fragsize;
+		timeout_set(&ar->to, sr_aoe_timeout, ar);
+
+		eh = mtod(m, struct ether_header *);
+		memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6);
+		memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6);
+		eh->ether_type = htons(ETHERTYPE_AOE);
+		ap = (struct aoe_packet *)&eh[1];
+		ap->vers = 1;
+		ap->flags = 0;
+		ap->error = 0;
+		ap->major = ah->major;
+		ap->minor = ah->minor;
+		ap->command = 0;
+		ap->aflags = 0; /* AOE_AF_EXTENDED not used yet */
+		if (xs->flags & SCSI_DATA_OUT) {
+			/* aflags takes the AF_ flag bit, cmd the ATA opcode */
+			ap->aflags |= AOE_AF_WRITE;
+			ap->cmd = AOE_WRITE;
+			memcpy(ap->data, xs->data + (aoe_frags * i * 512),
+			    fragsize);
+			paylen = fragsize;
+		} else {
+			ap->cmd = AOE_READ;
+			/* a read request is header only */
+			paylen = 0;
+		}
+		ap->feature = 0;
+		ap->sectorcnt = fragsize / 512;
+		/* AOE_BLK2HDR fills lba0-2 only; don't send stale mbuf bytes */
+		ap->lba3 = 0;
+		ap->lba4 = 0;
+		ap->lba5 = 0;
+		ap->reserved = 0;
+		AOE_BLK2HDR(fragblk, ap);
+
+		m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN + paylen;
+
+		/*
+		 * The request must be on the list before the frame can be
+		 * answered, so link it and enqueue under one splnet().
+		 */
+		s = splnet();
+		tag = ++sd->mds.mdd_aoe.sra_tag;
+		ar->tag = tag;
+		ap->tag = tag;
+		TAILQ_INSERT_TAIL(&ah->reqs, ar, next);
+		IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv);
+		if (rv) {
+			/* timeout not armed yet, so ar can be freed safely */
+			TAILQ_REMOVE(&ah->reqs, ar, next);
+			splx(s);
+			free(ar, M_DEVBUF);
+			return rv;
+		}
+		timeout_add(&ar->to, hz * 10);
+		if ((ifp->if_flags & IFF_OACTIVE) == 0)
+			(*ifp->if_start)(ifp);
+		splx(s);
+	}
+
+	return (0);
+
+bad:
+	/* wu is unwound by sr_wu_put */
+	return (1);
+}
+
+/*
+ * Handle a reply for the initiator.
+ *
+ * The reply is matched to an outstanding aoe_req by tag; unmatched
+ * packets are dropped.  The request's timeout is stopped and the request
+ * freed.  For reads the payload is copied into the SCSI buffer at the
+ * offset given by the LBA in the reply, after checking that it lies
+ * inside the work unit.  Every answered fragment counts towards
+ * completion; once all fragments are in, the xfer is finished and fails
+ * if any fragment failed.  The mbuf is always consumed.
+ */
+void
+sr_aoe_input(struct aoe_handler *ah, struct mbuf *m)
+{
+	struct sr_discipline	*sd;
+	struct scsi_xfer	*xs;
+	struct aoe_req		*ar;
+	struct aoe_packet	*ap;
+	struct sr_workunit	*wu, *wup;
+	daddr64_t		blk, offset;
+	int			len, s;
+	int			tag;
+
+	ap = mtod(m, struct aoe_packet *);
+	tag = ap->tag;
+
+	s = splnet();
+	TAILQ_FOREACH(ar, &ah->reqs, next) {
+		if (ar->tag == tag) {
+			TAILQ_REMOVE(&ah->reqs, ar, next);
+			break;
+		}
+	}
+	splx(s);
+	if (!ar)
+		goto out;
+
+	timeout_del(&ar->to);
+	wu = ar->v;
+	len = ar->len; /* XXX check against sector count */
+	/* the request is off the list and its timeout stopped: done with it */
+	free(ar, M_DEVBUF);
+	sd = wu->swu_dis;
+	xs = wu->swu_xs;
+
+	if (ap->flags & AOE_F_ERROR) {
+		wu->swu_ios_failed++;
+	} else if (xs->flags & SCSI_DATA_IN) {
+		AOE_HDR2BLK(ap, blk);
+		/* byte offset of this fragment inside the transfer */
+		offset = (blk - wu->swu_blk_start) * 512;
+		/*
+		 * The LBA comes off the wire; never copy outside xs->data.
+		 * XXX the length of the mbuf itself is still not checked.
+		 */
+		if (blk < wu->swu_blk_start || blk > wu->swu_blk_end ||
+		    offset + len > xs->datalen) {
+			wu->swu_ios_failed++;
+		} else {
+			memcpy(xs->data + offset, ap->data, len);
+			wu->swu_ios_succeeded++;
+		}
+	} else {
+		wu->swu_ios_succeeded++;
+	}
+
+	s = splbio();
+
+	/* failed fragments count as complete too, or the wu never finishes */
+	wu->swu_ios_complete++;
+
+	if (wu->swu_ios_complete == wu->swu_io_count) {
+		/* fragments are disjoint pieces, so any failure fails the xfer */
+		if (wu->swu_ios_failed)
+			xs->error = XS_DRIVER_STUFFUP;
+		else
+			xs->error = XS_NOERROR;
+
+		xs->resid = 0;
+		xs->flags |= ITSDONE;
+
+		if (0) /* XXX */ TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
+			if (wu == wup) {
+				/* wu on pendq, remove */
+				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
+
+				if (wu->swu_collider) {
+					/* restart deferred wu */
+					wu->swu_collider->swu_state =
+					    SR_WU_INPROGRESS;
+					TAILQ_REMOVE(&sd->sd_wu_defq,
+					    wu->swu_collider, swu_link);
+					/* sr_raid_startwu(wu->swu_collider); */
+				}
+				break;
+			}
+		}
+
+		/* do not change the order of these 2 functions */
+		sr_wu_put(wu);
+		scsi_done(xs);
+	}
+
+	splx(s);
+
+out:
+	m_freem(m);
+}
+
+/*
+ * Retry timeout for one outstanding request (v is the aoe_req).
+ * If a request with this tag is still on the handler's list it is
+ * unlinked and freed and the work unit is handed to sr_aoe_rw() again;
+ * if it is gone, nothing is done.
+ */
+void
+sr_aoe_timeout(void *v)
+{
+	struct aoe_req		*ar = v;
+	struct aoe_req		*it;
+	struct sr_workunit	*wu = ar->v;
+	struct aoe_handler	*ah = wu->swu_dis->mds.mdd_aoe.sra_ah;
+	int			 s;
+
+	s = splnet();
+	TAILQ_FOREACH(it, &ah->reqs, next) {
+		if (it->tag == ar->tag)
+			break;
+	}
+	if (it != NULL)
+		TAILQ_REMOVE(&ah->reqs, ar, next);
+	splx(s);
+
+	/* not on the list any more: nothing to retry */
+	if (it == NULL)
+		return;
+
+	free(ar, M_DEVBUF);
+	/* give it another go */
+	/* XXX this is going to repeat the whole workunit */
+	sr_aoe_rw(wu);
+}
+
+#if 0
+int sr_aoe_start_server(struct sr_discipline *);
+void sr_aoe_server(struct aoe_handler *, struct mbuf *);
+
+/*
+ * AoE target (server) loop -- disabled, compiled out by the enclosing #if 0.
+ *
+ * Registers a handler whose callback is sr_aoe_server(), then sleeps on
+ * the handler, takes queued request mbufs off ah->reqs one at a time,
+ * issues a buf to chunk 0 through its d_strategy routine and queues a
+ * reply frame.  Leaves the loop when tsleep() returns non-zero, then
+ * unregisters and frees the handler and returns that value.
+ *
+ * NOTE(review), to be addressed before this is enabled:
+ *  - the DNPRINTF below still prints "sr_aoe_alloc_resources";
+ *  - sr_alloc_wu()/sr_alloc_ccb() are called here while the live code in
+ *    this file calls sr_wu_alloc()/sr_ccb_alloc();
+ *  - neither ar nor the request mbuf (m2) is freed anywhere in the loop;
+ *  - AOE_HDR2BLK() is applied to the freshly allocated reply (ap), not to
+ *    the request (rp);
+ *  - the read branch also sets B_WRITE, and both replies are sent with
+ *    m_len = AOE_CMDHDRLEN, i.e. without any data;
+ *  - replies use command 1, while sr_aoe_rw() sends ATA requests with
+ *    command 0.
+ */
+int
+sr_aoe_start_server(struct sr_discipline *sd)
+{
+ struct ifnet *ifp;
+ struct aoe_handler *ah;
+ unsigned char slot;
+ unsigned short shelf;
+ const char *nic;
+ struct mbuf *m, *m2;
+ struct ether_header *eh;
+ struct aoe_packet *rp, *ap;
+ struct aoe_req *ar;
+ int rv, s;
+ int len;
+ struct buf buf;
+ daddr64_t blk;
+
+ if (!sd)
+ return (EINVAL);
+
+ DNPRINTF(SR_D_DIS, "%s: sr_aoe_alloc_resources\n",
+ DEVNAME(sd->sd_sc));
+
+ sr_alloc_wu(sd);
+ sr_alloc_ccb(sd);
+
+ /* where do these come from */
+ slot = 3;
+ shelf = 4;
+ nic = "ne0";
+
+ ifp = ifunit(nic);
+ if (!ifp) {
+ return EINVAL;
+ }
+ shelf = htons(shelf);
+
+ ah = malloc(sizeof(*ah), M_DEVBUF, M_WAITOK);
+ memset(ah, 0, sizeof(*ah));
+ ah->ifp = ifp;
+ ah->major = shelf;
+ ah->minor = slot;
+ ah->fn = (workq_fn)sr_aoe_server;
+ TAILQ_INIT(&ah->reqs);
+
+ s = splnet();
+ TAILQ_INSERT_TAIL(&aoe_handlers, ah, next);
+ splx(s);
+
+ sd->mds.mdd_aoe.sra_ah = ah;
+ /* replies are addressed to the broadcast address */
+ sd->mds.mdd_aoe.sra_eaddr[0] = 0xff;
+ sd->mds.mdd_aoe.sra_eaddr[1] = 0xff;
+ sd->mds.mdd_aoe.sra_eaddr[2] = 0xff;
+ sd->mds.mdd_aoe.sra_eaddr[3] = 0xff;
+ sd->mds.mdd_aoe.sra_eaddr[4] = 0xff;
+ sd->mds.mdd_aoe.sra_eaddr[5] = 0xff;
+
+ while (1) {
+ s = splnet();
+resleep:
+ /* woken by sr_aoe_server() once it has queued a request */
+ rv = tsleep(ah, PCATCH|PRIBIO, "wait", 0);
+ if (rv) {
+ splx(s);
+ break;
+ }
+ ar = TAILQ_FIRST(&ah->reqs);
+ if (!ar) {
+ goto resleep;
+ }
+ TAILQ_REMOVE(&ah->reqs, ar, next);
+ splx(s);
+ m2 = ar->v;
+ rp = mtod(m2, struct aoe_packet *);
+ /* only command 0 requests are served */
+ if (rp->command) {
+ continue;
+ }
+ if (rp->aflags & AOE_AF_WRITE) {
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (!m)
+ continue;
+ len = rp->sectorcnt * 512;
+
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6);
+ memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6);
+ eh->ether_type = htons(ETHERTYPE_AOE);
+ ap = (struct aoe_packet *)&eh[1];
+ /* NOTE(review): reads the LBA from the new reply, not from rp */
+ AOE_HDR2BLK(ap, blk);
+ memset(&buf, 0, sizeof buf);
+ buf.b_blkno = blk;
+ buf.b_flags = B_WRITE;
+ buf.b_bcount = len;
+ buf.b_bufsize = len;
+ buf.b_resid = len;
+ buf.b_data = rp->data;
+ buf.b_error = 0;
+ buf.b_proc = curproc;
+ buf.b_dev = sd->sd_vol.sv_chunks[0]->src_dev_mm;
+ LIST_INIT(&buf.b_dep);
+
+ s = splbio();
+ bdevsw_lookup(buf.b_dev)->d_strategy(&buf);
+ biowait(&buf);
+ splx(s);
+
+ ap->vers = 1;
+ ap->flags = AOE_F_RESP;
+ ap->error = 0;
+ ap->major = rp->major;
+ ap->minor = rp->minor;
+ ap->command = 1;
+ ap->tag = rp->tag;
+ ap->aflags = rp->aflags;
+ ap->feature = 0;
+ ap->sectorcnt = len / 512;
+ ap->cmd = AOE_WRITE;
+ ap->lba0 = 0;
+ ap->lba1 = 0;
+ ap->lba2 = 0;
+ ap->lba3 = 0;
+ ap->lba4 = 0;
+ ap->lba5 = 0;
+ ap->reserved = 0;
+
+ m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN;
+
+ s = splnet();
+ IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv);
+ if ((ifp->if_flags & IFF_OACTIVE) == 0)
+ (*ifp->if_start)(ifp);
+ splx(s);
+ } else {
+ /* read request: the reply gets a cluster for the data */
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m) {
+ MCLGET(m, M_DONTWAIT);
+ if (!(m->m_flags & M_EXT)) {
+ m_freem(m);
+ m = NULL;
+ }
+ }
+ if (!m)
+ continue;
+ len = rp->sectorcnt * 512;
+
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6);
+ memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6);
+ eh->ether_type = htons(ETHERTYPE_AOE);
+ ap = (struct aoe_packet *)&eh[1];
+ /* NOTE(review): reads the LBA from the new reply, not from rp */
+ AOE_HDR2BLK(ap, blk);
+ memset(&buf, 0, sizeof buf);
+ buf.b_blkno = blk;
+ /* NOTE(review): B_WRITE in the read branch looks like a copy/paste slip */
+ buf.b_flags = B_WRITE;
+ buf.b_bcount = len;
+ buf.b_bufsize = len;
+ buf.b_resid = len;
+ buf.b_data = ap->data;
+ buf.b_error = 0;
+ buf.b_proc = curproc;
+ buf.b_dev = sd->sd_vol.sv_chunks[0]->src_dev_mm;
+ LIST_INIT(&buf.b_dep);
+
+ s = splbio();
+ bdevsw_lookup(buf.b_dev)->d_strategy(&buf);
+ biowait(&buf);
+ splx(s);
+
+ ap->vers = 1;
+ ap->flags = AOE_F_RESP;
+ ap->error = 0;
+ ap->major = rp->major;
+ ap->minor = rp->minor;
+ ap->command = 1;
+ ap->tag = rp->tag;
+ ap->aflags = rp->aflags;
+ ap->feature = 0;
+ ap->sectorcnt = len / 512;
+ ap->cmd = AOE_READ;
+ ap->lba0 = 0;
+ ap->lba1 = 0;
+ ap->lba2 = 0;
+ ap->lba3 = 0;
+ ap->lba4 = 0;
+ ap->lba5 = 0;
+ ap->reserved = 0;
+ /* NOTE(review): length excludes the len bytes just read into ap->data */
+ m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN;
+
+ s = splnet();
+ IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv);
+ if ((ifp->if_flags & IFF_OACTIVE) == 0)
+ (*ifp->if_start)(ifp);
+ splx(s);
+ }
+
+ }
+
+ /* tsleep() failed or was interrupted: shut the server down */
+ s = splnet();
+ TAILQ_REMOVE(&aoe_handlers, ah, next);
+ splx(s);
+ free(ah, M_DEVBUF);
+
+ return rv;
+}
+
+/*
+ * Handler callback of the (disabled) AoE server: wrap the incoming mbuf
+ * in an aoe_req, append it to the handler's request list and wake the
+ * loop in sr_aoe_start_server(), which sleeps on the handler.
+ * The mbuf is dropped if no aoe_req can be allocated.
+ */
+void
+sr_aoe_server(struct aoe_handler *ah, struct mbuf *m)
+{
+ struct aoe_req *ar;
+ int s;
+
+ /* M_NOWAIT: the allocation may fail instead of sleeping */
+ ar = malloc(sizeof *ar, M_DEVBUF, M_NOWAIT);
+ if (!ar) {
+ m_freem(m);
+ return;
+ }
+ /* only v is filled in; tag, len and the timeout stay unset */
+ ar->v = m;
+ s = splnet();
+ TAILQ_INSERT_TAIL(&ah->reqs, ar, next);
+ wakeup(ah);
+ splx(s);
+}
+#endif /* server */
diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h
index 20e77ab5e8c..827a75a6fc1 100644
--- a/sys/dev/softraidvar.h
+++ b/sys/dev/softraidvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraidvar.h,v 1.63 2008/07/29 01:18:02 marco Exp $ */
+/* $OpenBSD: softraidvar.h,v 1.64 2008/11/23 23:44:01 tedu Exp $ */
/*
* Copyright (c) 2006 Marco Peereboom <marco@peereboom.us>
* Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -304,6 +304,14 @@ struct sr_crypto {
u_int64_t scr_sid[SR_CRYPTO_MAXKEYS];
};
+/* ata over ethernet */
+/* value the AoE discipline assigns to sd_max_wu */
+#define SR_RAIDAOE_NOWU 2
+/* per-volume state of the AoE discipline (sd_dis_specific.mdd_aoe) */
+struct sr_aoe {
+ /* handler registered on aoe_handlers by sr_aoe_alloc_resources() */
+ struct aoe_handler *sra_ah;
+ /* last tag handed out; sr_aoe_rw() increments it for every request */
+ int sra_tag;
+ /* destination Ethernet address of requests; set to all 0xff on setup */
+ char sra_eaddr[6];
+};
+
struct sr_metadata_list {
u_int8_t sml_metadata[SR_META_SIZE * 512];
dev_t sml_mm;
@@ -350,6 +358,7 @@ struct sr_discipline {
#define SR_MD_RAID5 2
#define SR_MD_CACHE 3
#define SR_MD_CRYPTO 4
+#define SR_MD_AOE 5
char sd_name[10]; /* human readable dis name */
u_int8_t sd_scsibus; /* scsibus discipline uses */
struct scsi_link sd_link; /* link to midlayer */
@@ -358,6 +367,9 @@ struct sr_discipline {
struct sr_raid0 mdd_raid0;
struct sr_raid1 mdd_raid1;
struct sr_crypto mdd_crypto;
+#ifdef AOE
+ struct sr_aoe mdd_aoe;
+#endif /* AOE */
} sd_dis_specific;/* dis specific members */
#define mds sd_dis_specific
@@ -491,6 +503,11 @@ int sr_crypto_get_kdf(struct bioc_createraid *,
struct sr_discipline *);
int sr_crypto_create_keys(struct sr_discipline *);
+/* aoe discipline */
+int sr_aoe_alloc_resources(struct sr_discipline *);
+int sr_aoe_free_resources(struct sr_discipline *);
+int sr_aoe_rw(struct sr_workunit *);
+
#ifdef SR_DEBUG
void sr_dump_mem(u_int8_t *, int);
#endif
diff --git a/sys/net/if_aoe.c b/sys/net/if_aoe.c
new file mode 100644
index 00000000000..694fae87d5f
--- /dev/null
+++ b/sys/net/if_aoe.c
@@ -0,0 +1,70 @@
+/* $OpenBSD: if_aoe.c,v 1.1 2008/11/23 23:44:01 tedu Exp $ */
+/*
+ * Copyright (c) 2008 Ted Unangst <tedu@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/device.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/disk.h>
+#include <sys/rwlock.h>
+#include <sys/queue.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/fcntl.h>
+#include <sys/disklabel.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/workq.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/socketvar.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <net/ethertypes.h>
+#include <netinet/if_ether.h>
+#include <net/if_aoe.h>
+
+struct aoe_handler_head aoe_handlers = TAILQ_HEAD_INITIALIZER(aoe_handlers);
+
+/*
+ * Dispatch an incoming AoE frame; called from the Ethernet input path
+ * for ETHERTYPE_AOE at IPL_NET.
+ *
+ * The packet's major/minor address and the receiving interface select a
+ * handler from aoe_handlers; the handler's callback is then run from the
+ * system workq with (handler, mbuf) as arguments and takes ownership of
+ * the mbuf.  Frames nobody is registered for, and frames whose task
+ * cannot be queued, are freed here.
+ *
+ * XXX the mbuf length is not validated before the header is read.
+ */
+void
+aoe_input(struct ifnet *ifp, struct mbuf *m)
+{
+	struct aoe_packet	*ap;
+	struct aoe_handler	*q = NULL;
+
+	splassert(IPL_NET);
+
+	ap = mtod(m, struct aoe_packet *);
+	/* printf("aoe packet %d %d\n", htons(ap->major), ap->minor); */
+
+	TAILQ_FOREACH(q, &aoe_handlers, next) {
+		if (q->ifp == ifp &&
+		    ap->major == q->major && ap->minor == q->minor)
+			break;
+	}
+	if (!q) {
+		/* printf("no q\n"); */
+		m_freem(m);
+		return;
+	}
+
+	/* if the task could not be queued nobody else will free the mbuf */
+	if (workq_add_task(NULL, 0, q->fn, q, m) != 0)
+		m_freem(m);
+}
diff --git a/sys/net/if_aoe.h b/sys/net/if_aoe.h
new file mode 100644
index 00000000000..79e0fe0eb52
--- /dev/null
+++ b/sys/net/if_aoe.h
@@ -0,0 +1,112 @@
+/* $OpenBSD: if_aoe.h,v 1.1 2008/11/23 23:44:01 tedu Exp $ */
+/*
+ * Copyright (c) 2007 Ted Unangst <tedu@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/workq.h>
+
+/*
+ * Layout of an AoE packet as it follows the Ethernet header
+ * (sr_aoe_rw() builds it at &eh[1]).  A common header is followed by
+ * either an ATA command section or a config section.  The struct is
+ * packed; multi-byte fields are stored as the callers put them there.
+ */
+struct aoe_packet {
+/* bits of the 4-bit flags field */
+#define AOE_F_ERROR (1 << 2)
+#define AOE_F_RESP (1 << 3)
+ /* vers and flags share one byte; order depends on host endianness */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned char flags : 4;
+ unsigned char vers : 4;
+#else
+ unsigned char vers : 4;
+ unsigned char flags : 4;
+#endif
+ unsigned char error;
+ /* device address; the softraid code fills these from shelf and slot */
+ unsigned short major;
+ unsigned char minor;
+ /* sr_aoe_rw() sends 0 with the ATA section; config packets use 1 */
+ unsigned char command;
+ /* request tag; sr_aoe_input() uses it to find the matching aoe_req */
+ unsigned int tag;
+ union {
+ /* command packet */
+ struct {
+/* bits of aflags */
+#define AOE_AF_WRITE (1 << 0)
+#define AOE_AF_EXTENDED (1 << 6)
+ unsigned char aflags;
+ unsigned char feature;
+ /* number of 512-byte sectors in this request */
+ unsigned char sectorcnt;
+/* values for cmd */
+#define AOE_READ 0x20
+#define AOE_READ_EXT 0x24
+#define AOE_WRITE 0x30
+#define AOE_WRITE_EXT 0x34
+ unsigned char cmd;
+ /* block address, lowest byte first; see AOE_BLK2HDR/AOE_HDR2BLK */
+ unsigned char lba0;
+ unsigned char lba1;
+ unsigned char lba2;
+#define AOE_LBABIT 0x40
+ unsigned char lba3;
+ unsigned char lba4;
+ unsigned char lba5;
+ unsigned short reserved;
+ /* sector data follows the header */
+ unsigned char data[];
+ } __packed;
+ /* config packet */
+ struct {
+ unsigned short buffercnt;
+ unsigned short firmwarevers;
+ unsigned char configsectorcnt;
+ /* serververs and ccmd share one byte, like vers and flags above */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned char ccmd : 4;
+ unsigned char serververs : 4;
+#else
+ unsigned char serververs : 4;
+ unsigned char ccmd : 4;
+#endif
+ unsigned short configstringlen;
+ unsigned char configstring[1024];
+ } __packed;
+ };
+} __packed;
+
+/*
+ * Store the low 24 bits of block number blk in lba0..lba2 of the command
+ * header ap points to.  lba3..lba5 are left untouched, so the caller has
+ * to initialise them.  Both arguments are evaluated more than once.
+ */
+#define AOE_BLK2HDR(blk, ap) do { \
+	(ap)->lba0 = (blk); \
+	(ap)->lba1 = (blk) >> 8; \
+	(ap)->lba2 = (blk) >> 16; \
+} while (0)
+
+/*
+ * Rebuild a block number in blk from lba0..lba2 of the command header ap
+ * points to (24 bits; lba3..lba5 are ignored).  Both arguments are
+ * evaluated more than once and blk must be an lvalue.
+ */
+#define AOE_HDR2BLK(ap, blk) do { \
+	(blk) = 0; \
+	(blk) |= (ap)->lba0; \
+	(blk) |= (ap)->lba1 << 8; \
+	(blk) |= (ap)->lba2 << 16; \
+} while (0)
+
+
+#define AOE_CFGHDRLEN 32
+#define AOE_CMDHDRLEN 36
+
+/* one outstanding request, linked on aoe_handler.reqs */
+struct aoe_req {
+ /* owner context: the initiator stores the sr_workunit here */
+ void *v;
+ /* tag sent in the packet; replies are matched against it */
+ int tag;
+ /* payload length of the fragment in bytes */
+ int len;
+ TAILQ_ENTRY(aoe_req) next;
+ /* retry timer; the initiator points it at sr_aoe_timeout() */
+ struct timeout to;
+};
+
+/* receiver for AoE packets of one device address on one interface */
+struct aoe_handler {
+ /* linkage on the global aoe_handlers list */
+ TAILQ_ENTRY(aoe_handler) next;
+ /* address aoe_input() compares with the packet's major/minor fields */
+ unsigned short major;
+ unsigned char minor;
+ /* interface the packets must arrive on */
+ struct ifnet *ifp;
+ /* callback run from the workq with (handler, mbuf) as arguments */
+ workq_fn fn;
+ /* requests waiting for a reply */
+ TAILQ_HEAD(, aoe_req) reqs;
+};
+
+extern TAILQ_HEAD(aoe_handler_head, aoe_handler) aoe_handlers;
+extern int aoe_waiting;
+
+void aoe_input(struct ifnet *, struct mbuf *);
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index dcbaab93b0c..8adca1c9a9b 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_ethersubr.c,v 1.127 2008/10/16 19:12:51 naddy Exp $ */
+/* $OpenBSD: if_ethersubr.c,v 1.128 2008/11/23 23:44:01 tedu Exp $ */
/* $NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $ */
/*
@@ -135,6 +135,10 @@ didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>.
#include <net/if_trunk.h>
#endif
+#ifdef AOE
+#include <net/if_aoe.h>
+#endif /* AOE */
+
#ifdef INET6
#ifndef INET
#include <netinet/in.h>
@@ -736,6 +740,11 @@ decapsulate:
schednetisr(NETISR_PPPOE);
break;
#endif /* NPPPOE > 0 */
+#ifdef AOE
+ case ETHERTYPE_AOE:
+ aoe_input(ifp, m);
+ goto done;
+#endif /* AOE */
#ifdef MPLS
case ETHERTYPE_MPLS:
case ETHERTYPE_MPLS_MCAST: