diff options
author | Ted Unangst <tedu@cvs.openbsd.org> | 2008-11-23 23:44:02 +0000 |
---|---|---|
committer | Ted Unangst <tedu@cvs.openbsd.org> | 2008-11-23 23:44:02 +0000 |
commit | 36d86842c62d9f315ab7ef88ca0d65df1b403787 (patch) | |
tree | 7a95a3954994cbe36ebba8ac731d19f72aa6c278 /sys | |
parent | 1d7e5147d3dc81330b16094ced38b06c932348c5 (diff) |
Add softraid support for ATA over Ethernet (AoE). This includes a client and
part of a server. There is no configuration yet, and it has several other drawbacks,
but it can be hammered into shape. I haven't moved the code forward in a year,
and Marco wants it in the tree to hack on.
Diffstat (limited to 'sys')
-rw-r--r-- | sys/dev/softraid.c | 67 | ||||
-rw-r--r-- | sys/dev/softraid_aoe.c | 732 | ||||
-rw-r--r-- | sys/dev/softraidvar.h | 19 | ||||
-rw-r--r-- | sys/net/if_aoe.c | 70 | ||||
-rw-r--r-- | sys/net/if_aoe.h | 112 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 11 |
6 files changed, 1008 insertions, 3 deletions
diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c index 568c0d22f0f..3c13eebd4d1 100644 --- a/sys/dev/softraid.c +++ b/sys/dev/softraid.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid.c,v 1.122 2008/11/23 22:06:43 deraadt Exp $ */ +/* $OpenBSD: softraid.c,v 1.123 2008/11/23 23:44:01 tedu Exp $ */ /* * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> @@ -37,6 +37,12 @@ #include <sys/stat.h> #include <sys/conf.h> #include <sys/uio.h> +#include <sys/workq.h> + +#ifdef AOE +#include <sys/mbuf.h> +#include <net/if_aoe.h> +#endif /* AOE */ #include <crypto/cryptodev.h> @@ -1815,6 +1821,22 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); vol_size = ch_entry->src_meta.scmi.scm_coerced_size; break; +#ifdef AOE +#ifdef notyet + case 'A': + /* target */ + if (no_chunk != 1) + goto unwind; + strlcpy(sd->sd_name, "AOE TARGET", sizeof(sd->sd_name)); + break; +#endif /* notyet */ + case 'a': + /* initiator */ + if (no_chunk != 1) + goto unwind; + strlcpy(sd->sd_name, "AOE INITIATOR", sizeof(sd->sd_name)); + break; +#endif /* AOE */ #ifdef CRYPTO case 'C': DNPRINTF(SR_D_IOCTL, @@ -1970,6 +1992,49 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) sd->sd_set_chunk_state = sr_raid1_set_chunk_state; sd->sd_set_vol_state = sr_raid1_set_vol_state; break; +#ifdef AOE +#ifdef notyet + case 'A': + /* fill out discipline members */ + sd->sd_type = SR_MD_AOE; + sd->sd_max_ccb_per_wu = no_chunk; + sd->sd_max_wu = SR_RAIDAOE_NOWU; + + /* setup discipline pointers */ + sd->sd_alloc_resources = sr_aoe_start_server; + sd->sd_free_resources = sr_aoe_free_resources; + sd->sd_scsi_inquiry = sr_raid_inquiry; + sd->sd_scsi_read_cap = sr_raid_read_cap; + sd->sd_scsi_tur = sr_raid_tur; + sd->sd_scsi_req_sense = sr_raid_request_sense; + sd->sd_scsi_start_stop = sr_raid_start_stop; + sd->sd_scsi_sync = sr_raid_sync; + 
sd->sd_scsi_rw = sr_aoe_rw; + sd->sd_set_chunk_state = sr_raid_set_chunk_state; + sd->sd_set_vol_state = sr_raid_set_vol_state; + break; +#endif /* notyet */ + case 'a': + /* fill out discipline members */ + sd->sd_type = SR_MD_AOE; + sd->sd_max_ccb_per_wu = no_chunk; + sd->sd_max_wu = SR_RAIDAOE_NOWU; + + /* setup discipline pointers */ + sd->sd_alloc_resources = sr_aoe_alloc_resources; + sd->sd_free_resources = sr_aoe_free_resources; + sd->sd_scsi_inquiry = sr_raid_inquiry; + sd->sd_scsi_read_cap = sr_raid_read_cap; + sd->sd_scsi_tur = sr_raid_tur; + sd->sd_scsi_req_sense = sr_raid_request_sense; + sd->sd_scsi_start_stop = sr_raid_start_stop; + sd->sd_scsi_sync = sr_raid_sync; + sd->sd_scsi_rw = sr_aoe_rw; + /* XXX reuse raid 1 functions for now FIXME */ + sd->sd_set_chunk_state = sr_raid1_set_chunk_state; + sd->sd_set_vol_state = sr_raid1_set_vol_state; + break; +#endif #ifdef CRYPTO case 'C': /* fill out discipline members */ diff --git a/sys/dev/softraid_aoe.c b/sys/dev/softraid_aoe.c new file mode 100644 index 00000000000..f63936a8ee9 --- /dev/null +++ b/sys/dev/softraid_aoe.c @@ -0,0 +1,732 @@ +/* $OpenBSD: softraid_aoe.c,v 1.1 2008/11/23 23:44:01 tedu Exp $ */ +/* + * Copyright (c) 2008 Ted Unangst <tedu@openbsd.org> + * Copyright (c) 2008 Marco Peereboom <marco@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "bio.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/device.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/disk.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/fcntl.h> +#include <sys/disklabel.h> +#include <sys/mount.h> +#include <sys/sensors.h> +#include <sys/stat.h> +#include <sys/conf.h> +#include <sys/uio.h> + +#include <scsi/scsi_all.h> +#include <scsi/scsiconf.h> +#include <scsi/scsi_disk.h> + +#include <dev/softraidvar.h> +#include <dev/rndvar.h> + +#include <sys/socket.h> +#include <sys/mbuf.h> +#include <sys/socketvar.h> +#include <net/if.h> +#include <netinet/in.h> +#include <net/ethertypes.h> +#include <netinet/if_ether.h> +#include <net/if_aoe.h> + +void sr_aoe_input(struct aoe_handler *, struct mbuf *); +void sr_aoe_setup(struct aoe_handler *, struct mbuf *); +void sr_aoe_timeout(void *); + +/* AOE disk functions */ +void +sr_aoe_setup(struct aoe_handler *ah, struct mbuf *m) +{ + struct aoe_packet *ap; + int s; + + ap = mtod(m, struct aoe_packet *); + if (ap->command != 1) + goto out; + if (ap->tag != 0) + goto out; + s = splnet(); + ah->fn = (workq_fn)sr_aoe_input; + wakeup(ah); + splx(s); + +out: + m_freem(m); +} + +int +sr_aoe_alloc_resources(struct sr_discipline *sd) +{ + struct ifnet *ifp; + struct aoe_handler *ah; + unsigned char slot; + unsigned short shelf; + const char *nic; +#if 0 + struct mbuf *m; + struct ether_header *eh; + struct aoe_packet *ap; + int rv; +#endif + int s; + + if (!sd) + return (EINVAL); + + DNPRINTF(SR_D_DIS, "%s: sr_aoe_alloc_resources\n", + 
DEVNAME(sd->sd_sc)); + + sr_wu_alloc(sd); + sr_ccb_alloc(sd); + + /* where do these come from */ + slot = 3; + shelf = 4; + nic = "ne0"; + + ifp = ifunit(nic); + if (!ifp) { + return EINVAL; + } + shelf = htons(shelf); + + ah = malloc(sizeof(*ah), M_DEVBUF, M_WAITOK); + memset(ah, 0, sizeof(*ah)); + ah->ifp = ifp; + ah->major = shelf; + ah->minor = slot; + ah->fn = (workq_fn)sr_aoe_input; + TAILQ_INIT(&ah->reqs); + + s = splnet(); + TAILQ_INSERT_TAIL(&aoe_handlers, ah, next); + splx(s); + + sd->mds.mdd_aoe.sra_ah = ah; + sd->mds.mdd_aoe.sra_eaddr[0] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[1] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[2] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[3] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[4] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[5] = 0xff; + +#if 0 + MGETHDR(m, M_WAIT, MT_HEADER); + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6); + memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6); + eh->ether_type = htons(ETHERTYPE_AOE); + ap = (struct aoe_packet *)&eh[1]; + ap->vers = 1; + ap->flags = 0; + ap->error = 0; + ap->major = shelf; + ap->minor = slot; + ap->command = 1; + ap->tag = 0; + ap->buffercnt = 0; + ap->firmwarevers = 0; + ap->configsectorcnt = 0; + ap->serververs = 0; + ap->ccmd = 0; + ap->configstringlen = 0; + m->m_pkthdr.len = m->m_len = AOE_CFGHDRLEN; + s = splnet(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv); + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + rv = tsleep(ah, PRIBIO|PCATCH, "aoesetup", 30 * hz); + splx(s); + if (rv) { + s = splnet(); + TAILQ_REMOVE(&aoe_handlers, ah, next); + splx(s); + free(ah, M_DEVBUF); + return rv; + } +#endif + return 0; +} + +int +sr_aoe_free_resources(struct sr_discipline *sd) +{ + int s, rv = EINVAL; + struct aoe_handler *ah; + + if (!sd) + return (rv); + + DNPRINTF(SR_D_DIS, "%s: sr_aoe_free_resources\n", + DEVNAME(sd->sd_sc)); + + sr_wu_free(sd); + sr_ccb_free(sd); + + ah = sd->mds.mdd_aoe.sra_ah; + if (ah) { + s = splnet(); + 
TAILQ_REMOVE(&aoe_handlers, ah, next); + splx(s); + free(ah, M_DEVBUF); + } + + if (sd->sd_meta) + free(sd->sd_meta, M_DEVBUF); + + rv = 0; + return (rv); +} + +int +sr_aoe_rw(struct sr_workunit *wu) +{ + struct sr_discipline *sd = wu->swu_dis; + struct scsi_xfer *xs = wu->swu_xs; + struct sr_workunit *wup; + struct sr_chunk *scp; + int s, ios, rt; + daddr64_t fragblk, blk; + struct mbuf *m; + struct ether_header *eh; + struct aoe_packet *ap; + struct ifnet *ifp; + struct aoe_handler *ah; + struct aoe_req *ar; + int tag, rv, i; + int fragsize; + const int aoe_frags = 2; + + + DNPRINTF(SR_D_DIS, "%s: sr_aoe_rw 0x%02x\n", DEVNAME(sd->sd_sc), + xs->cmd->opcode); + + /* blk and scsi error will be handled by sr_validate_io */ + if (sr_validate_io(wu, &blk, "sr_aoe_rw")) + goto bad; + + wu->swu_blk_start = blk; + wu->swu_blk_end = blk + (xs->datalen >> 9) - 1; + + /* add 1 to get the inclusive amount, then some more for rounding */ + ios = (wu->swu_blk_end - wu->swu_blk_start + 1 + (aoe_frags - 1)) / + aoe_frags; + wu->swu_io_count = ios; + + if (xs->flags & SCSI_POLL) + panic("can't AOE poll"); + + /* walk queue backwards and fill in collider if we have one */ + s = splbio(); + if (0) /* XXX */ TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { + if (wu->swu_blk_end < wup->swu_blk_start || + wup->swu_blk_end < wu->swu_blk_start) + continue; + + /* we have an LBA collision, defer wu */ + wu->swu_state = SR_WU_DEFERRED; + if (wup->swu_collider) + /* wu is on deferred queue, append to last wu */ + while (wup->swu_collider) + wup = wup->swu_collider; + + wup->swu_collider = wu; + TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); + sd->sd_wu_collisions++; + splx(s); + return (0); + } + for (i = 0; i < ios; i++) { + fragblk = blk + aoe_frags * i; + fragsize = aoe_frags * 512; + if (fragblk + aoe_frags - 1 > wu->swu_blk_end) { + fragsize = (wu->swu_blk_end - fragblk + 1) * 512; + } + if (xs->flags & SCSI_DATA_IN) { + rt = 0; +ragain: + scp = 
sd->sd_vol.sv_chunks[0]; + switch (scp->src_meta.scm_status) { + case BIOC_SDONLINE: + case BIOC_SDSCRUB: + break; + + case BIOC_SDOFFLINE: + case BIOC_SDREBUILD: + case BIOC_SDHOTSPARE: + if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) + goto ragain; + + /* FALLTHROUGH */ + default: + /* volume offline */ + printf("%s: is offline, can't read\n", + DEVNAME(sd->sd_sc)); + goto bad; + } + } else { + scp = sd->sd_vol.sv_chunks[0]; + switch (scp->src_meta.scm_status) { + case BIOC_SDONLINE: + case BIOC_SDSCRUB: + case BIOC_SDREBUILD: + break; + + case BIOC_SDHOTSPARE: /* should never happen */ + case BIOC_SDOFFLINE: + wu->swu_io_count--; + goto bad; + + default: + goto bad; + } + + } + + tag = ++sd->mds.mdd_aoe.sra_tag; + ah = sd->mds.mdd_aoe.sra_ah; + ar = malloc(sizeof(*ar), M_DEVBUF, M_NOWAIT); + if (!ar) { + splx(s); + return ENOMEM; + } + ar->v = wu; + ar->tag = tag; + ar->len = fragsize; + timeout_set(&ar->to, sr_aoe_timeout, ar); + TAILQ_INSERT_TAIL(&ah->reqs, ar, next); + splx(s); + + ifp = ah->ifp; + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (xs->flags & SCSI_DATA_OUT && m) { + MCLGET(m, M_DONTWAIT); + if (!(m->m_flags & M_EXT)) { + m_freem(m); + m = NULL; + } + } + if (!m) { + s = splbio(); + TAILQ_REMOVE(&ah->reqs, ar, next); + splx(s); + free(ar, M_DEVBUF); + return ENOMEM; + } + + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6); + memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6); + eh->ether_type = htons(ETHERTYPE_AOE); + ap = (struct aoe_packet *)&eh[1]; + ap->vers = 1; + ap->flags = 0; + ap->error = 0; + ap->major = ah->major; + ap->minor = ah->minor; + ap->command = 0; + ap->tag = tag; + ap->aflags = 0; /* AOE_EXTENDED; */ + if (xs->flags & SCSI_DATA_OUT) { + ap->aflags |= AOE_WRITE; + ap->cmd = AOE_WRITE; + memcpy(ap->data, xs->data + (aoe_frags * i * 512), fragsize); + } else { + ap->cmd = AOE_READ; + } + ap->feature = 0; + ap->sectorcnt = fragsize / 512; + AOE_BLK2HDR(fragblk, ap); + + 
m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN + fragsize; + s = splnet(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv); + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + timeout_add(&ar->to, hz * 10); + splx(s); + + if (rv) { + s = splbio(); + TAILQ_REMOVE(&ah->reqs, ar, next); + splx(s); + free(ar, M_DEVBUF); + return rv; + } + } + + + return (0); + +bad: + /* wu is unwound by sr_wu_put */ + return (1); +} + +void +sr_aoe_input(struct aoe_handler *ah, struct mbuf *m) +{ + struct sr_discipline *sd; + struct scsi_xfer *xs; + struct aoe_req *ar; + struct aoe_packet *ap; + struct sr_workunit *wu, *wup; + daddr64_t blk, offset; + int len, s; + int tag; + + ap = mtod(m, struct aoe_packet *); + tag = ap->tag; + + s = splnet(); + TAILQ_FOREACH(ar, &ah->reqs, next) { + if (ar->tag == tag) { + TAILQ_REMOVE(&ah->reqs, ar, next); + break; + } + } + splx(s); + if (!ar) { + goto out; + } + timeout_del(&ar->to); + wu = ar->v; + sd = wu->swu_dis; + xs = wu->swu_xs; + + + if (ap->flags & AOE_F_ERROR) { + wu->swu_ios_failed++; + goto out; + } else { + wu->swu_ios_succeeded++; + len = ar->len; /* XXX check against sector count */ + if (xs->flags & SCSI_DATA_IN) { + AOE_HDR2BLK(ap, blk); + /* XXX bounds checking */ + offset = (wu->swu_blk_start - blk) * 512; + memcpy(xs->data + offset, ap->data, len); + } + } + + wu->swu_ios_complete++; + + s = splbio(); + + if (wu->swu_ios_complete == wu->swu_io_count) { + if (wu->swu_ios_failed == wu->swu_ios_complete) + xs->error = XS_DRIVER_STUFFUP; + else + xs->error = XS_NOERROR; + + xs->resid = 0; + xs->flags |= ITSDONE; + + if (0) /* XXX */ TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { + if (wu == wup) { + /* wu on pendq, remove */ + TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); + + if (wu->swu_collider) { + /* restart deferred wu */ + wu->swu_collider->swu_state = + SR_WU_INPROGRESS; + TAILQ_REMOVE(&sd->sd_wu_defq, + wu->swu_collider, swu_link); + /* sr_raid_startwu(wu->swu_collider); */ + } + break; + } + } + + /* do not 
change the order of these 2 functions */ + sr_wu_put(wu); + scsi_done(xs); + } + +out: + m_freem(m); +} + +void +sr_aoe_timeout(void *v) +{ + struct aoe_req *ar = v; + struct sr_discipline *sd; + struct scsi_xfer *xs; + struct aoe_handler *ah; + struct aoe_req *ar2; + struct sr_workunit *wu; + int s; + + wu = ar->v; + sd = wu->swu_dis; + xs = wu->swu_xs; + ah = sd->mds.mdd_aoe.sra_ah; + + s = splnet(); + TAILQ_FOREACH(ar2, &ah->reqs, next) { + if (ar2->tag == ar->tag) { + TAILQ_REMOVE(&ah->reqs, ar, next); + break; + } + } + splx(s); + if (!ar2) + return; + free(ar, M_DEVBUF); + /* give it another go */ + /* XXX this is going to repeat the whole workunit */ + sr_aoe_rw(wu); +} + +#if 0 +int sr_aoe_start_server(struct sr_discipline *); +void sr_aoe_server(struct aoe_handler *, struct mbuf *); + +int +sr_aoe_start_server(struct sr_discipline *sd) +{ + struct ifnet *ifp; + struct aoe_handler *ah; + unsigned char slot; + unsigned short shelf; + const char *nic; + struct mbuf *m, *m2; + struct ether_header *eh; + struct aoe_packet *rp, *ap; + struct aoe_req *ar; + int rv, s; + int len; + struct buf buf; + daddr64_t blk; + + if (!sd) + return (EINVAL); + + DNPRINTF(SR_D_DIS, "%s: sr_aoe_alloc_resources\n", + DEVNAME(sd->sd_sc)); + + sr_alloc_wu(sd); + sr_alloc_ccb(sd); + + /* where do these come from */ + slot = 3; + shelf = 4; + nic = "ne0"; + + ifp = ifunit(nic); + if (!ifp) { + return EINVAL; + } + shelf = htons(shelf); + + ah = malloc(sizeof(*ah), M_DEVBUF, M_WAITOK); + memset(ah, 0, sizeof(*ah)); + ah->ifp = ifp; + ah->major = shelf; + ah->minor = slot; + ah->fn = (workq_fn)sr_aoe_server; + TAILQ_INIT(&ah->reqs); + + s = splnet(); + TAILQ_INSERT_TAIL(&aoe_handlers, ah, next); + splx(s); + + sd->mds.mdd_aoe.sra_ah = ah; + sd->mds.mdd_aoe.sra_eaddr[0] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[1] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[2] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[3] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[4] = 0xff; + sd->mds.mdd_aoe.sra_eaddr[5] = 0xff; + + while (1) { + 
s = splnet(); +resleep: + rv = tsleep(ah, PCATCH|PRIBIO, "wait", 0); + if (rv) { + splx(s); + break; + } + ar = TAILQ_FIRST(&ah->reqs); + if (!ar) { + goto resleep; + } + TAILQ_REMOVE(&ah->reqs, ar, next); + splx(s); + m2 = ar->v; + rp = mtod(m2, struct aoe_packet *); + if (rp->command) { + continue; + } + if (rp->aflags & AOE_AF_WRITE) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (!m) + continue; + len = rp->sectorcnt * 512; + + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6); + memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6); + eh->ether_type = htons(ETHERTYPE_AOE); + ap = (struct aoe_packet *)&eh[1]; + AOE_HDR2BLK(ap, blk); + memset(&buf, 0, sizeof buf); + buf.b_blkno = blk; + buf.b_flags = B_WRITE; + buf.b_bcount = len; + buf.b_bufsize = len; + buf.b_resid = len; + buf.b_data = rp->data; + buf.b_error = 0; + buf.b_proc = curproc; + buf.b_dev = sd->sd_vol.sv_chunks[0]->src_dev_mm; + LIST_INIT(&buf.b_dep); + + s = splbio(); + bdevsw_lookup(buf.b_dev)->d_strategy(&buf); + biowait(&buf); + splx(s); + + ap->vers = 1; + ap->flags = AOE_F_RESP; + ap->error = 0; + ap->major = rp->major; + ap->minor = rp->minor; + ap->command = 1; + ap->tag = rp->tag; + ap->aflags = rp->aflags; + ap->feature = 0; + ap->sectorcnt = len / 512; + ap->cmd = AOE_WRITE; + ap->lba0 = 0; + ap->lba1 = 0; + ap->lba2 = 0; + ap->lba3 = 0; + ap->lba4 = 0; + ap->lba5 = 0; + ap->reserved = 0; + + m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN; + + s = splnet(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv); + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + splx(s); + } else { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m) { + MCLGET(m, M_DONTWAIT); + if (!(m->m_flags & M_EXT)) { + m_freem(m); + m = NULL; + } + } + if (!m) + continue; + len = rp->sectorcnt * 512; + + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_dhost, sd->mds.mdd_aoe.sra_eaddr, 6); + memcpy(eh->ether_shost, ((struct arpcom *)ifp)->ac_enaddr, 6); + 
eh->ether_type = htons(ETHERTYPE_AOE); + ap = (struct aoe_packet *)&eh[1]; + AOE_HDR2BLK(ap, blk); + memset(&buf, 0, sizeof buf); + buf.b_blkno = blk; + buf.b_flags = B_WRITE; + buf.b_bcount = len; + buf.b_bufsize = len; + buf.b_resid = len; + buf.b_data = ap->data; + buf.b_error = 0; + buf.b_proc = curproc; + buf.b_dev = sd->sd_vol.sv_chunks[0]->src_dev_mm; + LIST_INIT(&buf.b_dep); + + s = splbio(); + bdevsw_lookup(buf.b_dev)->d_strategy(&buf); + biowait(&buf); + splx(s); + + ap->vers = 1; + ap->flags = AOE_F_RESP; + ap->error = 0; + ap->major = rp->major; + ap->minor = rp->minor; + ap->command = 1; + ap->tag = rp->tag; + ap->aflags = rp->aflags; + ap->feature = 0; + ap->sectorcnt = len / 512; + ap->cmd = AOE_READ; + ap->lba0 = 0; + ap->lba1 = 0; + ap->lba2 = 0; + ap->lba3 = 0; + ap->lba4 = 0; + ap->lba5 = 0; + ap->reserved = 0; + m->m_pkthdr.len = m->m_len = AOE_CMDHDRLEN; + + s = splnet(); + IFQ_ENQUEUE(&ifp->if_snd, m, NULL, rv); + if ((ifp->if_flags & IFF_OACTIVE) == 0) + (*ifp->if_start)(ifp); + splx(s); + } + + } + + s = splnet(); + TAILQ_REMOVE(&aoe_handlers, ah, next); + splx(s); + free(ah, M_DEVBUF); + + return rv; +} + +void +sr_aoe_server(struct aoe_handler *ah, struct mbuf *m) +{ + struct aoe_req *ar; + int s; + + ar = malloc(sizeof *ar, M_DEVBUF, M_NOWAIT); + if (!ar) { + m_freem(m); + return; + } + ar->v = m; + s = splnet(); + TAILQ_INSERT_TAIL(&ah->reqs, ar, next); + wakeup(ah); + splx(s); +} +#endif /* server */ diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h index 20e77ab5e8c..827a75a6fc1 100644 --- a/sys/dev/softraidvar.h +++ b/sys/dev/softraidvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: softraidvar.h,v 1.63 2008/07/29 01:18:02 marco Exp $ */ +/* $OpenBSD: softraidvar.h,v 1.64 2008/11/23 23:44:01 tedu Exp $ */ /* * Copyright (c) 2006 Marco Peereboom <marco@peereboom.us> * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> @@ -304,6 +304,14 @@ struct sr_crypto { u_int64_t scr_sid[SR_CRYPTO_MAXKEYS]; }; +/* ata over ethernet */ +#define 
SR_RAIDAOE_NOWU 2 +struct sr_aoe { + struct aoe_handler *sra_ah; + int sra_tag; + char sra_eaddr[6]; +}; + struct sr_metadata_list { u_int8_t sml_metadata[SR_META_SIZE * 512]; dev_t sml_mm; @@ -350,6 +358,7 @@ struct sr_discipline { #define SR_MD_RAID5 2 #define SR_MD_CACHE 3 #define SR_MD_CRYPTO 4 +#define SR_MD_AOE 5 char sd_name[10]; /* human readable dis name */ u_int8_t sd_scsibus; /* scsibus discipline uses */ struct scsi_link sd_link; /* link to midlayer */ @@ -358,6 +367,9 @@ struct sr_discipline { struct sr_raid0 mdd_raid0; struct sr_raid1 mdd_raid1; struct sr_crypto mdd_crypto; +#ifdef AOE + struct sr_aoe mdd_aoe; +#endif /* AOE */ } sd_dis_specific;/* dis specific members */ #define mds sd_dis_specific @@ -491,6 +503,11 @@ int sr_crypto_get_kdf(struct bioc_createraid *, struct sr_discipline *); int sr_crypto_create_keys(struct sr_discipline *); +/* aoe discipline */ +int sr_aoe_alloc_resources(struct sr_discipline *); +int sr_aoe_free_resources(struct sr_discipline *); +int sr_aoe_rw(struct sr_workunit *); + #ifdef SR_DEBUG void sr_dump_mem(u_int8_t *, int); #endif diff --git a/sys/net/if_aoe.c b/sys/net/if_aoe.c new file mode 100644 index 00000000000..694fae87d5f --- /dev/null +++ b/sys/net/if_aoe.c @@ -0,0 +1,70 @@ +/* $OpenBSD: if_aoe.c,v 1.1 2008/11/23 23:44:01 tedu Exp $ */ +/* + * Copyright (c) 2008 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/device.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/disk.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/fcntl.h> +#include <sys/disklabel.h> +#include <sys/conf.h> +#include <sys/stat.h> +#include <sys/workq.h> +#include <sys/socket.h> +#include <sys/mbuf.h> +#include <sys/socketvar.h> +#include <net/if.h> +#include <netinet/in.h> +#include <net/ethertypes.h> +#include <netinet/if_ether.h> +#include <net/if_aoe.h> + +struct aoe_handler_head aoe_handlers = TAILQ_HEAD_INITIALIZER(aoe_handlers); + +void +aoe_input(struct ifnet *ifp, struct mbuf *m) +{ + struct aoe_packet *ap; + struct aoe_handler *q = NULL; + + splassert(IPL_NET); + + ap = mtod(m, struct aoe_packet *); + /* printf("aoe packet %d %d\n", htons(ap->major), ap->minor); */ + + TAILQ_FOREACH(q, &aoe_handlers, next) { + if (q->ifp == ifp) { + if (ap->major == q->major && ap->minor == q->minor) + break; + } + } + if (!q) { + /* printf("no q\n"); */ + m_freem(m); + return; + } + workq_add_task(NULL, 0, q->fn, q, m); +} diff --git a/sys/net/if_aoe.h b/sys/net/if_aoe.h new file mode 100644 index 00000000000..79e0fe0eb52 --- /dev/null +++ b/sys/net/if_aoe.h @@ -0,0 +1,112 @@ +/* $OpenBSD: if_aoe.h,v 1.1 2008/11/23 23:44:01 tedu Exp $ */ +/* + * Copyright (c) 2007 Ted Unangst <tedu@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright 
notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/workq.h> + +struct aoe_packet { +#define AOE_F_ERROR (1 << 2) +#define AOE_F_RESP (1 << 3) +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned char flags : 4; + unsigned char vers : 4; +#else + unsigned char vers : 4; + unsigned char flags : 4; +#endif + unsigned char error; + unsigned short major; + unsigned char minor; + unsigned char command; + unsigned int tag; + union { + /* command packet */ + struct { +#define AOE_AF_WRITE (1 << 0) +#define AOE_AF_EXTENDED (1 << 6) + unsigned char aflags; + unsigned char feature; + unsigned char sectorcnt; +#define AOE_READ 0x20 +#define AOE_READ_EXT 0x24 +#define AOE_WRITE 0x30 +#define AOE_WRITE_EXT 0x34 + unsigned char cmd; + unsigned char lba0; + unsigned char lba1; + unsigned char lba2; +#define AOE_LBABIT 0x40 + unsigned char lba3; + unsigned char lba4; + unsigned char lba5; + unsigned short reserved; + unsigned char data[]; + } __packed; + /* config packet */ + struct { + unsigned short buffercnt; + unsigned short firmwarevers; + unsigned char configsectorcnt; +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned char ccmd : 4; + unsigned char serververs : 4; +#else + unsigned char serververs : 4; + unsigned char ccmd : 4; +#endif + unsigned short configstringlen; + unsigned char configstring[1024]; + } __packed; + }; +} __packed; + +#define AOE_BLK2HDR(blk, ap) do { \ + ap->lba0 = blk; \ + ap->lba1 = blk >> 8; \ + ap->lba2 = blk >> 16; \ +} while (0) + 
+#define AOE_HDR2BLK(ap, blk) do { \ + blk = 0; \ + blk |= ap->lba0; \ + blk |= ap->lba1 << 8; \ + blk |= ap->lba2 << 16; \ +} while (0) + + +#define AOE_CFGHDRLEN 32 +#define AOE_CMDHDRLEN 36 + +struct aoe_req { + void *v; + int tag; + int len; + TAILQ_ENTRY(aoe_req) next; + struct timeout to; +}; + +struct aoe_handler { + TAILQ_ENTRY(aoe_handler) next; + unsigned short major; + unsigned char minor; + struct ifnet *ifp; + workq_fn fn; + TAILQ_HEAD(, aoe_req) reqs; +}; + +extern TAILQ_HEAD(aoe_handler_head, aoe_handler) aoe_handlers; +extern int aoe_waiting; + +void aoe_input(struct ifnet *, struct mbuf *); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index dcbaab93b0c..8adca1c9a9b 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ethersubr.c,v 1.127 2008/10/16 19:12:51 naddy Exp $ */ +/* $OpenBSD: if_ethersubr.c,v 1.128 2008/11/23 23:44:01 tedu Exp $ */ /* $NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $ */ /* @@ -135,6 +135,10 @@ didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>. #include <net/if_trunk.h> #endif +#ifdef AOE +#include <net/if_aoe.h> +#endif /* AOE */ + #ifdef INET6 #ifndef INET #include <netinet/in.h> @@ -736,6 +740,11 @@ decapsulate: schednetisr(NETISR_PPPOE); break; #endif /* NPPPOE > 0 */ +#ifdef AOE + case ETHERTYPE_AOE: + aoe_input(ifp, m); + goto done; +#endif /* AOE */ #ifdef MPLS case ETHERTYPE_MPLS: case ETHERTYPE_MPLS_MCAST: |