summaryrefslogtreecommitdiff
path: root/sys/nfs/nfs_serv.c
diff options
context:
space:
mode:
authorThordur I. Bjornsson <thib@cvs.openbsd.org>2009-08-10 09:38:45 +0000
committerThordur I. Bjornsson <thib@cvs.openbsd.org>2009-08-10 09:38:45 +0000
commit17e713a81519d9ea564e565f6e1b5aa5f321c599 (patch)
tree1c456ffad4ac3b4a88848b2cad9cd5d0742af782 /sys/nfs/nfs_serv.c
parent7187b26f454821bd801c089b57c90d78f4d6b19c (diff)
Remove the v2 writegather code. It did help alot back in the 80s
but extensive performance benchmarking done by myself and jasper@ has shown that it doesn't help, at all - even on vaxens and in some cases it makes things significantly slower. "this excites me sexually" jetpack@ Tested by jasper@. OK blambert@
Diffstat (limited to 'sys/nfs/nfs_serv.c')
-rw-r--r--sys/nfs/nfs_serv.c373
1 files changed, 1 insertions, 372 deletions
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index cffb7abadb4..e689148102d 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_serv.c,v 1.81 2009/08/09 17:35:31 blambert Exp $ */
+/* $OpenBSD: nfs_serv.c,v 1.82 2009/08/10 09:38:44 thib Exp $ */
/* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */
/*
@@ -88,11 +88,6 @@ extern enum vtype nv3tov_type[8];
extern struct nfsstats nfsstats;
extern nfstype nfsv2_type[9];
extern nfstype nfsv3_type[9];
-int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
-struct timeval nfsrvw_procrastinate_tv = {
- (NFS_GATHERDELAY * 1000) / 1000000, /* tv_sec */
- (NFS_GATHERDELAY * 1000) % 1000000 /* tv_usec */
-};
/*
* nfs v3 access service
@@ -867,372 +862,6 @@ bad:
}
/*
- * NFS write service with write gathering support. Called when
- * nfsrvw_procrastinate > 0.
- * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
- * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
- * Jan. 1994.
- */
-int
-nfsrv_writegather(ndp, slp, procp, mrq)
- struct nfsrv_descript **ndp;
- struct nfssvc_sock *slp;
- struct proc *procp;
- struct mbuf **mrq;
-{
- struct iovec *ivp;
- struct mbuf *mp;
- struct nfsrv_descript *wp, *nfsd, *nnfsd, *owp, *swp;
- struct nfs_fattr *fp;
- struct nfsm_info info;
- int i = 0;
- struct iovec *iov;
- struct nfsrvw_delayhash *wpp;
- struct ucred *cred;
- struct vattr va, forat;
- u_int32_t *tl;
- int32_t t1;
- int error = 0, rdonly, len = 0, forat_ret = 1;
- int ioflags, aftat_ret = 1, s, adjust, zeroing;
- char *cp2;
- struct vnode *vp;
- struct uio io, *uiop = &io;
- struct timeval tv;
-
- *mrq = NULL;
- if (*ndp) {
- nfsd = *ndp;
- *ndp = NULL;
- info.nmi_mreq = NULL;
- info.nmi_mrep = nfsd->nd_mrep;
- info.nmi_md = nfsd->nd_md;
- info.nmi_dpos = nfsd->nd_dpos;
- cred = &nfsd->nd_cr;
- info.nmi_v3 = (nfsd->nd_flag & ND_NFSV3);
- LIST_INIT(&nfsd->nd_coalesce);
- nfsd->nd_mreq = NULL;
- nfsd->nd_stable = NFSV3WRITE_FILESYNC;
- getmicrotime(&tv);
- timeradd(&tv, &nfsrvw_procrastinate_tv, &nfsd->nd_time);
-
- /*
- * Now, get the write header..
- */
- nfsm_srvmtofh(&nfsd->nd_fh);
- if (info.nmi_v3) {
- nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
- nfsd->nd_off = fxdr_hyper(tl);
- tl += 3;
- nfsd->nd_stable = fxdr_unsigned(int, *tl++);
- } else {
- nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
- nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
- tl += 2;
- }
- len = fxdr_unsigned(int32_t, *tl);
- nfsd->nd_len = len;
- nfsd->nd_eoff = nfsd->nd_off + len;
-
- /*
- * Trim the header out of the mbuf list and trim off any trailing
- * junk so that the mbuf list has only the write data.
- */
- zeroing = 1;
- i = 0;
- mp = info.nmi_mrep;
- while (mp) {
- if (mp == info.nmi_md) {
- zeroing = 0;
- adjust = info.nmi_dpos - mtod(mp, caddr_t);
- mp->m_len -= adjust;
- if (mp->m_len > 0 && adjust > 0)
- mp->m_data += adjust;
- }
- if (zeroing)
- mp->m_len = 0;
- else {
- i += mp->m_len;
- if (i > len) {
- mp->m_len -= (i - len);
- zeroing = 1;
- }
- }
- mp = mp->m_next;
- }
- if (len > NFS_MAXDATA || len < 0 || i < len) {
- m_freem(info.nmi_mrep);
-nfsmout:
- error = EIO;
- nfsm_writereply(2 * NFSX_UNSIGNED, info.nmi_v3);
- if (info.nmi_v3)
- nfsm_srvwcc(nfsd, forat_ret, &forat, aftat_ret, &va,
- &info.nmi_mb);
- nfsd->nd_mreq = info.nmi_mreq;
- nfsd->nd_mrep = NULL;
- timerclear(&nfsd->nd_time);
- }
-
- /*
- * Add this entry to the hash and time queues.
- */
- s = splsoftclock();
- owp = NULL;
- wp = LIST_FIRST(&slp->ns_tq);
- while (wp && timercmp(&wp->nd_time, &nfsd->nd_time, <)) {
- owp = wp;
- wp = LIST_NEXT(wp, nd_tq);
- }
- if (owp) {
- LIST_INSERT_AFTER(owp, nfsd, nd_tq);
- } else {
- LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
- }
- if (nfsd->nd_mrep) {
- wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
- owp = NULL;
- wp = LIST_FIRST(wpp);
- while (wp &&
- bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
- owp = wp;
- wp = LIST_NEXT(wp, nd_hash);
- }
- while (wp && wp->nd_off < nfsd->nd_off &&
- !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
- owp = wp;
- wp = LIST_NEXT(wp, nd_hash);
- }
- if (owp) {
- LIST_INSERT_AFTER(owp, nfsd, nd_hash);
-
- /*
- * Search the hash list for overlapping entries and
- * coalesce.
- */
- for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
- wp = LIST_NEXT(nfsd, nd_hash);
- if (NFSW_SAMECRED(owp, nfsd))
- nfsrvw_coalesce(owp, nfsd);
- }
- } else {
- LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
- }
- }
- splx(s);
- }
-
- /*
- * Now, do VOP_WRITE()s for any one(s) that need to be done now
- * and generate the associated reply mbuf list(s).
- */
-loop1:
- getmicrotime(&tv);
- s = splsoftclock();
- for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd != NULL; nfsd = owp) {
- owp = LIST_NEXT(nfsd, nd_tq);
- if (timercmp(&nfsd->nd_time, &tv, >))
- break;
- if (nfsd->nd_mreq)
- continue;
- LIST_REMOVE(nfsd, nd_tq);
- LIST_REMOVE(nfsd, nd_hash);
- splx(s);
- info.nmi_mrep = nfsd->nd_mrep;
- nfsd->nd_mrep = NULL;
- cred = &nfsd->nd_cr;
- info.nmi_v3 = (nfsd->nd_flag & ND_NFSV3);
- forat_ret = aftat_ret = 1;
- error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
- nfsd->nd_nam, &rdonly);
- if (!error) {
- if (info.nmi_v3)
- forat_ret = VOP_GETATTR(vp, &forat, cred, procp);
- if (vp->v_type != VREG) {
- if (info.nmi_v3)
- error = EINVAL;
- else
- error = (vp->v_type == VDIR) ? EISDIR : EACCES;
- }
- } else
- vp = NULL;
- if (!error) {
- error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1);
- }
-
- if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
- ioflags = IO_NODELOCKED;
- else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
- ioflags = (IO_SYNC | IO_NODELOCKED);
- else
- ioflags = (IO_SYNC | IO_NODELOCKED);
- uiop->uio_rw = UIO_WRITE;
- uiop->uio_segflg = UIO_SYSSPACE;
- uiop->uio_procp = NULL;
- uiop->uio_offset = nfsd->nd_off;
- uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
- if (uiop->uio_resid > 0) {
- mp = info.nmi_mrep;
- i = 0;
- while (mp) {
- if (mp->m_len > 0)
- i++;
- mp = mp->m_next;
- }
- uiop->uio_iovcnt = i;
- iov = malloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
- uiop->uio_iov = ivp = iov;
- mp = info.nmi_mrep;
- while (mp) {
- if (mp->m_len > 0) {
- ivp->iov_base = mtod(mp, caddr_t);
- ivp->iov_len = mp->m_len;
- ivp++;
- }
- mp = mp->m_next;
- }
- if (!error) {
- error = VOP_WRITE(vp, uiop, ioflags, cred);
- nfsstats.srvvop_writes++;
- }
- free(iov, M_TEMP);
- }
- m_freem(info.nmi_mrep);
- if (vp) {
- aftat_ret = VOP_GETATTR(vp, &va, cred, procp);
- vput(vp);
- }
-
- /*
- * Loop around generating replies for all write rpcs that have
- * now been completed.
- */
- swp = nfsd;
- do {
- if (error) {
- nfsm_writereply(NFSX_WCCDATA(info.nmi_v3), info.nmi_v3);
- if (info.nmi_v3) {
- nfsm_srvwcc(nfsd, forat_ret, &forat, aftat_ret,
- &va, &info.nmi_mb);
- }
- } else {
- nfsm_writereply(NFSX_PREOPATTR(info.nmi_v3) +
- NFSX_POSTOPORFATTR(info.nmi_v3) + 2 * NFSX_UNSIGNED +
- NFSX_WRITEVERF(info.nmi_v3), info.nmi_v3);
- if (info.nmi_v3) {
- nfsm_srvwcc(nfsd, forat_ret, &forat, aftat_ret,
- &va, &info.nmi_mb);
- tl = nfsm_build(&info.nmi_mb, 4 * NFSX_UNSIGNED);
- *tl++ = txdr_unsigned(nfsd->nd_len);
- *tl++ = txdr_unsigned(swp->nd_stable);
- /*
- * Actually, there is no need to txdr these fields,
- * but it may make the values more human readable,
- * for debugging purposes.
- */
- *tl++ = txdr_unsigned(boottime.tv_sec);
- *tl = txdr_unsigned(boottime.tv_usec);
- } else {
- fp = nfsm_build(&info.nmi_mb, NFSX_V2FATTR);
- nfsm_srvfattr(nfsd, &va, fp);
- }
- }
- nfsd->nd_mreq = info.nmi_mreq;
- if (nfsd->nd_mrep)
- panic("nfsrv_write: nd_mrep not free");
-
- /*
- * Done. Put it at the head of the timer queue so that
- * the final phase can return the reply.
- */
- s = splsoftclock();
- if (nfsd != swp) {
- timerclear(&nfsd->nd_time);
- LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
- }
- nfsd = LIST_FIRST(&swp->nd_coalesce);
- if (nfsd) {
- LIST_REMOVE(nfsd, nd_tq);
- }
- splx(s);
- } while (nfsd);
- s = splsoftclock();
- timerclear(&swp->nd_time);
- LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
- splx(s);
- goto loop1;
- }
- splx(s);
-
- /*
- * Search for a reply to return.
- */
- s = splsoftclock();
- for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd != NULL; nfsd = nnfsd) {
- nnfsd = LIST_NEXT(nfsd, nd_tq);
- if (nfsd->nd_mreq) {
- LIST_REMOVE(nfsd, nd_tq);
- *mrq = nfsd->nd_mreq;
- *ndp = nfsd;
- break;
- }
- }
- splx(s);
- return (0);
-}
-
-/*
- * Coalesce the write request nfsd into owp. To do this we must:
- * - remove nfsd from the queues
- * - merge nfsd->nd_mrep into owp->nd_mrep
- * - update the nd_eoff and nd_stable for owp
- * - put nfsd on owp's nd_coalesce list
- * NB: Must be called at splsoftclock().
- */
-void
-nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
-{
- int overlap;
- struct mbuf *mp;
-
- splsoftassert(IPL_SOFTCLOCK);
-
- LIST_REMOVE(nfsd, nd_hash);
- LIST_REMOVE(nfsd, nd_tq);
- if (owp->nd_eoff < nfsd->nd_eoff) {
- overlap = owp->nd_eoff - nfsd->nd_off;
- if (overlap < 0)
- panic("nfsrv_coalesce: bad off");
- if (overlap > 0)
- m_adj(nfsd->nd_mrep, overlap);
- mp = owp->nd_mrep;
- while (mp->m_next)
- mp = mp->m_next;
- mp->m_next = nfsd->nd_mrep;
- owp->nd_eoff = nfsd->nd_eoff;
- } else
- m_freem(nfsd->nd_mrep);
- nfsd->nd_mrep = NULL;
- if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
- owp->nd_stable = NFSV3WRITE_FILESYNC;
- else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
- owp->nd_stable == NFSV3WRITE_UNSTABLE)
- owp->nd_stable = NFSV3WRITE_DATASYNC;
- LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
-
- /*
- * nfsd might hold coalesce elements! Move them to owp.
- * Otherwise, requests may be lost and clients will be stuck.
- */
- if (LIST_FIRST(&nfsd->nd_coalesce) != NULL) {
- struct nfsrv_descript *m;
-
- while ((m = LIST_FIRST(&nfsd->nd_coalesce)) != NULL) {
- LIST_REMOVE(m, nd_tq);
- LIST_INSERT_HEAD(&owp->nd_coalesce, m, nd_tq);
- }
- }
-}
-
-/*
* nfs create service
* now does a truncate to 0 length via. setattr if it already exists
*/