author | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 1995-10-18 08:53:40 +0000
commit | d6583bb2a13f329cf0332ef2570eb8bb8fc0e39c (patch)
tree | ece253b876159b39c620e62b6c9b1174642e070e /sys/nfs
initial import of NetBSD tree
Diffstat (limited to 'sys/nfs')
-rw-r--r-- | sys/nfs/krpc.h | 39
-rw-r--r-- | sys/nfs/krpc_subr.c | 581
-rw-r--r-- | sys/nfs/nfs.h | 308
-rw-r--r-- | sys/nfs/nfs_bio.c | 826
-rw-r--r-- | sys/nfs/nfs_boot.c | 536
-rw-r--r-- | sys/nfs/nfs_node.c | 282
-rw-r--r-- | sys/nfs/nfs_nqlease.c | 1183
-rw-r--r-- | sys/nfs/nfs_serv.c | 1902
-rw-r--r-- | sys/nfs/nfs_socket.c | 1984
-rw-r--r-- | sys/nfs/nfs_srvcache.c | 332
-rw-r--r-- | sys/nfs/nfs_subs.c | 1133
-rw-r--r-- | sys/nfs/nfs_syscalls.c | 885
-rw-r--r-- | sys/nfs/nfs_vfsops.c | 785
-rw-r--r-- | sys/nfs/nfs_vnops.c | 2563
-rw-r--r-- | sys/nfs/nfsdiskless.h | 60
-rw-r--r-- | sys/nfs/nfsm_subs.h | 270
-rw-r--r-- | sys/nfs/nfsmount.h | 128
-rw-r--r-- | sys/nfs/nfsnode.h | 167
-rw-r--r-- | sys/nfs/nfsrtt.h | 98
-rw-r--r-- | sys/nfs/nfsrvcache.h | 84
-rw-r--r-- | sys/nfs/nfsv2.h | 262
-rw-r--r-- | sys/nfs/nqnfs.h | 202
-rw-r--r-- | sys/nfs/rpcv2.h | 90
-rw-r--r-- | sys/nfs/swapnfs.c | 63
-rw-r--r-- | sys/nfs/xdr_subs.h | 83
25 files changed, 14846 insertions, 0 deletions
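For orientation before the diff itself: the import below adds a small kernel Sun RPC layer (sys/nfs/krpc.h, krpc_subr.c) that the diskless-boot code (sys/nfs/nfs_boot.c) drives. The sketch that follows is illustrative only, not part of the commit; it mirrors the md_mount() pattern from nfs_boot.c (look up the server port with krpc_portmap(), then issue the request with krpc_call()). The names EXAMPLE_PROG, EXAMPLE_VERS, EXAMPLE_PROC, and example_rpc() are hypothetical placeholders, not symbols from this tree.

```c
/*
 * Illustrative sketch, not part of the import above: how the kernel RPC
 * helpers declared in sys/nfs/krpc.h are typically used, modeled on
 * md_mount() in sys/nfs/nfs_boot.c.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <netinet/in.h>

#include <nfs/rpcv2.h>
#include <nfs/krpc.h>

#define EXAMPLE_PROG	300019	/* hypothetical RPC program number */
#define EXAMPLE_VERS	1	/* hypothetical version */
#define EXAMPLE_PROC	1	/* hypothetical procedure */

int
example_rpc(sin, name)
	struct sockaddr_in *sin;	/* server address; port filled in below */
	char *name;			/* single string argument */
{
	struct mbuf *m;
	int error;

	/* Ask the server's portmapper where EXAMPLE_PROG is listening. */
	error = krpc_portmap(sin, EXAMPLE_PROG, EXAMPLE_VERS, &sin->sin_port);
	if (error)
		return (error);

	/* Encode the argument, then do the RPC; the reply replaces *m. */
	m = xdr_string_encode(name, strlen(name));
	error = krpc_call(sin, EXAMPLE_PROG, EXAMPLE_VERS, EXAMPLE_PROC,
	    &m, NULL);
	if (error)
		return (error);		/* request mbuf already freed */

	/* ... decode the reply held in m here ... */
	m_freem(m);
	return (0);
}
```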
diff --git a/sys/nfs/krpc.h b/sys/nfs/krpc.h new file mode 100644 index 00000000000..32ed3b8e302 --- /dev/null +++ b/sys/nfs/krpc.h @@ -0,0 +1,39 @@ +/* $NetBSD: krpc.h,v 1.3 1995/04/24 21:54:56 gwr Exp $ */ + +#include <sys/cdefs.h> + +int krpc_call __P((struct sockaddr_in *sin, \ + u_int prog, u_int vers, u_int func, \ + struct mbuf **data, struct mbuf **from)); + +int krpc_portmap __P((struct sockaddr_in *sin, \ + u_int prog, u_int vers, u_int16_t *portp)); + +struct mbuf * xdr_string_encode __P((char *str, int len)); +struct mbuf * xdr_string_decode __P((struct mbuf *m, char *str, int *len_p)); +struct mbuf * xdr_inaddr_encode __P((struct in_addr *ia)); +struct mbuf * xdr_inaddr_decode __P((struct mbuf *m, struct in_addr *ia)); + + +/* + * RPC definitions for the portmapper + */ +#define PMAPPORT 111 +#define PMAPPROG 100000 +#define PMAPVERS 2 +#define PMAPPROC_NULL 0 +#define PMAPPROC_SET 1 +#define PMAPPROC_UNSET 2 +#define PMAPPROC_GETPORT 3 +#define PMAPPROC_DUMP 4 +#define PMAPPROC_CALLIT 5 + + +/* + * RPC definitions for bootparamd + */ +#define BOOTPARAM_PROG 100026 +#define BOOTPARAM_VERS 1 +#define BOOTPARAM_WHOAMI 1 +#define BOOTPARAM_GETFILE 2 + diff --git a/sys/nfs/krpc_subr.c b/sys/nfs/krpc_subr.c new file mode 100644 index 00000000000..a59b5fa0c9a --- /dev/null +++ b/sys/nfs/krpc_subr.c @@ -0,0 +1,581 @@ +/* $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ */ + +/* + * Copyright (c) 1995 Gordon Ross, Adam Glass + * Copyright (c) 1992 Regents of the University of California. + * All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * partially based on: + * libnetboot/rpc.c + * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL) + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/mbuf.h> +#include <sys/reboot.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <net/if.h> +#include <netinet/in.h> + +#include <nfs/rpcv2.h> +#include <nfs/krpc.h> +#include <nfs/xdr_subs.h> + +/* + * Kernel support for Sun RPC + * + * Used currently for bootstrapping in nfs diskless configurations. + */ + +/* + * Generic RPC headers + */ + +struct auth_info { + u_int32_t authtype; /* auth type */ + u_int32_t authlen; /* auth length */ +}; + +struct auth_unix { + int32_t ua_time; + int32_t ua_hostname; /* null */ + int32_t ua_uid; + int32_t ua_gid; + int32_t ua_gidlist; /* null */ +}; + +struct rpc_call { + u_int32_t rp_xid; /* request transaction id */ + int32_t rp_direction; /* call direction (0) */ + u_int32_t rp_rpcvers; /* rpc version (2) */ + u_int32_t rp_prog; /* program */ + u_int32_t rp_vers; /* version */ + u_int32_t rp_proc; /* procedure */ + struct auth_info rpc_auth; + struct auth_unix rpc_unix; + struct auth_info rpc_verf; +}; + +struct rpc_reply { + u_int32_t rp_xid; /* request transaction id */ + int32_t rp_direction; /* call direction (1) */ + int32_t rp_astatus; /* accept status (0: accepted) */ + union { + u_int32_t rpu_errno; + struct { + struct auth_info rok_auth; + u_int32_t rok_status; + } rpu_rok; + } rp_u; +}; +#define rp_errno rp_u.rpu_errno +#define rp_auth rp_u.rpu_rok.rok_auth +#define rp_status rp_u.rpu_rok.rok_status + +#define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */ + +/* + * What is the longest we will wait before re-sending a request? + * Note this is also the frequency of "RPC timeout" messages. + * The re-send loop count sup linearly to this maximum, so the + * first complaint will happen after (1+2+3+4+5)=15 seconds. + */ +#define MAX_RESEND_DELAY 5 /* seconds */ + +/* + * Call portmap to lookup a port number for a particular rpc program + * Returns non-zero error on failure. + */ +int +krpc_portmap(sin, prog, vers, portp) + struct sockaddr_in *sin; /* server address */ + u_int prog, vers; /* host order */ + u_int16_t *portp; /* network order */ +{ + struct sdata { + u_int32_t prog; /* call program */ + u_int32_t vers; /* call version */ + u_int32_t proto; /* call protocol */ + u_int32_t port; /* call port (unused) */ + } *sdata; + struct rdata { + u_int16_t pad; + u_int16_t port; + } *rdata; + struct mbuf *m; + int error; + + /* The portmapper port is fixed. */ + if (prog == PMAPPROG) { + *portp = htons(PMAPPORT); + return 0; + } + + m = m_get(M_WAIT, MT_DATA); + if (m == NULL) + return ENOBUFS; + sdata = mtod(m, struct sdata *); + m->m_len = sizeof(*sdata); + + /* Do the RPC to get it. */ + sdata->prog = txdr_unsigned(prog); + sdata->vers = txdr_unsigned(vers); + sdata->proto = txdr_unsigned(IPPROTO_UDP); + sdata->port = 0; + + sin->sin_port = htons(PMAPPORT); + error = krpc_call(sin, PMAPPROG, PMAPVERS, + PMAPPROC_GETPORT, &m, NULL); + if (error) + return error; + + if (m->m_len < sizeof(*rdata)) { + m = m_pullup(m, sizeof(*rdata)); + if (m == NULL) + return ENOBUFS; + } + rdata = mtod(m, struct rdata *); + *portp = rdata->port; + + m_freem(m); + return 0; +} + +/* + * Do a remote procedure call (RPC) and wait for its reply. + * If from_p is non-null, then we are doing broadcast, and + * the address from whence the response came is saved there. 
+ */ +int +krpc_call(sa, prog, vers, func, data, from_p) + struct sockaddr_in *sa; + u_int prog, vers, func; + struct mbuf **data; /* input/output */ + struct mbuf **from_p; /* output */ +{ + struct socket *so; + struct sockaddr_in *sin; + struct mbuf *m, *nam, *mhead, *from; + struct rpc_call *call; + struct rpc_reply *reply; + struct uio auio; + int error, rcvflg, timo, secs, len; + static u_int32_t xid = ~0xFF; + u_int tport; + + /* + * Validate address family. + * Sorry, this is INET specific... + */ + if (sa->sin_family != AF_INET) + return (EAFNOSUPPORT); + + /* Free at end if not null. */ + nam = mhead = NULL; + from = NULL; + + /* + * Create socket and set its recieve timeout. + */ + if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0))) + goto out; + + m = m_get(M_WAIT, MT_SOOPTS); + if (m == NULL) { + error = ENOBUFS; + goto out; + } else { + struct timeval *tv; + tv = mtod(m, struct timeval *); + m->m_len = sizeof(*tv); + tv->tv_sec = 1; + tv->tv_usec = 0; + if ((error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m))) + goto out; + } + + /* + * Enable broadcast if necessary. + */ + if (from_p) { + int *on; + m = m_get(M_WAIT, MT_SOOPTS); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + on = mtod(m, int *); + m->m_len = sizeof(*on); + *on = 1; + if ((error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m))) + goto out; + } + + /* + * Bind the local endpoint to a reserved port, + * because some NFS servers refuse requests from + * non-reserved (non-privileged) ports. + */ + m = m_getclr(M_WAIT, MT_SONAME); + sin = mtod(m, struct sockaddr_in *); + sin->sin_len = m->m_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + tport = IPPORT_RESERVED; + do { + tport--; + sin->sin_port = htons(tport); + error = sobind(so, m); + } while (error == EADDRINUSE && + tport > IPPORT_RESERVED / 2); + m_freem(m); + if (error) { + printf("bind failed\n"); + goto out; + } + + /* + * Setup socket address for the server. + */ + nam = m_get(M_WAIT, MT_SONAME); + if (nam == NULL) { + error = ENOBUFS; + goto out; + } + sin = mtod(nam, struct sockaddr_in *); + bcopy((caddr_t)sa, (caddr_t)sin, + (nam->m_len = sa->sin_len)); + + /* + * Prepend RPC message header. + */ + mhead = m_gethdr(M_WAIT, MT_DATA); + mhead->m_next = *data; + call = mtod(mhead, struct rpc_call *); + mhead->m_len = sizeof(*call); + bzero((caddr_t)call, sizeof(*call)); + /* rpc_call part */ + xid++; + call->rp_xid = txdr_unsigned(xid); + /* call->rp_direction = 0; */ + call->rp_rpcvers = txdr_unsigned(2); + call->rp_prog = txdr_unsigned(prog); + call->rp_vers = txdr_unsigned(vers); + call->rp_proc = txdr_unsigned(func); + /* rpc_auth part (auth_unix as root) */ + call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX); + call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix)); + /* rpc_verf part (auth_null) */ + call->rpc_verf.authtype = 0; + call->rpc_verf.authlen = 0; + + /* + * Setup packet header + */ + len = 0; + m = mhead; + while (m) { + len += m->m_len; + m = m->m_next; + } + mhead->m_pkthdr.len = len; + mhead->m_pkthdr.rcvif = NULL; + + /* + * Send it, repeatedly, until a reply is received, + * but delay each re-send by an increasing amount. + * If the delay hits the maximum, start complaining. + */ + timo = 0; + for (;;) { + /* Send RPC request (or re-send). 
*/ + m = m_copym(mhead, 0, M_COPYALL, M_WAIT); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + error = sosend(so, nam, NULL, m, NULL, 0); + if (error) { + printf("krpc_call: sosend: %d\n", error); + goto out; + } + m = NULL; + + /* Determine new timeout. */ + if (timo < MAX_RESEND_DELAY) + timo++; + else + printf("RPC timeout for server 0x%x\n", + ntohl(sin->sin_addr.s_addr)); + + /* + * Wait for up to timo seconds for a reply. + * The socket receive timeout was set to 1 second. + */ + secs = timo; + while (secs > 0) { + if (from) { + m_freem(from); + from = NULL; + } + if (m) { + m_freem(m); + m = NULL; + } + auio.uio_resid = len = 1<<16; + rcvflg = 0; + error = soreceive(so, &from, &auio, &m, NULL, &rcvflg); + if (error == EWOULDBLOCK) { + secs--; + continue; + } + if (error) + goto out; + len -= auio.uio_resid; + + /* Does the reply contain at least a header? */ + if (len < MIN_REPLY_HDR) + continue; + if (m->m_len < MIN_REPLY_HDR) + continue; + reply = mtod(m, struct rpc_reply *); + + /* Is it the right reply? */ + if (reply->rp_direction != txdr_unsigned(RPC_REPLY)) + continue; + + if (reply->rp_xid != txdr_unsigned(xid)) + continue; + + /* Was RPC accepted? (authorization OK) */ + if (reply->rp_astatus != 0) { + error = fxdr_unsigned(u_int32_t, reply->rp_errno); + printf("rpc denied, error=%d\n", error); + continue; + } + + /* Did the call succeed? */ + if (reply->rp_status != 0) { + error = fxdr_unsigned(u_int32_t, reply->rp_status); + printf("rpc denied, status=%d\n", error); + continue; + } + + goto gotreply; /* break two levels */ + + } /* while secs */ + } /* forever send/receive */ + + error = ETIMEDOUT; + goto out; + + gotreply: + + /* + * Get RPC reply header into first mbuf, + * get its length, then strip it off. + */ + len = sizeof(*reply); + if (m->m_len < len) { + m = m_pullup(m, len); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + } + reply = mtod(m, struct rpc_reply *); + if (reply->rp_auth.authtype != 0) { + len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen); + len = (len + 3) & ~3; /* XXX? */ + } + m_adj(m, len); + + /* result */ + *data = m; + if (from_p) { + *from_p = from; + from = NULL; + } + + out: + if (nam) m_freem(nam); + if (mhead) m_freem(mhead); + if (from) m_freem(from); + soclose(so); + return error; +} + +/* + * eXternal Data Representation routines. + * (but with non-standard args...) + */ + +/* + * String representation for RPC. 
+ */ +struct xdr_string { + u_int32_t len; /* length without null or padding */ + char data[4]; /* data (longer, of course) */ + /* data is padded to a long-word boundary */ +}; + +struct mbuf * +xdr_string_encode(str, len) + char *str; + int len; +{ + struct mbuf *m; + struct xdr_string *xs; + int dlen; /* padded string length */ + int mlen; /* message length */ + + dlen = (len + 3) & ~3; + mlen = dlen + 4; + + m = m_get(M_WAIT, MT_DATA); + if (mlen > MLEN) { + if (mlen > MCLBYTES) + return(NULL); + MCLGET(m, M_WAIT); + if (m == NULL) + return NULL; + } + xs = mtod(m, struct xdr_string *); + m->m_len = mlen; + xs->len = txdr_unsigned(len); + bcopy(str, xs->data, len); + return (m); +} + +struct mbuf * +xdr_string_decode(m, str, len_p) + struct mbuf *m; + char *str; + int *len_p; /* bufsize - 1 */ +{ + struct xdr_string *xs; + int mlen; /* message length */ + int slen; /* string length */ + + if (m->m_len < 4) { + m = m_pullup(m, 4); + if (m == NULL) + return (NULL); + } + xs = mtod(m, struct xdr_string *); + slen = fxdr_unsigned(u_int32_t, xs->len); + mlen = 4 + ((slen + 3) & ~3); + + if (slen > *len_p) + slen = *len_p; + m_copydata(m, 4, slen, str); + m_adj(m, mlen); + + str[slen] = '\0'; + *len_p = slen; + + return (m); +} + + +/* + * Inet address in RPC messages + * (Note, really four ints, NOT chars. Blech.) + */ +struct xdr_inaddr { + u_int32_t atype; + u_int32_t addr[4]; +}; + +struct mbuf * +xdr_inaddr_encode(ia) + struct in_addr *ia; /* already in network order */ +{ + struct mbuf *m; + struct xdr_inaddr *xi; + u_int8_t *cp; + u_int32_t *ip; + + m = m_get(M_WAIT, MT_DATA); + xi = mtod(m, struct xdr_inaddr *); + m->m_len = sizeof(*xi); + xi->atype = txdr_unsigned(1); + ip = xi->addr; + cp = (u_int8_t *)&ia->s_addr; + *ip++ = txdr_unsigned(*cp++); + *ip++ = txdr_unsigned(*cp++); + *ip++ = txdr_unsigned(*cp++); + *ip++ = txdr_unsigned(*cp++); + + return (m); +} + +struct mbuf * +xdr_inaddr_decode(m, ia) + struct mbuf *m; + struct in_addr *ia; /* already in network order */ +{ + struct xdr_inaddr *xi; + u_int8_t *cp; + u_int32_t *ip; + + if (m->m_len < sizeof(*xi)) { + m = m_pullup(m, sizeof(*xi)); + if (m == NULL) + return (NULL); + } + xi = mtod(m, struct xdr_inaddr *); + if (xi->atype != txdr_unsigned(1)) { + ia->s_addr = INADDR_ANY; + goto out; + } + ip = xi->addr; + cp = (u_int8_t *)&ia->s_addr; + *cp++ = fxdr_unsigned(u_int8_t, *ip++); + *cp++ = fxdr_unsigned(u_int8_t, *ip++); + *cp++ = fxdr_unsigned(u_int8_t, *ip++); + *cp++ = fxdr_unsigned(u_int8_t, *ip++); + +out: + m_adj(m, sizeof(*xi)); + return (m); +} diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h new file mode 100644 index 00000000000..f8f65bb0d09 --- /dev/null +++ b/sys/nfs/nfs.h @@ -0,0 +1,308 @@ +/* $NetBSD: nfs.h,v 1.8 1995/03/26 20:37:29 jtc Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Tunable constants for nfs + */ + +#define NFS_MAXIOVEC 34 +#define NFS_HZ 25 /* Ticks per second for NFS timeouts */ +#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */ +#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */ +#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */ +#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/ +#define NFS_MAXREXMIT 100 /* Stop counting after this many */ +#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ +#define NFS_RETRANS 10 /* Num of retrans for soft mounts */ +#define NFS_MAXGRPS 16 /* Max. size of groups list */ +#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ +#define NFS_MAXATTRTIMO 60 +#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ +#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ +#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ +#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ +#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */ +#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ +#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */ +#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */ +#define NMOD(a) ((a) % nfs_asyncdaemons) + +/* + * Set the attribute timeout based on how recently the file has been modified. + */ +#define NFS_ATTRTIMEO(np) \ + ((((np)->n_flag & NMODIFIED) || \ + (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ + ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ + (time.tv_sec - (np)->n_mtime) / 10)) + +/* + * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs + * should ever try and use it. + */ +struct nfsd_args { + int sock; /* Socket to serve */ + caddr_t name; /* Client address for connection based sockets */ + int namelen; /* Length of name */ +}; + +struct nfsd_srvargs { + struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ + uid_t nsd_uid; /* Effective uid mapped to cred */ + u_long nsd_haddr; /* Ip address of client */ + struct ucred nsd_cr; /* Cred. 
uid maps to */ + int nsd_authlen; /* Length of auth string (ret) */ + char *nsd_authstr; /* Auth string (ret) */ +}; + +struct nfsd_cargs { + char *ncd_dirp; /* Mount dir path */ + uid_t ncd_authuid; /* Effective uid */ + int ncd_authtype; /* Type of authenticator */ + int ncd_authlen; /* Length of authenticator string */ + char *ncd_authstr; /* Authenticator string */ +}; + +/* + * Stats structure + */ +struct nfsstats { + int attrcache_hits; + int attrcache_misses; + int lookupcache_hits; + int lookupcache_misses; + int direofcache_hits; + int direofcache_misses; + int biocache_reads; + int read_bios; + int read_physios; + int biocache_writes; + int write_bios; + int write_physios; + int biocache_readlinks; + int readlink_bios; + int biocache_readdirs; + int readdir_bios; + int rpccnt[NFS_NPROCS]; + int rpcretries; + int srvrpccnt[NFS_NPROCS]; + int srvrpc_errs; + int srv_errs; + int rpcrequests; + int rpctimeouts; + int rpcunexpected; + int rpcinvalid; + int srvcache_inproghits; + int srvcache_idemdonehits; + int srvcache_nonidemdonehits; + int srvcache_misses; + int srvnqnfs_leases; + int srvnqnfs_maxleases; + int srvnqnfs_getleases; +}; + +/* + * Flags for nfssvc() system call. + */ +#define NFSSVC_BIOD 0x002 +#define NFSSVC_NFSD 0x004 +#define NFSSVC_ADDSOCK 0x008 +#define NFSSVC_AUTHIN 0x010 +#define NFSSVC_GOTAUTH 0x040 +#define NFSSVC_AUTHINFAIL 0x080 +#define NFSSVC_MNTD 0x100 + +/* + * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. + * What should be in this set is open to debate, but I believe that since + * I/O system calls on ufs are never interrupted by signals the set should + * be minimal. My reasoning is that many current programs that use signals + * such as SIGALRM will not expect file I/O system calls to be interrupted + * by them and break. + */ +#ifdef _KERNEL +#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ + sigmask(SIGHUP)|sigmask(SIGQUIT)) + +/* + * Socket errors ignored for connectionless sockets?? + * For now, ignore them all + */ +#define NFSIGNORE_SOERROR(s, e) \ + ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ + ((s) & PR_CONNREQUIRED) == 0) + +/* + * Nfs outstanding request list element + */ +struct nfsreq { + TAILQ_ENTRY(nfsreq) r_chain; + struct mbuf *r_mreq; + struct mbuf *r_mrep; + struct mbuf *r_md; + caddr_t r_dpos; + struct nfsmount *r_nmp; + struct vnode *r_vp; + u_long r_xid; + int r_flags; /* flags on request, see below */ + int r_retry; /* max retransmission count */ + int r_rexmit; /* current retrans count */ + int r_timer; /* tick counter on reply */ + int r_procnum; /* NFS procedure number */ + int r_rtt; /* RTT for rpc */ + struct proc *r_procp; /* Proc that did I/O system call */ +}; + +/* + * Queue head for nfsreq's + */ +TAILQ_HEAD(, nfsreq) nfs_reqq; + +/* Flag values for r_flags */ +#define R_TIMING 0x01 /* timing request (in mntp) */ +#define R_SENT 0x02 /* request has been sent */ +#define R_SOFTTERM 0x04 /* soft mnt, too many retries */ +#define R_INTR 0x08 /* intr mnt, signal pending */ +#define R_SOCKERR 0x10 /* Fatal error on socket */ +#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ +#define R_MUSTRESEND 0x40 /* Must resend request */ +#define R_GETONEREP 0x80 /* Probe for one reply only */ + +struct nfsstats nfsstats; + +/* + * A list of nfssvc_sock structures is maintained with all the sockets + * that require service by the nfsd. + * The nfsuid structs hang off of the nfssvc_sock structs in both lru + * and uid hash lists. 
+ */ +#define NUIDHASHSIZ 32 +#define NUIDHASH(sock, uid) \ + (&(sock)->ns_uidhashtbl[(uid) & (sock)->ns_uidhash]) + +/* + * Network address hash list element + */ +union nethostaddr { + u_long had_inetaddr; + struct mbuf *had_nam; +}; + +struct nfsuid { + TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ + LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ + int nu_flag; /* Flags */ + uid_t nu_uid; /* Uid mapped by this entry */ + union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ + struct ucred nu_cr; /* Cred uid mapped to */ +}; + +#define nu_inetaddr nu_haddr.had_inetaddr +#define nu_nam nu_haddr.had_nam +/* Bits for nu_flag */ +#define NU_INETADDR 0x1 + +struct nfssvc_sock { + TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ + TAILQ_HEAD(, nfsuid) ns_uidlruhead; + LIST_HEAD(, nfsuid) *ns_uidhashtbl; + u_long ns_uidhash; + + int ns_flag; + u_long ns_sref; + struct file *ns_fp; + struct socket *ns_so; + int ns_solock; + struct mbuf *ns_nam; + int ns_cc; + struct mbuf *ns_raw; + struct mbuf *ns_rawend; + int ns_reclen; + struct mbuf *ns_rec; + struct mbuf *ns_recend; + int ns_numuids; +}; + +/* Bits for "ns_flag" */ +#define SLP_VALID 0x01 +#define SLP_DOREC 0x02 +#define SLP_NEEDQ 0x04 +#define SLP_DISCONN 0x08 +#define SLP_GETSTREAM 0x10 +#define SLP_ALLFLAGS 0xff + +TAILQ_HEAD(, nfssvc_sock) nfssvc_sockhead; +int nfssvc_sockhead_flag; +#define SLP_INIT 0x01 +#define SLP_WANTINIT 0x02 + +/* + * One of these structures is allocated for each nfsd. + */ +struct nfsd { + TAILQ_ENTRY(nfsd) nd_chain; /* List of all nfsd's */ + int nd_flag; /* NFSD_ flags */ + struct nfssvc_sock *nd_slp; /* Current socket */ + struct mbuf *nd_nam; /* Client addr for datagram req. */ + struct mbuf *nd_mrep; /* Req. mbuf list */ + struct mbuf *nd_md; + caddr_t nd_dpos; /* Position in list */ + int nd_procnum; /* RPC procedure number */ + u_long nd_retxid; /* RPC xid */ + int nd_repstat; /* Reply status value */ + struct ucred nd_cr; /* Credentials for req. */ + int nd_nqlflag; /* Leasing flag */ + int nd_duration; /* Lease duration */ + int nd_authlen; /* Authenticator len */ + u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ + struct proc *nd_procp; /* Proc ptr */ +}; + +/* Bits for "nd_flag" */ +#define NFSD_WAITING 0x01 +#define NFSD_REQINPROG 0x02 +#define NFSD_NEEDAUTH 0x04 +#define NFSD_AUTHFAIL 0x08 + +TAILQ_HEAD(, nfsd) nfsd_head; +int nfsd_head_flag; +#define NFSD_CHECKSLP 0x01 + +#endif /* _KERNEL */ diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c new file mode 100644 index 00000000000..fd4d2b7fad9 --- /dev/null +++ b/sys/nfs/nfs_bio.c @@ -0,0 +1,826 @@ +/* $NetBSD: nfs_bio.c,v 1.21 1995/07/24 21:20:46 cgd Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/trace.h> +#include <sys/mount.h> +#include <sys/kernel.h> + +#include <vm/vm.h> + +#include <nfs/nfsnode.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> + +struct buf *incore(), *nfs_getcacheblk(); +extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +extern int nfs_numasync; + +/* + * Vnode op for read using bio + * Any similarity to readip() is purely coincidental + */ +nfs_bioread(vp, uio, ioflag, cred) + register struct vnode *vp; + register struct uio *uio; + int ioflag; + struct ucred *cred; +{ + register struct nfsnode *np = VTONFS(vp); + register int biosize, diff; + struct buf *bp, *rabp; + struct vattr vattr; + struct proc *p; + struct nfsmount *nmp; + daddr_t lbn, bn, rabn; + caddr_t baddr; + int got_buf, nra, error = 0, n, on, not_readin; + +#ifdef lint + ioflag = ioflag; +#endif /* lint */ +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("nfs_read mode"); +#endif + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0 && vp->v_type != VDIR) + return (EINVAL); + nmp = VFSTONFS(vp->v_mount); + biosize = nmp->nm_rsize; + p = uio->uio_procp; + /* + * For nfs, cache consistency can only be maintained approximately. + * Although RFC1094 does not specify the criteria, the following is + * believed to be compatible with the reference port. + * For nqnfs, full cache consistency is maintained within the loop. + * For nfs: + * If the file's modify time on the server has changed since the + * last read rpc or you have written to the file, + * you may have lost data cache consistency with the + * server, so flush all of the file's data out of the cache. + * Then force a getattr rpc to ensure that you have up to date + * attributes. + * The mount flag NFSMNT_MYWRITE says "Assume that my writes are + * the ones changing the modify time. + * NB: This implies that cache data can be read when up to + * NFS_ATTRTIMEO seconds out of date. If you find that you need current + * attributes this could be forced by setting n_attrstamp to 0 before + * the VOP_GETATTR() call. 
+ */ + if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { + if (np->n_flag & NMODIFIED) { + if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || + vp->v_type != VREG) { + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + } + np->n_attrstamp = 0; + np->n_direofoffset = 0; + if (error = VOP_GETATTR(vp, &vattr, cred, p)) + return (error); + np->n_mtime = vattr.va_mtime.ts_sec; + } else { + if (error = VOP_GETATTR(vp, &vattr, cred, p)) + return (error); + if (np->n_mtime != vattr.va_mtime.ts_sec) { + np->n_direofoffset = 0; + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + np->n_mtime = vattr.va_mtime.ts_sec; + } + } + } + do { + + /* + * Get a valid lease. If cached data is stale, flush it. + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKINVALID(vp, np, NQL_READ)) { + do { + error = nqnfs_getlease(vp, NQL_READ, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) + return (error); + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE) || + ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { + if (vp->v_type == VDIR) { + np->n_direofoffset = 0; + cache_purge(vp); + } + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + np->n_brev = np->n_lrev; + } + } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { + np->n_direofoffset = 0; + cache_purge(vp); + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + } + } + if (np->n_flag & NQNFSNONCACHE) { + switch (vp->v_type) { + case VREG: + error = nfs_readrpc(vp, uio, cred); + break; + case VLNK: + error = nfs_readlinkrpc(vp, uio, cred); + break; + case VDIR: + error = nfs_readdirrpc(vp, uio, cred); + break; + }; + return (error); + } + baddr = (caddr_t)0; + switch (vp->v_type) { + case VREG: + nfsstats.biocache_reads++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize-1); + bn = lbn * (biosize / DEV_BSIZE); + not_readin = 1; + + /* + * Start the read ahead(s), as required. + */ + if (nfs_numasync > 0 && nmp->nm_readahead > 0 && + lbn == vp->v_lastr + 1) { + for (nra = 0; nra < nmp->nm_readahead && + (lbn + 1 + nra) * biosize < np->n_size; nra++) { + rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); + if (!incore(vp, rabn)) { + rabp = nfs_getcacheblk(vp, rabn, biosize, p); + if (!rabp) + return (EINTR); + if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + if (nfs_asyncio(rabp, cred)) { + rabp->b_flags |= B_INVAL; + brelse(rabp); + } + } else + brelse(rabp); + } + } + } + + /* + * If the block is in the cache and has the required data + * in a valid region, just copy it out. + * Otherwise, get the block and write back/read in, + * as required. 
+ */ + if ((bp = incore(vp, bn)) && + (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == + (B_BUSY | B_WRITEINPROG)) + got_buf = 0; + else { +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { + bp->b_flags |= B_READ; + not_readin = 0; + if (error = nfs_doio(bp, cred, p)) { + brelse(bp); + return (error); + } + } + } + n = min((unsigned)(biosize - on), uio->uio_resid); + diff = np->n_size - uio->uio_offset; + if (diff < n) + n = diff; + if (not_readin && n > 0) { + if (on < bp->b_validoff || (on + n) > bp->b_validend) { + if (!got_buf) { + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + } + bp->b_flags |= B_INVAL; + if (bp->b_dirtyend > 0) { + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfsbioread"); + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + } else + brelse(bp); + goto again; + } + } + vp->v_lastr = lbn; + diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); + if (diff < n) + n = diff; + break; + case VLNK: + nfsstats.biocache_readlinks++; + bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); + if (!bp) + return (EINTR); + if ((bp->b_flags & B_DONE) == 0) { + bp->b_flags |= B_READ; + if (error = nfs_doio(bp, cred, p)) { + brelse(bp); + return (error); + } + } + n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); + got_buf = 1; + on = 0; + break; + case VDIR: + if (uio->uio_resid < NFS_DIRBLKSIZ) + return (0); + nfsstats.biocache_readdirs++; + bn = (daddr_t)uio->uio_offset; + bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p); + if (!bp) + return (EINTR); + if ((bp->b_flags & B_DONE) == 0) { + bp->b_flags |= B_READ; + if (error = nfs_doio(bp, cred, p)) { + brelse(bp); + return (error); + } + } + + /* + * If not eof and read aheads are enabled, start one. + * (You need the current block first, so that you have the + * directory offset cookie of the next block. 
+ */ + rabn = bp->b_blkno; + if (nfs_numasync > 0 && nmp->nm_readahead > 0 && + rabn != 0 && rabn != np->n_direofoffset && + !incore(vp, rabn)) { + rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p); + if (rabp) { + if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + if (nfs_asyncio(rabp, cred)) { + rabp->b_flags |= B_INVAL; + brelse(rabp); + } + } else + brelse(rabp); + } + } + on = 0; + n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); + got_buf = 1; + break; + }; + + if (n > 0) { + if (!baddr) + baddr = bp->b_data; + error = uiomove(baddr + on, (int)n, uio); + } + switch (vp->v_type) { + case VLNK: + n = 0; + break; + case VDIR: + uio->uio_offset = bp->b_blkno; + break; + }; + if (got_buf) + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n > 0); + return (error); +} + +/* + * Vnode op for write using bio + */ +nfs_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register int biosize; + register struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + register struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + register struct ucred *cred = ap->a_cred; + int ioflag = ap->a_ioflag; + struct buf *bp; + struct vattr vattr; + struct nfsmount *nmp; + daddr_t lbn, bn; + int n, on, error = 0; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("nfs_write mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("nfs_write proc"); +#endif + if (vp->v_type != VREG) + return (EIO); + if (np->n_flag & NWRITEERR) { + np->n_flag &= ~NWRITEERR; + return (np->n_error); + } + if (ioflag & (IO_APPEND | IO_SYNC)) { + if (np->n_flag & NMODIFIED) { + np->n_attrstamp = 0; + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + } + if (ioflag & IO_APPEND) { + np->n_attrstamp = 0; + if (error = VOP_GETATTR(vp, &vattr, cred, p)) + return (error); + uio->uio_offset = np->n_size; + } + } + nmp = VFSTONFS(vp->v_mount); + if (uio->uio_offset < 0) + return (EINVAL); + if (uio->uio_resid == 0) + return (0); + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, i don't think it matters + */ + if (p && uio->uio_offset + uio->uio_resid > + p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + psignal(p, SIGXFSZ); + return (EFBIG); + } + /* + * I use nm_rsize, not nm_wsize so that all buffer cache blocks + * will be the same size within a filesystem. nfs_writerpc will + * still use nm_wsize when sizing the rpc's. + */ + biosize = nmp->nm_rsize; + do { + + /* + * XXX make sure we aren't cached in the VM page cache + */ + (void)vnode_pager_uncache(vp); + + /* + * Check for a valid write lease. 
+ * If non-cachable, just do the rpc + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, NQL_WRITE)) { + do { + error = nqnfs_getlease(vp, NQL_WRITE, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) + return (error); + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + np->n_brev = np->n_lrev; + } + } + if (np->n_flag & NQNFSNONCACHE) + return (nfs_writerpc(vp, uio, cred, ioflag)); + nfsstats.biocache_writes++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize-1); + n = min((unsigned)(biosize - on), uio->uio_resid); + bn = lbn * (biosize / DEV_BSIZE); +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + if (bp->b_wcred == NOCRED) { + crhold(cred); + bp->b_wcred = cred; + } + np->n_flag |= NMODIFIED; + if (uio->uio_offset + n > np->n_size) { + np->n_size = uio->uio_offset + n; + vnode_pager_setsize(vp, (u_long)np->n_size); + } + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise force a write rpc of the old dirty area. + */ + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + bp->b_proc = p; + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + goto again; + } + + /* + * Check for valid write lease and get one as required. + * In case getblk() and/or bwrite() delayed us. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, NQL_WRITE)) { + do { + error = nqnfs_getlease(vp, NQL_WRITE, cred, p); + } while (error == NQNFS_EXPIRED); + if (error) { + brelse(bp); + return (error); + } + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + brelse(bp); + if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) + return (error); + np->n_brev = np->n_lrev; + goto again; + } + } + if (error = uiomove((char *)bp->b_data + on, n, uio)) { + bp->b_flags |= B_ERROR; + brelse(bp); + return (error); + } + if (bp->b_dirtyend > 0) { + bp->b_dirtyoff = min(on, bp->b_dirtyoff); + bp->b_dirtyend = max((on + n), bp->b_dirtyend); + } else { + bp->b_dirtyoff = on; + bp->b_dirtyend = on + n; + } +#ifndef notdef + if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || + bp->b_validoff > bp->b_dirtyend) { + bp->b_validoff = bp->b_dirtyoff; + bp->b_validend = bp->b_dirtyend; + } else { + bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); + bp->b_validend = max(bp->b_validend, bp->b_dirtyend); + } +#else + bp->b_validoff = bp->b_dirtyoff; + bp->b_validend = bp->b_dirtyend; +#endif + if (ioflag & IO_APPEND) + bp->b_flags |= B_APPENDWRITE; + + /* + * If the lease is non-cachable or IO_SYNC do bwrite(). + */ + if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { + bp->b_proc = p; + if (error = VOP_BWRITE(bp)) + return (error); + } else if ((n + on) == biosize && + (nmp->nm_flag & NFSMNT_NQNFS) == 0) { + bp->b_proc = (struct proc *)0; + bawrite(bp); + } else + bdwrite(bp); + } while (uio->uio_resid > 0 && n > 0); + return (0); +} + +/* + * Get an nfs cache block. + * Allocate a new one if the block isn't currently in the cache + * and return the block marked busy. If the calling process is + * interrupted by a signal for an interruptible mount point, return + * NULL. 
+ */ +struct buf * +nfs_getcacheblk(vp, bn, size, p) + struct vnode *vp; + daddr_t bn; + int size; + struct proc *p; +{ + register struct buf *bp; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + + if (nmp->nm_flag & NFSMNT_INT) { + bp = getblk(vp, bn, size, PCATCH, 0); + while (bp == (struct buf *)0) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return ((struct buf *)0); + bp = getblk(vp, bn, size, 0, 2 * hz); + } + } else + bp = getblk(vp, bn, size, 0, 0); + return (bp); +} + +/* + * Flush and invalidate all dirty buffers. If another process is already + * doing the flush, just wait for completion. + */ +nfs_vinvalbuf(vp, flags, cred, p, intrflg) + struct vnode *vp; + int flags; + struct ucred *cred; + struct proc *p; + int intrflg; +{ + register struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int error = 0, slpflag, slptimeo; + + if ((nmp->nm_flag & NFSMNT_INT) == 0) + intrflg = 0; + if (intrflg) { + slpflag = PCATCH; + slptimeo = 2 * hz; + } else { + slpflag = 0; + slptimeo = 0; + } + /* + * First wait for any other process doing a flush to complete. + */ + while (np->n_flag & NFLUSHINPROG) { + np->n_flag |= NFLUSHWANT; + error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", + slptimeo); + if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + } + + /* + * Now, flush as required. + */ + np->n_flag |= NFLUSHINPROG; + error = vinvalbuf(vp, flags, cred, p, slpflag, 0); + while (error) { + if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { + np->n_flag &= ~NFLUSHINPROG; + if (np->n_flag & NFLUSHWANT) { + np->n_flag &= ~NFLUSHWANT; + wakeup((caddr_t)&np->n_flag); + } + return (EINTR); + } + error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); + } + np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); + if (np->n_flag & NFLUSHWANT) { + np->n_flag &= ~NFLUSHWANT; + wakeup((caddr_t)&np->n_flag); + } + return (0); +} + +/* + * Initiate asynchronous I/O. Return an error if no nfsiods are available. + * This is mainly to avoid queueing async I/O requests when the nfsiods + * are all hung on a dead server. + */ +nfs_asyncio(bp, cred) + register struct buf *bp; + struct ucred *cred; +{ + register int i; + + if (nfs_numasync == 0) + return (EIO); + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + if (nfs_iodwant[i]) { + if (bp->b_flags & B_READ) { + if (bp->b_rcred == NOCRED && cred != NOCRED) { + crhold(cred); + bp->b_rcred = cred; + } + } else { + if (bp->b_wcred == NOCRED && cred != NOCRED) { + crhold(cred); + bp->b_wcred = cred; + } + } + + TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); + nfs_iodwant[i] = (struct proc *)0; + wakeup((caddr_t)&nfs_iodwant[i]); + return (0); + } + return (EIO); +} + +/* + * Do an I/O operation to/from a cache block. This may be called + * synchronously or from an nfsiod. + */ +int +nfs_doio(bp, cr, p) + register struct buf *bp; + struct cred *cr; + struct proc *p; +{ + register struct uio *uiop; + register struct vnode *vp; + struct nfsnode *np; + struct nfsmount *nmp; + int error, diff, len; + struct uio uio; + struct iovec io; + + vp = bp->b_vp; + np = VTONFS(vp); + nmp = VFSTONFS(vp->v_mount); + uiop = &uio; + uiop->uio_iov = &io; + uiop->uio_iovcnt = 1; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = p; + + /* + * Historically, paging was done with physio, but no more... + */ + if (bp->b_flags & B_PHYS) { + /* + * ...though reading /dev/drum still gets us here. 
+ */ + io.iov_len = uiop->uio_resid = bp->b_bcount; + /* mapping was done by vmapbuf() */ + io.iov_base = bp->b_data; + uiop->uio_offset = bp->b_blkno * DEV_BSIZE; + if (bp->b_flags & B_READ) { + uiop->uio_rw = UIO_READ; + nfsstats.read_physios++; + error = nfs_readrpc(vp, uiop, cr); + } else { + uiop->uio_rw = UIO_WRITE; + nfsstats.write_physios++; + error = nfs_writerpc(vp, uiop, cr, 0); + } + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = error; + } + } else if (bp->b_flags & B_READ) { + io.iov_len = uiop->uio_resid = bp->b_bcount; + io.iov_base = bp->b_data; + uiop->uio_rw = UIO_READ; + switch (vp->v_type) { + case VREG: + uiop->uio_offset = bp->b_blkno * DEV_BSIZE; + nfsstats.read_bios++; + error = nfs_readrpc(vp, uiop, cr); + if (!error) { + bp->b_validoff = 0; + if (uiop->uio_resid) { + /* + * If len > 0, there is a hole in the file and + * no writes after the hole have been pushed to + * the server yet. + * Just zero fill the rest of the valid area. + */ + diff = bp->b_bcount - uiop->uio_resid; + len = np->n_size - (bp->b_blkno * DEV_BSIZE + + diff); + if (len > 0) { + len = min(len, uiop->uio_resid); + bzero((char *)bp->b_data + diff, len); + bp->b_validend = diff + len; + } else + bp->b_validend = diff; + } else + bp->b_validend = bp->b_bcount; + } + if (p && (vp->v_flag & VTEXT) && + (((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKINVALID(vp, np, NQL_READ) && + np->n_lrev != np->n_brev) || + (!(nmp->nm_flag & NFSMNT_NQNFS) && + np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { + uprintf("Process killed due to text file modification\n"); + psignal(p, SIGKILL); + p->p_holdcnt++; + } + break; + case VLNK: + uiop->uio_offset = 0; + nfsstats.readlink_bios++; + error = nfs_readlinkrpc(vp, uiop, cr); + break; + case VDIR: + uiop->uio_offset = bp->b_lblkno; + nfsstats.readdir_bios++; + if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) + error = nfs_readdirlookrpc(vp, uiop, cr); + else + error = nfs_readdirrpc(vp, uiop, cr); + /* + * Save offset cookie in b_blkno. + */ + bp->b_blkno = uiop->uio_offset; + break; + }; + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = error; + } + } else { + io.iov_len = uiop->uio_resid = bp->b_dirtyend + - bp->b_dirtyoff; + uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) + + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; + uiop->uio_rw = UIO_WRITE; + nfsstats.write_bios++; + if (bp->b_flags & B_APPENDWRITE) + error = nfs_writerpc(vp, uiop, cr, IO_APPEND); + else + error = nfs_writerpc(vp, uiop, cr, 0); + bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); + + /* + * For an interrupted write, the buffer is still valid and the + * write hasn't been pushed to the server yet, so we can't set + * B_ERROR and report the interruption by setting B_EINTR. For + * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt + * is essentially a noop. + */ + if (error == EINTR) { + bp->b_flags &= ~B_INVAL; + bp->b_flags |= B_DELWRI; + + /* + * Since for the B_ASYNC case, nfs_bwrite() has reassigned the + * buffer to the clean list, we have to reassign it back to the + * dirty one. Ugh. 
+ */ + if (bp->b_flags & B_ASYNC) + reassignbuf(bp, vp); + else + bp->b_flags |= B_EINTR; + } else { + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = np->n_error = error; + np->n_flag |= NWRITEERR; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; + } + } + bp->b_resid = uiop->uio_resid; + biodone(bp); + return (error); +} diff --git a/sys/nfs/nfs_boot.c b/sys/nfs/nfs_boot.c new file mode 100644 index 00000000000..06514ce9d22 --- /dev/null +++ b/sys/nfs/nfs_boot.c @@ -0,0 +1,536 @@ +/* $NetBSD: nfs_boot.c,v 1.19 1995/06/12 00:48:31 mycroft Exp $ */ + +/* + * Copyright (c) 1995 Adam Glass, Gordon Ross + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/ioctl.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/mbuf.h> +#include <sys/reboot.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/if_ether.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsdiskless.h> +#include <nfs/krpc.h> +#include <nfs/xdr_subs.h> + +#include "ether.h" +#if NETHER == 0 + +int nfs_boot_init(nd, procp) + struct nfs_diskless *nd; + struct proc *procp; +{ + panic("nfs_boot_init: no ether"); +} + +#else /* NETHER */ + +/* + * Support for NFS diskless booting, specifically getting information + * about where to boot from, what pathnames, etc. + * + * This implememtation uses RARP and the bootparam RPC. + * We are forced to implement RPC anyway (to get file handles) + * so we might as well take advantage of it for bootparam too. + * + * The diskless boot sequence goes as follows: + * (1) Use RARP to get our interface address + * (2) Use RPC/bootparam/whoami to get our hostname, + * our IP address, and the server's IP address. + * (3) Use RPC/bootparam/getfile to get the root path + * (4) Use RPC/mountd to get the root file handle + * (5) Use RPC/bootparam/getfile to get the swap path + * (6) Use RPC/mountd to get the swap file handle + * + * (This happens to be the way Sun does it too.) 
+ */ + +/* bootparam RPC */ +static int bp_whoami __P((struct sockaddr_in *bpsin, + struct in_addr *my_ip, struct in_addr *gw_ip)); +static int bp_getfile __P((struct sockaddr_in *bpsin, char *key, + struct sockaddr_in *mdsin, char *servname, char *path)); + +/* mountd RPC */ +static int md_mount __P((struct sockaddr_in *mdsin, char *path, + u_char *fh)); + +/* other helpers */ +static void get_path_and_handle __P((struct sockaddr_in *bpsin, + char *key, struct nfs_dlmount *ndmntp)); + +char *nfsbootdevname; + +/* + * Called with an empty nfs_diskless struct to be filled in. + */ +int +nfs_boot_init(nd, procp) + struct nfs_diskless *nd; + struct proc *procp; +{ + struct ifreq ireq; + struct in_addr my_ip, gw_ip; + struct sockaddr_in bp_sin; + struct sockaddr_in *sin; + struct ifnet *ifp; + struct socket *so; + int error; + + /* + * Find an interface, rarp for its ip address, stuff it, the + * implied broadcast addr, and netmask into a nfs_diskless struct. + * + * This was moved here from nfs_vfsops.c because this procedure + * would be quite different if someone decides to write (i.e.) a + * BOOTP version of this file (might not use RARP, etc.) + */ + + /* + * Find a network interface. + */ + if (nfsbootdevname) + ifp = ifunit(nfsbootdevname); + else + for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) + if ((ifp->if_flags & + (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) + break; + if (ifp == NULL) + panic("nfs_boot: no suitable interface"); + sprintf(ireq.ifr_name, "%s%d", ifp->if_name, ifp->if_unit); + printf("nfs_boot: using network interface '%s'\n", + ireq.ifr_name); + + /* + * Bring up the interface. + * + * Get the old interface flags and or IFF_UP into them; if + * IFF_UP set blindly, interface selection can be clobbered. + */ + if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0)) != 0) + panic("nfs_boot: socreate, error=%d", error); + error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ireq, procp); + if (error) + panic("nfs_boot: GIFFLAGS, error=%d", error); + ireq.ifr_flags |= IFF_UP; + error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)&ireq, procp); + if (error) + panic("nfs_boot: SIFFLAGS, error=%d", error); + + /* + * Do RARP for the interface address. + */ + if ((error = revarpwhoami(&my_ip, ifp)) != 0) + panic("revarp failed, error=%d", error); + printf("nfs_boot: client_addr=0x%x\n", ntohl(my_ip.s_addr)); + + /* + * Do enough of ifconfig(8) so that the chosen interface + * can talk to the servers. (just set the address) + */ + sin = (struct sockaddr_in *)&ireq.ifr_addr; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = my_ip.s_addr; + error = ifioctl(so, SIOCSIFADDR, (caddr_t)&ireq, procp); + if (error) + panic("nfs_boot: set if addr, error=%d", error); + + soclose(so); + + /* + * Get client name and gateway address. + * RPC: bootparam/whoami + * Use the old broadcast address for the WHOAMI + * call because we do not yet know our netmask. + * The server address returned by the WHOAMI call + * is used for all subsequent booptaram RPCs. 
+ */ + bzero((caddr_t)&bp_sin, sizeof(bp_sin)); + bp_sin.sin_len = sizeof(bp_sin); + bp_sin.sin_family = AF_INET; + bp_sin.sin_addr.s_addr = INADDR_BROADCAST; + hostnamelen = MAXHOSTNAMELEN; + + /* this returns gateway IP address */ + error = bp_whoami(&bp_sin, &my_ip, &gw_ip); + if (error) + panic("nfs_boot: bootparam whoami, error=%d", error); + printf("nfs_boot: server_addr=0x%x\n", + ntohl(bp_sin.sin_addr.s_addr)); + printf("nfs_boot: hostname=%s\n", hostname); + +#ifdef NFS_BOOT_GATEWAY + /* + * XXX - This code is conditionally compiled only because + * many bootparam servers (in particular, SunOS 4.1.3) + * always set the gateway address to their own address. + * The bootparam server is not necessarily the gateway. + * We could just believe the server, and at worst you would + * need to delete the incorrect default route before adding + * the correct one, but for simplicity, ignore the gateway. + * If your server is OK, you can turn on this option. + * + * If the gateway address is set, add a default route. + * (The mountd RPCs may go across a gateway.) + */ + if (gw_ip.s_addr) { + struct sockaddr dst, gw, mask; + /* Destination: (default) */ + bzero((caddr_t)&dst, sizeof(dst)); + dst.sa_len = sizeof(dst); + dst.sa_family = AF_INET; + /* Gateway: */ + bzero((caddr_t)&gw, sizeof(gw)); + sin = (struct sockaddr_in *)&gw; + sin->sin_len = sizeof(gw); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = gw_ip.s_addr; + /* Mask: (zero length) */ + bzero(&mask, sizeof(mask)); + + printf("nfs_boot: gateway=0x%x\n", ntohl(gw_ip.s_addr)); + /* add, dest, gw, mask, flags, 0 */ + error = rtrequest(RTM_ADD, &dst, (struct sockaddr *)&gw, + &mask, (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL); + if (error) + printf("nfs_boot: add route, error=%d\n", error); + } +#endif + + get_path_and_handle(&bp_sin, "root", &nd->nd_root); + get_path_and_handle(&bp_sin, "swap", &nd->nd_swap); + + return (0); +} + +static void +get_path_and_handle(bpsin, key, ndmntp) + struct sockaddr_in *bpsin; /* bootparam server */ + char *key; /* root or swap */ + struct nfs_dlmount *ndmntp; /* output */ +{ + char pathname[MAXPATHLEN]; + char *sp, *dp, *endp; + int error; + + /* + * Get server:pathname for "key" (root or swap) + * using RPC to bootparam/getfile + */ + error = bp_getfile(bpsin, key, &ndmntp->ndm_saddr, + ndmntp->ndm_host, pathname); + if (error) + panic("nfs_boot: bootparam get %s: %d", key, error); + + /* + * Get file handle for "key" (root or swap) + * using RPC to mountd/mount + */ + error = md_mount(&ndmntp->ndm_saddr, pathname, ndmntp->ndm_fh); + if (error) + panic("nfs_boot: mountd %s, error=%d", key, error); + + /* Construct remote path (for getmntinfo(3)) */ + dp = ndmntp->ndm_host; + endp = dp + MNAMELEN - 1; + dp += strlen(dp); + *dp++ = ':'; + for (sp = pathname; *sp && dp < endp;) + *dp++ = *sp++; + *dp = '\0'; + +} + + +/* + * RPC: bootparam/whoami + * Given client IP address, get: + * client name (hostname) + * domain name (domainname) + * gateway address + * + * The hostname and domainname are set here for convenience. + * + * Note - bpsin is initialized to the broadcast address, + * and will be replaced with the bootparam server address + * after this call is complete. Have to use PMAP_PROC_CALL + * to make sure we get responses only from a servers that + * know about us (don't want to broadcast a getport call). 
+ */ +static int +bp_whoami(bpsin, my_ip, gw_ip) + struct sockaddr_in *bpsin; + struct in_addr *my_ip; + struct in_addr *gw_ip; +{ + /* RPC structures for PMAPPROC_CALLIT */ + struct whoami_call { + u_int32_t call_prog; + u_int32_t call_vers; + u_int32_t call_proc; + u_int32_t call_arglen; + } *call; + struct callit_reply { + u_int32_t port; + u_int32_t encap_len; + /* encapsulated data here */ + } *reply; + + struct mbuf *m, *from; + struct sockaddr_in *sin; + int error, msg_len; + int16_t port; + + /* + * Build request message for PMAPPROC_CALLIT. + */ + m = m_get(M_WAIT, MT_DATA); + call = mtod(m, struct whoami_call *); + m->m_len = sizeof(*call); + call->call_prog = txdr_unsigned(BOOTPARAM_PROG); + call->call_vers = txdr_unsigned(BOOTPARAM_VERS); + call->call_proc = txdr_unsigned(BOOTPARAM_WHOAMI); + + /* + * append encapsulated data (client IP address) + */ + m->m_next = xdr_inaddr_encode(my_ip); + call->call_arglen = txdr_unsigned(m->m_next->m_len); + + /* RPC: portmap/callit */ + bpsin->sin_port = htons(PMAPPORT); + from = NULL; + error = krpc_call(bpsin, PMAPPROG, PMAPVERS, + PMAPPROC_CALLIT, &m, &from); + if (error) + return error; + + /* + * Parse result message. + */ + if (m->m_len < sizeof(*reply)) { + m = m_pullup(m, sizeof(*reply)); + if (m == NULL) + goto bad; + } + reply = mtod(m, struct callit_reply *); + port = fxdr_unsigned(u_int32_t, reply->port); + msg_len = fxdr_unsigned(u_int32_t, reply->encap_len); + m_adj(m, sizeof(*reply)); + + /* + * Save bootparam server address + */ + sin = mtod(from, struct sockaddr_in *); + bpsin->sin_port = htons(port); + bpsin->sin_addr.s_addr = sin->sin_addr.s_addr; + + /* client name */ + hostnamelen = MAXHOSTNAMELEN-1; + m = xdr_string_decode(m, hostname, &hostnamelen); + if (m == NULL) + goto bad; + + /* domain name */ + domainnamelen = MAXHOSTNAMELEN-1; + m = xdr_string_decode(m, domainname, &domainnamelen); + if (m == NULL) + goto bad; + + /* gateway address */ + m = xdr_inaddr_decode(m, gw_ip); + if (m == NULL) + goto bad; + + /* success */ + goto out; + +bad: + printf("nfs_boot: bootparam_whoami: bad reply\n"); + error = EBADRPC; + +out: + if (from) + m_freem(from); + if (m) + m_freem(m); + return(error); +} + + +/* + * RPC: bootparam/getfile + * Given client name and file "key", get: + * server name + * server IP address + * server pathname + */ +static int +bp_getfile(bpsin, key, md_sin, serv_name, pathname) + struct sockaddr_in *bpsin; + char *key; + struct sockaddr_in *md_sin; + char *serv_name; + char *pathname; +{ + struct mbuf *m; + struct sockaddr_in *sin; + struct in_addr inaddr; + int error, sn_len, path_len; + + /* + * Build request message. + */ + + /* client name (hostname) */ + m = xdr_string_encode(hostname, hostnamelen); + + /* key name (root or swap) */ + m->m_next = xdr_string_encode(key, strlen(key)); + + /* RPC: bootparam/getfile */ + error = krpc_call(bpsin, BOOTPARAM_PROG, BOOTPARAM_VERS, + BOOTPARAM_GETFILE, &m, NULL); + if (error) + return error; + + /* + * Parse result message. 
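/*
 * The xdr_string_encode()/xdr_string_decode() helpers used throughout these
 * bootparam and mountd calls move strings in the standard XDR form: a 4-byte
 * big-endian length word, the bytes themselves, then zero padding to the
 * next 4-byte boundary.  A minimal user-space encoder, assuming the caller
 * provides a buffer large enough for the length word, the string, and up to
 * three pad bytes; the function name is a stand-in for illustration only.
 */
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

static size_t
xdr_put_string(unsigned char *buf, const char *str, uint32_t len)
{
    uint32_t nlen = htonl(len);
    size_t pad = (4 - (len & 3)) & 3;

    memcpy(buf, &nlen, 4);              /* length word */
    memcpy(buf + 4, str, len);          /* string bytes */
    memset(buf + 4 + len, 0, pad);      /* pad to a 4-byte boundary */
    return (4 + len + pad);             /* total encoded size */
}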
+ */ + + /* server name */ + sn_len = MNAMELEN-1; + m = xdr_string_decode(m, serv_name, &sn_len); + if (m == NULL) + goto bad; + + /* server IP address (mountd/NFS) */ + m = xdr_inaddr_decode(m, &inaddr); + if (m == NULL) + goto bad; + + /* server pathname */ + path_len = MAXPATHLEN-1; + m = xdr_string_decode(m, pathname, &path_len); + if (m == NULL) + goto bad; + + /* setup server socket address */ + sin = md_sin; + bzero((caddr_t)sin, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr = inaddr; + + /* success */ + goto out; + +bad: + printf("nfs_boot: bootparam_getfile: bad reply\n"); + error = EBADRPC; + +out: + m_freem(m); + return(0); +} + + +/* + * RPC: mountd/mount + * Given a server pathname, get an NFS file handle. + * Also, sets sin->sin_port to the NFS service port. + */ +static int +md_mount(mdsin, path, fhp) + struct sockaddr_in *mdsin; /* mountd server address */ + char *path; + u_char *fhp; +{ + /* The RPC structures */ + struct rdata { + u_int32_t errno; + u_char fh[NFS_FHSIZE]; + } *rdata; + struct mbuf *m; + int error; + + /* Get port number for MOUNTD. */ + error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, + &mdsin->sin_port); + if (error) return error; + + m = xdr_string_encode(path, strlen(path)); + + /* Do RPC to mountd. */ + error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, + RPCMNT_MOUNT, &m, NULL); + if (error) + return error; /* message already freed */ + + if (m->m_len < sizeof(*rdata)) { + m = m_pullup(m, sizeof(*rdata)); + if (m == NULL) + goto bad; + } + rdata = mtod(m, struct rdata *); + error = fxdr_unsigned(u_int32_t, rdata->errno); + if (error) + goto bad; + bcopy(rdata->fh, fhp, NFS_FHSIZE); + + /* Set port number for NFS use. */ + error = krpc_portmap(mdsin, NFS_PROG, NFS_VER2, + &mdsin->sin_port); + goto out; + +bad: + error = EBADRPC; + +out: + m_freem(m); + return error; +} + +#endif /* NETHER */ diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c new file mode 100644 index 00000000000..8007dfd9e84 --- /dev/null +++ b/sys/nfs/nfs_node.c @@ -0,0 +1,282 @@ +/* $NetBSD: nfs_node.c,v 1.13 1994/08/18 22:47:46 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_node.c 8.2 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/malloc.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> + +#define NFSNOHASH(fhsum) \ + (&nfsnodehashtbl[(fhsum) & nfsnodehash]) +LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl; +u_long nfsnodehash; + +#define TRUE 1 +#define FALSE 0 + +/* + * Initialize hash links for nfsnodes + * and build nfsnode free list. + */ +nfs_nhinit() +{ + +#ifndef lint + if ((sizeof(struct nfsnode) - 1) & sizeof(struct nfsnode)) + printf("nfs_nhinit: bad size %d\n", sizeof(struct nfsnode)); +#endif /* not lint */ + nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash); +} + +/* + * Compute an entry in the NFS hash table structure + */ +struct nfsnodehashhead * +nfs_hash(fhp) + register nfsv2fh_t *fhp; +{ + register u_char *fhpp; + register u_long fhsum; + int i; + + fhpp = &fhp->fh_bytes[0]; + fhsum = 0; + for (i = 0; i < NFSX_FH; i++) + fhsum += *fhpp++; + return (NFSNOHASH(fhsum)); +} + +/* + * Look up a vnode/nfsnode by file handle. + * Callers must check for mount points!! + * In all cases, a pointer to a + * nfsnode structure is returned. 
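/*
 * nfs_hash() above reduces the opaque file handle to a bucket index by
 * summing its bytes and masking the sum with the table mask produced by
 * hashinit(); nfs_nget() below uses it to pick the chain to search.  A
 * stand-alone sketch with assumed sizes -- FH_SIZE and HASH_MASK stand in
 * for NFSX_FH and nfsnodehash and are not the kernel's values.
 */
#include <stddef.h>

#define FH_SIZE   32                    /* stand-in for NFSX_FH */
#define HASH_MASK 0xff                  /* stand-in for nfsnodehash */

static unsigned long
fh_hash(const unsigned char fh[FH_SIZE])
{
    unsigned long sum = 0;
    size_t i;

    for (i = 0; i < FH_SIZE; i++)
        sum += fh[i];                   /* simple byte sum */
    return (sum & HASH_MASK);           /* bucket index */
}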
+ */ +nfs_nget(mntp, fhp, npp) + struct mount *mntp; + register nfsv2fh_t *fhp; + struct nfsnode **npp; +{ + register struct nfsnode *np; + struct nfsnodehashhead *nhpp; + register struct vnode *vp; + extern int (**nfsv2_vnodeop_p)(); + struct vnode *nvp; + int error; + + nhpp = nfs_hash(fhp); +loop: + for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { + if (mntp != NFSTOV(np)->v_mount || + bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH)) + continue; + vp = NFSTOV(np); + if (vget(vp, 1)) + goto loop; + *npp = np; + return(0); + } + if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) { + *npp = 0; + return (error); + } + vp = nvp; + MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK); + vp->v_data = np; + np->n_vnode = vp; + /* + * Insert the nfsnode in the hash queue for its new file handle + */ + np->n_flag = 0; + LIST_INSERT_HEAD(nhpp, np, n_hash); + bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH); + np->n_attrstamp = 0; + np->n_direofoffset = 0; + np->n_sillyrename = (struct sillyrename *)0; + np->n_size = 0; + np->n_mtime = 0; + np->n_lockf = 0; + if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) { + np->n_brev = 0; + np->n_lrev = 0; + np->n_expiry = (time_t)0; + np->n_timer.cqe_next = (struct nfsnode *)0; + } + *npp = np; + return (0); +} + +nfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct nfsnode *np; + register struct sillyrename *sp; + struct proc *p = curproc; /* XXX */ + extern int prtactive; + + np = VTONFS(ap->a_vp); + if (prtactive && ap->a_vp->v_usecount != 0) + vprint("nfs_inactive: pushing active", ap->a_vp); + sp = np->n_sillyrename; + np->n_sillyrename = (struct sillyrename *)0; + if (sp) { + /* + * Remove the silly file that was rename'd earlier + */ + (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); + nfs_removeit(sp); + crfree(sp->s_cred); + vrele(sp->s_dvp); +#ifdef SILLYSEPARATE + free((caddr_t)sp, M_NFSREQ); +#endif + } + np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED | + NQNFSNONCACHE | NQNFSWRITE); + return (0); +} + +/* + * Reclaim an nfsnode so that it can be used for other purposes. + */ +nfs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct nfsmount *nmp = VFSTONFS(vp->v_mount); + extern int prtactive; + + if (prtactive && vp->v_usecount != 0) + vprint("nfs_reclaim: pushing active", vp); + LIST_REMOVE(np, n_hash); + + /* + * For nqnfs, take it off the timer queue as required. + */ + if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_timer.cqe_next != 0) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + } + cache_purge(vp); + FREE(vp->v_data, M_NFSNODE); + vp->v_data = (void *)0; + return (0); +} + +/* + * Lock an nfsnode + */ +nfs_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + /* + * Ugh, another place where interruptible mounts will get hung. + * If you make this sleep interruptible, then you have to fix all + * the VOP_LOCK() calls to expect interruptibility. 
+ */ + while (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + } + if (vp->v_tag == VT_NON) + return (ENOENT); + return (0); +} + +/* + * Unlock an nfsnode + */ +nfs_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + return (0); +} + +/* + * Check for a locked nfsnode + */ +nfs_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + return (0); +} + +/* + * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually + * done. Currently nothing to do. + */ +/* ARGSUSED */ +int +nfs_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + FREE(ap->a_cnp->cn_pnbuf, M_NAMEI); + return (0); +} diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c new file mode 100644 index 00000000000..29437a9e987 --- /dev/null +++ b/sys/nfs/nfs_nqlease.c @@ -0,0 +1,1183 @@ +/* $NetBSD: nfs_nqlease.c,v 1.10 1995/06/18 14:48:01 cgd Exp $ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_nqlease.c 8.5 (Berkeley) 8/18/94 + */ + +/* + * References: + * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant + * Mechanism for Distributed File Cache Consistency", + * In Proc. of the Twelfth ACM Symposium on Operating Systems + * Principals, pg. 202-210, Litchfield Park, AZ, Dec. 1989. + * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching + * in the Sprite Network File System", ACM TOCS 6(1), + * pages 134-154, February 1988. + * V. Srinivasan and Jeffrey C. 
Mogul, "Spritely NFS: Implementation and + * Performance of Cache-Consistency Protocols", Digital + * Equipment Corporation WRL Research Report 89/5, May 1989. + */ +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/stat.h> +#include <sys/protosw.h> + +#include <netinet/in.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> + +time_t nqnfsstarttime = (time_t)0; +u_long nqnfs_prog, nqnfs_vers; +int nqsrv_clockskew = NQ_CLOCKSKEW; +int nqsrv_writeslack = NQ_WRITESLACK; +int nqsrv_maxlease = NQ_MAXLEASE; +int nqsrv_maxnumlease = NQ_MAXNUMLEASE; +void nqsrv_instimeq(), nqsrv_send_eviction(), nfs_sndunlock(); +void nqsrv_unlocklease(), nqsrv_waitfor_expiry(), nfsrv_slpderef(); +void nqsrv_addhost(), nqsrv_locklease(), nqnfs_serverd(); +void nqnfs_clientlease(); +struct mbuf *nfsm_rpchead(); + +/* + * Signifies which rpcs can have piggybacked lease requests + */ +int nqnfs_piggy[NFS_NPROCS] = { + 0, + NQL_READ, + NQL_WRITE, + 0, + NQL_READ, + NQL_READ, + NQL_READ, + 0, + NQL_WRITE, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + NQL_READ, + 0, + NQL_READ, + 0, + 0, + 0, + 0, +}; + +extern nfstype nfs_type[9]; +extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; +extern int nfsd_waiting; + +#define TRUE 1 +#define FALSE 0 + +/* + * Get or check for a lease for "vp", based on NQL_CHECK flag. + * The rules are as follows: + * - if a current non-caching lease, reply non-caching + * - if a current lease for same host only, extend lease + * - if a read cachable lease and a read lease request + * add host to list any reply cachable + * - else { set non-cachable for read-write sharing } + * send eviction notice messages to all other hosts that have lease + * wait for lease termination { either by receiving vacated messages + * from all the other hosts or expiry + * via. timeout } + * modify lease to non-cachable + * - else if no current lease, issue new one + * - reply + * - return boolean TRUE iff nam should be m_freem()'d + * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep() + * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease(). + * nqsrv_locklease() is coded such that at least one of LC_LOCKED and + * LC_WANTED is set whenever a process is tsleeping in it. The exception + * is when a new lease is being allocated, since it is not in the timer + * queue yet. 
(Ditto for the splsoftclock() and splx(s) calls) + */ +nqsrv_getlease(vp, duration, flags, nd, nam, cachablep, frev, cred) + struct vnode *vp; + u_long *duration; + int flags; + struct nfsd *nd; + struct mbuf *nam; + int *cachablep; + u_quad_t *frev; + struct ucred *cred; +{ + register struct nqlease *lp; + register struct nqfhhashhead *lpp; + register struct nqhost *lph; + struct nqlease *tlp; + struct nqm **lphp; + struct vattr vattr; + fhandle_t fh; + int i, ok, error, s; + + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) + return (0); + if (*duration > nqsrv_maxlease) + *duration = nqsrv_maxlease; + if (error = VOP_GETATTR(vp, &vattr, cred, nd->nd_procp)) + return (error); + *frev = vattr.va_filerev; + s = splsoftclock(); + tlp = vp->v_lease; + if ((flags & NQL_CHECK) == 0) + nfsstats.srvnqnfs_getleases++; + if (tlp == 0) { + /* + * Find the lease by searching the hash list. + */ + fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; + if (error = VFS_VPTOFH(vp, &fh.fh_fid)) { + splx(s); + return (error); + } + lpp = NQFHHASH(fh.fh_fid.fid_data); + for (lp = lpp->lh_first; lp != 0; lp = lp->lc_hash.le_next) + if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] && + fh.fh_fsid.val[1] == lp->lc_fsid.val[1] && + !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata, + fh.fh_fid.fid_len - sizeof (long))) { + /* Found it */ + lp->lc_vp = vp; + vp->v_lease = lp; + tlp = lp; + break; + } + } else + lp = tlp; + if (lp != 0) { + if ((lp->lc_flag & LC_NONCACHABLE) || + (lp->lc_morehosts == (struct nqm *)0 && + nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host))) + goto doreply; + if ((flags & NQL_READ) && (lp->lc_flag & LC_WRITE) == 0) { + if (flags & NQL_CHECK) + goto doreply; + if (nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host)) + goto doreply; + i = 0; + if (lp->lc_morehosts) { + lph = lp->lc_morehosts->lpm_hosts; + lphp = &lp->lc_morehosts->lpm_next; + ok = 1; + } else { + lphp = &lp->lc_morehosts; + ok = 0; + } + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(nd->nd_slp, nam, lph)) + goto doreply; + if (++i == LC_MOREHOSTSIZ) { + i = 0; + if (*lphp) { + lph = (*lphp)->lpm_hosts; + lphp = &((*lphp)->lpm_next); + } else + ok = 0; + } else + lph++; + } + nqsrv_locklease(lp); + if (!ok) { + *lphp = (struct nqm *) + malloc(sizeof (struct nqm), + M_NQMHOST, M_WAITOK); + bzero((caddr_t)*lphp, sizeof (struct nqm)); + lph = (*lphp)->lpm_hosts; + } + nqsrv_addhost(lph, nd->nd_slp, nam); + nqsrv_unlocklease(lp); + } else { + lp->lc_flag |= LC_NONCACHABLE; + nqsrv_locklease(lp); + nqsrv_send_eviction(vp, lp, nd->nd_slp, nam, cred); + nqsrv_waitfor_expiry(lp); + nqsrv_unlocklease(lp); + } +doreply: + /* + * Update the lease and return + */ + if ((flags & NQL_CHECK) == 0) + nqsrv_instimeq(lp, *duration); + if (lp->lc_flag & LC_NONCACHABLE) + *cachablep = 0; + else { + *cachablep = 1; + if (flags & NQL_WRITE) + lp->lc_flag |= LC_WRITTEN; + } + splx(s); + return (0); + } + splx(s); + if (flags & NQL_CHECK) + return (0); + + /* + * Allocate new lease + * The value of nqsrv_maxnumlease should be set generously, so that + * the following "printf" happens infrequently. 
+ */ + if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) { + printf("Nqnfs server, too many leases\n"); + do { + (void) tsleep((caddr_t)&lbolt, PSOCK, + "nqsrvnuml", 0); + } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease); + } + MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK); + bzero((caddr_t)lp, sizeof (struct nqlease)); + if (flags & NQL_WRITE) + lp->lc_flag |= (LC_WRITE | LC_WRITTEN); + nqsrv_addhost(&lp->lc_host, nd->nd_slp, nam); + lp->lc_vp = vp; + lp->lc_fsid = fh.fh_fsid; + bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, + fh.fh_fid.fid_len - sizeof (long)); + LIST_INSERT_HEAD(lpp, lp, lc_hash); + vp->v_lease = lp; + s = splsoftclock(); + nqsrv_instimeq(lp, *duration); + splx(s); + *cachablep = 1; + if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases) + nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases; + return (0); +} + +/* + * Local lease check for server syscalls. + * Just set up args and let nqsrv_getlease() do the rest. + */ +lease_check(ap) + struct vop_lease_args /* { + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; + } */ *ap; +{ + int duration = 0, cache; + struct nfsd nfsd; + u_quad_t frev; + + nfsd.nd_slp = NQLOCALSLP; + nfsd.nd_procp = ap->a_p; + (void) nqsrv_getlease(ap->a_vp, &duration, NQL_CHECK | ap->a_flag, + &nfsd, (struct mbuf *)0, &cache, &frev, ap->a_cred); +} + +/* + * Add a host to an nqhost structure for a lease. + */ +void +nqsrv_addhost(lph, slp, nam) + register struct nqhost *lph; + struct nfssvc_sock *slp; + struct mbuf *nam; +{ + register struct sockaddr_in *saddr; + + if (slp == NQLOCALSLP) + lph->lph_flag |= (LC_VALID | LC_LOCAL); + else if (slp == nfs_udpsock) { + saddr = mtod(nam, struct sockaddr_in *); + lph->lph_flag |= (LC_VALID | LC_UDP); + lph->lph_inetaddr = saddr->sin_addr.s_addr; + lph->lph_port = saddr->sin_port; + } else if (slp == nfs_cltpsock) { + lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT); + lph->lph_flag |= (LC_VALID | LC_CLTP); + } else { + lph->lph_flag |= (LC_VALID | LC_SREF); + lph->lph_slp = slp; + slp->ns_sref++; + } +} + +/* + * Update the lease expiry time and position it in the timer queue correctly. + */ +void +nqsrv_instimeq(lp, duration) + register struct nqlease *lp; + u_long duration; +{ + register struct nqlease *tlp; + time_t newexpiry; + + newexpiry = time.tv_sec + duration + nqsrv_clockskew; + if (lp->lc_expiry == newexpiry) + return; + if (lp->lc_timer.cqe_next != 0) + CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); + lp->lc_expiry = newexpiry; + + /* + * Find where in the queue it should be. + */ + tlp = nqtimerhead.cqh_last; + while (tlp != (void *)&nqtimerhead && tlp->lc_expiry > newexpiry) + tlp = tlp->lc_timer.cqe_prev; + if (tlp == nqtimerhead.cqh_last) + NQSTORENOVRAM(newexpiry); + if (tlp == (void *)&nqtimerhead) { + CIRCLEQ_INSERT_HEAD(&nqtimerhead, lp, lc_timer); + } else { + CIRCLEQ_INSERT_AFTER(&nqtimerhead, tlp, lp, lc_timer); + } +} + +/* + * Compare the requesting host address with the lph entry in the lease. + * Return true iff it is the same. + * This is somewhat messy due to the union in the nqhost structure. + * The local host is indicated by the special value of NQLOCALSLP for slp. 
+ */ +nqsrv_cmpnam(slp, nam, lph) + register struct nfssvc_sock *slp; + struct mbuf *nam; + register struct nqhost *lph; +{ + register struct sockaddr_in *saddr; + struct mbuf *addr; + union nethostaddr lhaddr; + int ret; + + if (slp == NQLOCALSLP) { + if (lph->lph_flag & LC_LOCAL) + return (1); + else + return (0); + } + if (slp == nfs_udpsock || slp == nfs_cltpsock) + addr = nam; + else + addr = slp->ns_nam; + if (lph->lph_flag & LC_UDP) + ret = netaddr_match(AF_INET, &lph->lph_haddr, addr); + else if (lph->lph_flag & LC_CLTP) + ret = netaddr_match(AF_ISO, &lph->lph_claddr, addr); + else { + if ((lph->lph_slp->ns_flag & SLP_VALID) == 0) + return (0); + saddr = mtod(lph->lph_slp->ns_nam, struct sockaddr_in *); + if (saddr->sin_family == AF_INET) + lhaddr.had_inetaddr = saddr->sin_addr.s_addr; + else + lhaddr.had_nam = lph->lph_slp->ns_nam; + ret = netaddr_match(saddr->sin_family, &lhaddr, addr); + } + return (ret); +} + +/* + * Send out eviction notice messages to all other hosts for the lease. + */ +void +nqsrv_send_eviction(vp, lp, slp, nam, cred) + struct vnode *vp; + register struct nqlease *lp; + struct nfssvc_sock *slp; + struct mbuf *nam; + struct ucred *cred; +{ + register struct nqhost *lph = &lp->lc_host; + register struct mbuf *m; + register int siz; + struct nqm *lphnext = lp->lc_morehosts; + struct mbuf *mreq, *mb, *mb2, *nam2, *mheadend; + struct socket *so; + struct sockaddr_in *saddr; + fhandle_t *fhp; + caddr_t bpos, cp; + u_long xid; + int len = 1, ok = 1, i = 0; + int sotype, *solockp; + + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(slp, nam, lph)) + lph->lph_flag |= LC_VACATED; + else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { + if (lph->lph_flag & LC_UDP) { + MGET(nam2, M_WAIT, MT_SONAME); + saddr = mtod(nam2, struct sockaddr_in *); + nam2->m_len = saddr->sin_len = + sizeof (struct sockaddr_in); + saddr->sin_family = AF_INET; + saddr->sin_addr.s_addr = lph->lph_inetaddr; + saddr->sin_port = lph->lph_port; + so = nfs_udpsock->ns_so; + } else if (lph->lph_flag & LC_CLTP) { + nam2 = lph->lph_nam; + so = nfs_cltpsock->ns_so; + } else if (lph->lph_slp->ns_flag & SLP_VALID) { + nam2 = (struct mbuf *)0; + so = lph->lph_slp->ns_so; + } else + goto nextone; + sotype = so->so_type; + if (so->so_proto->pr_flags & PR_CONNREQUIRED) + solockp = &lph->lph_slp->ns_solock; + else + solockp = (int *)0; + nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED, + NFSX_FH); + nfsm_build(cp, caddr_t, NFSX_FH); + bzero(cp, NFSX_FH); + fhp = (fhandle_t *)cp; + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + VFS_VPTOFH(vp, &fhp->fh_fid); + m = mreq; + siz = 0; + while (m) { + siz += m->m_len; + m = m->m_next; + } + if (siz <= 0 || siz > NFS_MAXPACKET) { + printf("mbuf siz=%d\n",siz); + panic("Bad nfs svc reply"); + } + m = nfsm_rpchead(cred, TRUE, NQNFSPROC_EVICTED, + RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0, + mreq, siz, &mheadend, &xid); + /* + * For stream protocols, prepend a Sun RPC + * Record Mark. 
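/*
 * The record mark prepended just below frames each RPC on a stream socket:
 * the high bit of the 4-byte word flags the last fragment and the low 31
 * bits carry the fragment length, which does not include the mark itself.
 * A minimal sketch of the same computation; the function name is a stand-in
 * for illustration only.
 */
#include <stdint.h>
#include <arpa/inet.h>

static uint32_t
rpc_record_mark(uint32_t fraglen, int last_frag)
{
    /* last-fragment flag in the top bit, length in the low 31 bits */
    return (htonl((last_frag ? 0x80000000UL : 0UL) | (fraglen & 0x7fffffffUL)));
}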
+ */ + if (sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_long *) = htonl(0x80000000 | + (m->m_pkthdr.len - NFSX_UNSIGNED)); + } + if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 && + (lph->lph_slp->ns_flag & SLP_VALID) == 0) || + (solockp && (*solockp & NFSMNT_SNDLOCK))) + m_freem(m); + else { + if (solockp) + *solockp |= NFSMNT_SNDLOCK; + (void) nfs_send(so, nam2, m, + (struct nfsreq *)0); + if (solockp) + nfs_sndunlock(solockp); + } + if (lph->lph_flag & LC_UDP) + MFREE(nam2, m); + } +nextone: + if (++i == len) { + if (lphnext) { + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } +} + +/* + * Wait for the lease to expire. + * This will occur when all clients have sent "vacated" messages to + * this server OR when it expires do to timeout. + */ +void +nqsrv_waitfor_expiry(lp) + register struct nqlease *lp; +{ + register struct nqhost *lph; + register int i; + struct nqm *lphnext; + int len, ok; + +tryagain: + if (time.tv_sec > lp->lc_expiry) + return; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + len = 1; + i = 0; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { + lp->lc_flag |= LC_EXPIREDWANTED; + (void) tsleep((caddr_t)&lp->lc_flag, PSOCK, + "nqexp", 0); + goto tryagain; + } + if (++i == len) { + if (lphnext) { + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } +} + +#ifdef NFSSERVER +/* + * Nqnfs server timer that maintains the server lease queue. + * Scan the lease queue for expired entries: + * - when one is found, wakeup anyone waiting for it + * else dequeue and free + */ +void +nqnfs_serverd() +{ + register struct nqlease *lp, *lq; + register struct nqhost *lph; + struct nqlease *nextlp; + struct nqm *lphnext, *olphnext; + struct mbuf *n; + int i, len, ok; + + for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; + lp = nextlp) { + if (lp->lc_expiry >= time.tv_sec) + break; + nextlp = lp->lc_timer.cqe_next; + if (lp->lc_flag & LC_EXPIREDWANTED) { + lp->lc_flag &= ~LC_EXPIREDWANTED; + wakeup((caddr_t)&lp->lc_flag); + } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) { + /* + * Make a best effort at keeping a write caching lease long + * enough by not deleting it until it has been explicitly + * vacated or there have been no writes in the previous + * write_slack seconds since expiry and the nfsds are not + * all busy. The assumption is that if the nfsds are not + * all busy now (no queue of nfs requests), then the client + * would have been able to do at least one write to the + * file during the last write_slack seconds if it was still + * trying to push writes to the server. + */ + if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE && + ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) { + lp->lc_flag &= ~LC_WRITTEN; + nqsrv_instimeq(lp, nqsrv_writeslack); + } else { + CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); + LIST_REMOVE(lp, lc_hash); + /* + * This soft reference may no longer be valid, but + * no harm done. The worst case is if the vnode was + * recycled and has another valid lease reference, + * which is dereferenced prematurely. 
+ */ + lp->lc_vp->v_lease = (struct nqlease *)0; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + olphnext = (struct nqm *)0; + len = 1; + i = 0; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if (lph->lph_flag & LC_CLTP) + MFREE(lph->lph_nam, n); + if (lph->lph_flag & LC_SREF) + nfsrv_slpderef(lph->lph_slp); + if (++i == len) { + if (olphnext) { + free((caddr_t)olphnext, M_NQMHOST); + olphnext = (struct nqm *)0; + } + if (lphnext) { + olphnext = lphnext; + i = 0; + len = LC_MOREHOSTSIZ; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } + FREE((caddr_t)lp, M_NQLEASE); + if (olphnext) + free((caddr_t)olphnext, M_NQMHOST); + nfsstats.srvnqnfs_leases--; + } + } + } +} + +/* + * Called from nfssvc_nfsd() for a getlease rpc request. + * Do the from/to xdr translation and call nqsrv_getlease() to + * do the real work. + */ +nqnfsrv_getlease(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct nfsv2_fattr *fp; + struct vattr va; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + register u_long *tl; + register long t1; + u_quad_t frev; + caddr_t bpos; + int error = 0; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + int flags, rdonly, cache; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); + flags = fxdr_unsigned(int, *tl++); + nfsd->nd_duration = fxdr_unsigned(int, *tl); + if (error = nfsrv_fhtovp(fhp, + TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + if (rdonly && flags == NQL_WRITE) { + error = EROFS; + nfsm_reply(0); + } + (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, nfsd, + nam, &cache, &frev, cred); + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_NQFATTR + 4*NFSX_UNSIGNED); + nfsm_build(tl, u_long *, 4*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(cache); + *tl++ = txdr_unsigned(nfsd->nd_duration); + txdr_hyper(&frev, tl); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_NQFATTR); + nfsm_srvfillattr; + nfsm_srvdone; +} + +/* + * Called from nfssvc_nfsd() when a "vacated" message is received from a + * client. Find the entry and expire it. + */ +nqnfsrv_vacated(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct nqlease *lp; + register struct nqhost *lph; + struct nqlease *tlp = (struct nqlease *)0; + nfsv2fh_t nfh; + fhandle_t *fhp; + register u_long *tl; + register long t1; + struct nqm *lphnext; + int error = 0, i, len, ok, gotit = 0; + char *cp2; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + m_freem(mrep); + /* + * Find the lease by searching the hash list. 
+ */ + for (lp = NQFHHASH(fhp->fh_fid.fid_data)->lh_first; lp != 0; + lp = lp->lc_hash.le_next) + if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] && + fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] && + !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata, + MAXFIDSZ)) { + /* Found it */ + tlp = lp; + break; + } + if (tlp != 0) { + lp = tlp; + len = 1; + i = 0; + lph = &lp->lc_host; + lphnext = lp->lc_morehosts; + ok = 1; + while (ok && (lph->lph_flag & LC_VALID)) { + if (nqsrv_cmpnam(nfsd->nd_slp, nam, lph)) { + lph->lph_flag |= LC_VACATED; + gotit++; + break; + } + if (++i == len) { + if (lphnext) { + len = LC_MOREHOSTSIZ; + i = 0; + lph = lphnext->lpm_hosts; + lphnext = lphnext->lpm_next; + } else + ok = 0; + } else + lph++; + } + if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) { + lp->lc_flag &= ~LC_EXPIREDWANTED; + wakeup((caddr_t)&lp->lc_flag); + } +nfsmout: + return (EPERM); + } + return (EPERM); +} +#endif /* NFSSERVER */ + +#ifdef NFSCLIENT +/* + * Client get lease rpc function. + */ +nqnfs_getlease(vp, rwflag, cred, p) + register struct vnode *vp; + int rwflag; + struct ucred *cred; + struct proc *p; +{ + register u_long *tl; + register caddr_t cp; + register long t1; + register struct nfsnode *np; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + caddr_t bpos, dpos, cp2; + time_t reqtime; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + int cachable; + u_quad_t frev; + + nfsstats.rpccnt[NQNFSPROC_GETLEASE]++; + mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_FH+2*NFSX_UNSIGNED, + &bpos); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(rwflag); + *tl = txdr_unsigned(nmp->nm_leaseterm); + reqtime = time.tv_sec; + nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred); + np = VTONFS(vp); + nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); + cachable = fxdr_unsigned(int, *tl++); + reqtime += fxdr_unsigned(int, *tl++); + if (reqtime > time.tv_sec) { + fxdr_hyper(tl, &frev); + nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev); + nfsm_loadattr(vp, (struct vattr *)0); + } else + error = NQNFS_EXPIRED; + nfsm_reqdone; + return (error); +} + +/* + * Client vacated message function. 
+ */ +nqnfs_vacated(vp, cred) + register struct vnode *vp; + struct ucred *cred; +{ + register caddr_t cp; + register struct mbuf *m; + register int i; + caddr_t bpos; + u_long xid; + int error = 0; + struct mbuf *mreq, *mb, *mb2, *mheadend; + struct nfsmount *nmp; + struct nfsreq myrep; + + nmp = VFSTONFS(vp->v_mount); + nfsstats.rpccnt[NQNFSPROC_VACATED]++; + nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH); + nfsm_fhtom(vp); + m = mreq; + i = 0; + while (m) { + i += m->m_len; + m = m->m_next; + } + m = nfsm_rpchead(cred, TRUE, NQNFSPROC_VACATED, + RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0, + mreq, i, &mheadend, &xid); + if (nmp->nm_sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - + NFSX_UNSIGNED)); + } + myrep.r_flags = 0; + myrep.r_nmp = nmp; + if (nmp->nm_soflags & PR_CONNREQUIRED) + (void) nfs_sndlock(&nmp->nm_flag, (struct nfsreq *)0); + (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep); + if (nmp->nm_soflags & PR_CONNREQUIRED) + nfs_sndunlock(&nmp->nm_flag); + return (error); +} + +/* + * Called for client side callbacks + */ +nqnfs_callback(nmp, mrep, md, dpos) + struct nfsmount *nmp; + struct mbuf *mrep, *md; + caddr_t dpos; +{ + register struct vnode *vp; + register u_long *tl; + register long t1; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct nfsnode *np; + struct nfsd nd; + int error; + char *cp2; + + nd.nd_mrep = mrep; + nd.nd_md = md; + nd.nd_dpos = dpos; + if (error = nfs_getreq(&nd, FALSE)) + return (error); + md = nd.nd_md; + dpos = nd.nd_dpos; + if (nd.nd_procnum != NQNFSPROC_EVICTED) { + m_freem(mrep); + return (EPERM); + } + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + m_freem(mrep); + if (error = nfs_nget(nmp->nm_mountp, fhp, &np)) + return (error); + vp = NFSTOV(np); + if (np->n_timer.cqe_next != 0) { + np->n_expiry = 0; + np->n_flag |= NQNFSEVICTED; + if (nmp->nm_timerhead.cqh_first != np) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); + } + } + vrele(vp); + nfsm_srvdone; +} + +/* + * Nqnfs client helper daemon. Runs once a second to expire leases. + * It also get authorization strings for "kerb" mounts. + * It must start at the beginning of the list again after any potential + * "sleep" since nfs_reclaim() called from vclean() can pull a node off + * the list asynchronously. + */ +nqnfs_clientd(nmp, cred, ncd, flag, argp, p) + register struct nfsmount *nmp; + struct ucred *cred; + struct nfsd_cargs *ncd; + int flag; + caddr_t argp; + struct proc *p; +{ + register struct nfsnode *np; + struct vnode *vp; + struct nfsreq myrep; + int error, vpid; + + /* + * First initialize some variables + */ + nqnfs_prog = txdr_unsigned(NQNFS_PROG); + nqnfs_vers = txdr_unsigned(NQNFS_VER1); + + /* + * If an authorization string is being passed in, get it. + */ + if ((flag & NFSSVC_GOTAUTH) && + (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT)) == 0) { + if (nmp->nm_flag & NFSMNT_HASAUTH) + panic("cld kerb"); + if ((flag & NFSSVC_AUTHINFAIL) == 0) { + if (ncd->ncd_authlen <= RPCAUTH_MAXSIZ && + copyin(ncd->ncd_authstr, nmp->nm_authstr, + ncd->ncd_authlen) == 0) { + nmp->nm_authtype = ncd->ncd_authtype; + nmp->nm_authlen = ncd->ncd_authlen; + } else + nmp->nm_flag |= NFSMNT_AUTHERR; + } else + nmp->nm_flag |= NFSMNT_AUTHERR; + nmp->nm_flag |= NFSMNT_HASAUTH; + wakeup((caddr_t)&nmp->nm_authlen); + } else + nmp->nm_flag |= NFSMNT_WAITAUTH; + + /* + * Loop every second updating queue until there is a termination sig. 
+ */ + while ((nmp->nm_flag & NFSMNT_DISMNT) == 0) { + if (nmp->nm_flag & NFSMNT_NQNFS) { + /* + * If there are no outstanding requests (and therefore no + * processes in nfs_reply) and there is data in the receive + * queue, poke for callbacks. + */ + if (nfs_reqq.tqh_first == 0 && nmp->nm_so && + nmp->nm_so->so_rcv.sb_cc > 0) { + myrep.r_flags = R_GETONEREP; + myrep.r_nmp = nmp; + myrep.r_mrep = (struct mbuf *)0; + myrep.r_procp = (struct proc *)0; + (void) nfs_reply(&myrep); + } + + /* + * Loop through the leases, updating as required. + */ + np = nmp->nm_timerhead.cqh_first; + while (np != (void *)&nmp->nm_timerhead && + (nmp->nm_flag & NFSMNT_DISMINPROG) == 0) { + vp = NFSTOV(np); +if (strncmp(&vp->v_mount->mnt_stat.f_fstypename[0], MOUNT_NFS, MFSNAMELEN)) panic("trash2"); + vpid = vp->v_id; + if (np->n_expiry < time.tv_sec) { + if (vget(vp, 1) == 0) { + nmp->nm_inprog = vp; + if (vpid == vp->v_id) { +if (strncmp(&vp->v_mount->mnt_stat.f_fstypename[0], MOUNT_NFS, MFSNAMELEN)) panic("trash3"); + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + np->n_timer.cqe_next = 0; + if ((np->n_flag & (NMODIFIED | NQNFSEVICTED)) + && vp->v_type == VREG) { + if (np->n_flag & NQNFSEVICTED) { + (void) nfs_vinvalbuf(vp, + V_SAVE, cred, p, 0); + np->n_flag &= ~NQNFSEVICTED; + (void) nqnfs_vacated(vp, cred); + } else { + (void) VOP_FSYNC(vp, cred, + MNT_WAIT, p); + np->n_flag &= ~NMODIFIED; + } + } + } + vrele(vp); + nmp->nm_inprog = NULLVP; + } + } else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) { + if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE)) + == NQNFSWRITE && vp->v_dirtyblkhd.lh_first && + vget(vp, 1) == 0) { + nmp->nm_inprog = vp; +if (strncmp(&vp->v_mount->mnt_stat.f_fstypename[0], MOUNT_NFS, MFSNAMELEN)) panic("trash4"); + if (vpid == vp->v_id && + nqnfs_getlease(vp, NQL_WRITE, cred, p)==0) + np->n_brev = np->n_lrev; + vrele(vp); + nmp->nm_inprog = NULLVP; + } + } else + break; + if (np == nmp->nm_timerhead.cqh_first) + break; + np = nmp->nm_timerhead.cqh_first; + } + } + + /* + * Get an authorization string, if required. + */ + if ((nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT | NFSMNT_HASAUTH)) == 0) { + ncd->ncd_authuid = nmp->nm_authuid; + if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) + nmp->nm_flag |= NFSMNT_WAITAUTH; + else + return (ENEEDAUTH); + } + + /* + * Wait a bit (no pun) and do it again. + */ + if ((nmp->nm_flag & NFSMNT_DISMNT) == 0 && + (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_HASAUTH))) { + error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, + "nqnfstimr", hz / 3); + if (error == EINTR || error == ERESTART) + (void) dounmount(nmp->nm_mountp, MNT_FORCE, p); + } + } + free((caddr_t)nmp, M_NFSMNT); + if (error == EWOULDBLOCK) + error = 0; + return (error); +} + +/* + * Update a client lease. 
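/*
 * nqnfs_clientlease() below (like nqsrv_instimeq() on the server side) keeps
 * its timer queue sorted by expiry and searches for the insertion point from
 * the tail, since a renewed lease usually expires later than everything
 * already queued.  A stand-alone sketch of that ordered insert over a plain
 * doubly linked list rather than the kernel CIRCLEQ macros; the structure
 * and function names are stand-ins for illustration only.
 */
#include <stddef.h>
#include <time.h>

struct lease {
    time_t expiry;
    struct lease *prev, *next;
};

struct lease_queue {
    struct lease *head, *tail;
};

static void
lease_enqueue(struct lease_queue *q, struct lease *lp)
{
    struct lease *tp = q->tail;

    while (tp != NULL && tp->expiry > lp->expiry)
        tp = tp->prev;                  /* walk back toward the head */
    if (tp == NULL) {                   /* earliest expiry: new head */
        lp->prev = NULL;
        lp->next = q->head;
        if (q->head != NULL)
            q->head->prev = lp;
        else
            q->tail = lp;
        q->head = lp;
    } else {                            /* insert after tp */
        lp->prev = tp;
        lp->next = tp->next;
        if (tp->next != NULL)
            tp->next->prev = lp;
        else
            q->tail = lp;
        tp->next = lp;
    }
}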
+ */ +void +nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev) + register struct nfsmount *nmp; + register struct nfsnode *np; + int rwflag, cachable; + time_t expiry; + u_quad_t frev; +{ + register struct nfsnode *tp; + + if (np->n_timer.cqe_next != 0) { + CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + if (rwflag == NQL_WRITE) + np->n_flag |= NQNFSWRITE; + } else if (rwflag == NQL_READ) + np->n_flag &= ~NQNFSWRITE; + else + np->n_flag |= NQNFSWRITE; + if (cachable) + np->n_flag &= ~NQNFSNONCACHE; + else + np->n_flag |= NQNFSNONCACHE; + np->n_expiry = expiry; + np->n_lrev = frev; + tp = nmp->nm_timerhead.cqh_last; + while (tp != (void *)&nmp->nm_timerhead && tp->n_expiry > np->n_expiry) + tp = tp->n_timer.cqe_prev; + if (tp == (void *)&nmp->nm_timerhead) { + CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); + } else { + CIRCLEQ_INSERT_AFTER(&nmp->nm_timerhead, tp, np, n_timer); + } +} +#endif /* NFSCLIENT */ + +/* + * Adjust all timer queue expiry times when the time of day clock is changed. + * Called from the settimeofday() syscall. + */ +void +lease_updatetime(deltat) + register int deltat; +{ + register struct nqlease *lp; + register struct nfsnode *np; + struct mount *mp; + struct nfsmount *nmp; + int s; + + if (nqnfsstarttime != 0) + nqnfsstarttime += deltat; + s = splsoftclock(); + for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; + lp = lp->lc_timer.cqe_next) + lp->lc_expiry += deltat; + splx(s); + + /* + * Search the mount list for all nqnfs mounts and do their timer + * queues. + */ + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; + mp = mp->mnt_list.cqe_next) { + if (!strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_NFS, + MFSNAMELEN)) { + nmp = VFSTONFS(mp); + if (nmp->nm_flag & NFSMNT_NQNFS) { + for (np = nmp->nm_timerhead.cqh_first; + np != (void *)&nmp->nm_timerhead; + np = np->n_timer.cqe_next) { + np->n_expiry += deltat; + } + } + } + } +} + +/* + * Lock a server lease. + */ +void +nqsrv_locklease(lp) + struct nqlease *lp; +{ + + while (lp->lc_flag & LC_LOCKED) { + lp->lc_flag |= LC_WANTED; + (void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0); + } + lp->lc_flag |= LC_LOCKED; + lp->lc_flag &= ~LC_WANTED; +} + +/* + * Unlock a server lease. + */ +void +nqsrv_unlocklease(lp) + struct nqlease *lp; +{ + + lp->lc_flag &= ~LC_LOCKED; + if (lp->lc_flag & LC_WANTED) + wakeup((caddr_t)lp); +} diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c new file mode 100644 index 00000000000..44daa8a30b7 --- /dev/null +++ b/sys/nfs/nfs_serv.c @@ -0,0 +1,1902 @@ +/* $NetBSD: nfs_serv.c,v 1.18 1995/05/23 06:22:47 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_serv.c 8.4 (Berkeley) 6/4/94 + */ + +/* + * nfs version 2 server calls to vnode ops + * - these routines generally have 3 phases + * 1 - break down and validate rpc request in mbuf list + * 2 - do the vnode ops for the request + * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) + * 3 - build the rpc reply in an mbuf list + * nb: + * - do not mix the phases, since the nfsm_?? macros can return failures + * on a bad rpc or similar and do not do any vrele() or vput()'s + * + * - the nfsm_reply() macro generates an nfs rpc reply with the nfs + * error number iff error != 0 whereas + * returning an error from the server function implies a fatal error + * such as a badly constructed rpc request that should be dropped without + * a reply. 
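/*
 * The first handler below, nqnfsrv_access(), is a small example of phase 1
 * of the pattern described above: it pulls three XDR booleans out of the
 * request and folds them into a VREAD/VWRITE/VEXEC mask before doing the
 * vnode work.  A minimal user-space sketch of that decode; the ACC_* bits
 * and the function name are stand-ins, not the kernel's identifiers.
 */
#include <stdint.h>
#include <arpa/inet.h>

#define ACC_READ    0x1                 /* stand-in for VREAD */
#define ACC_WRITE   0x2                 /* stand-in for VWRITE */
#define ACC_EXEC    0x4                 /* stand-in for VEXEC */

static int
decode_access_args(const uint32_t wire[3])
{
    int mode = 0;

    if (ntohl(wire[0]) != 0)            /* XDR TRUE is 1 on the wire */
        mode |= ACC_READ;
    if (ntohl(wire[1]) != 0)
        mode |= ACC_WRITE;
    if (ntohl(wire[2]) != 0)
        mode |= ACC_EXEC;
    return (mode);
}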
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/mbuf.h> +#include <sys/dirent.h> +#include <sys/stat.h> + +#include <vm/vm.h> + +#include <nfs/nfsv2.h> +#include <nfs/rpcv2.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nqnfs.h> + +/* Defs */ +#define TRUE 1 +#define FALSE 0 + +/* Global vars */ +extern u_long nfs_procids[NFS_NPROCS]; +extern u_long nfs_xdrneg1; +extern u_long nfs_false, nfs_true; +nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, + NFCHR, NFNON }; + +/* + * nqnfs access service + */ +nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, mode = 0; + char *cp2; + struct mbuf *mb, *mreq; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + if (*tl++ == nfs_true) + mode |= VREAD; + if (*tl++ == nfs_true) + mode |= VWRITE; + if (*tl == nfs_true) + mode |= VEXEC; + error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp); + vput(vp); + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * nfs getattr service + */ +nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct nfsv2_fattr *fp; + struct vattr va; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + nqsrv_getl(vp, NQL_READ); + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + nfsm_srvdone; +} + +/* + * nfs setattr service + */ +nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct vattr va; + register struct nfsv2_sattr *sp; + register struct nfsv2_fattr *fp; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + u_quad_t frev, frev2; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + nqsrv_getl(vp, NQL_WRITE); + VATTR_NULL(&va); + /* + * Nah nah nah nah na nah + * There is a bug in the Sun client that puts 0xffff in the mode + * field of sattr when it should put in 0xffffffff. The u_short + * doesn't sign extend. 
+ * --> check the low order 2 bytes for 0xffff + */ + if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) + va.va_mode = nfstov_mode(sp->sa_mode); + if (sp->sa_uid != nfs_xdrneg1) + va.va_uid = fxdr_unsigned(uid_t, sp->sa_uid); + if (sp->sa_gid != nfs_xdrneg1) + va.va_gid = fxdr_unsigned(gid_t, sp->sa_gid); + if (nfsd->nd_nqlflag == NQL_NOVAL) { + if (sp->sa_nfssize != nfs_xdrneg1) + va.va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize); + if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) { +#ifdef notyet + fxdr_nfstime(&sp->sa_nfsatime, &va.va_atime); +#else + va.va_atime.ts_sec = + fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec); + va.va_atime.ts_nsec = 0; +#endif + } + if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1) + fxdr_nfstime(&sp->sa_nfsmtime, &va.va_mtime); + } else { + fxdr_hyper(&sp->sa_nqsize, &va.va_size); + fxdr_nqtime(&sp->sa_nqatime, &va.va_atime); + fxdr_nqtime(&sp->sa_nqmtime, &va.va_mtime); + va.va_flags = fxdr_unsigned(u_long, sp->sa_nqflags); + } + + /* + * If the size is being changed write acces is required, otherwise + * just check for a read only file system. + */ + if (va.va_size == ((u_quad_t)((quad_t) -1))) { + if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { + error = EROFS; + goto out; + } + } else { + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly, + nfsd->nd_procp)) + goto out; + } + if (error = VOP_SETATTR(vp, &va, cred, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); +out: + vput(vp); + nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + if (nfsd->nd_nqlflag != NQL_NOVAL) { + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + txdr_hyper(&frev2, tl); + } + nfsm_srvdone; +} + +/* + * nfs lookup rpc + */ +nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct nfsv2_fattr *fp; + struct nameidata nd; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + register caddr_t cp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, cache, duration2, cache2, len; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vattr va; + u_quad_t frev, frev2; + + fhp = &nfh.fh_generic; + duration2 = 0; + if (nfsd->nd_nqlflag != NQL_NOVAL) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + duration2 = fxdr_unsigned(int, *tl); + } + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; + if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + nfsm_reply(0); + nqsrv_getl(nd.ni_startdir, NQL_READ); + vrele(nd.ni_startdir); + FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); + vp = nd.ni_vp; + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) { + vput(vp); + nfsm_reply(0); + } + if (duration2) + (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd, + nam, &cache2, &frev2, cred); + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED); + if (nfsd->nd_nqlflag != NQL_NOVAL) { + if (duration2) { + nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(NQL_READ); + *tl++ = txdr_unsigned(cache2); 
+ *tl++ = txdr_unsigned(duration2); + txdr_hyper(&frev2, tl); + } else { + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + *tl = 0; + } + } + nfsm_srvfhtom(fhp); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + nfsm_srvdone; +} + +/* + * nfs readlink service + */ +nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; + register struct iovec *ivp = iv; + register struct mbuf *mp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, i, tlen, len; + char *cp2; + struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + len = 0; + i = 0; + while (len < NFS_MAXPATHLEN) { + MGET(mp, M_WAIT, MT_DATA); + MCLGET(mp, M_WAIT); + mp->m_len = NFSMSIZ(mp); + if (len == 0) + mp3 = mp2 = mp; + else { + mp2->m_next = mp; + mp2 = mp; + } + if ((len+mp->m_len) > NFS_MAXPATHLEN) { + mp->m_len = NFS_MAXPATHLEN-len; + len = NFS_MAXPATHLEN; + } else + len += mp->m_len; + ivp->iov_base = mtod(mp, caddr_t); + ivp->iov_len = mp->m_len; + i++; + ivp++; + } + uiop->uio_iov = iv; + uiop->uio_iovcnt = i; + uiop->uio_offset = 0; + uiop->uio_resid = len; + uiop->uio_rw = UIO_READ; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = (struct proc *)0; + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) { + m_freem(mp3); + nfsm_reply(0); + } + if (vp->v_type != VLNK) { + error = EINVAL; + goto out; + } + nqsrv_getl(vp, NQL_READ); + error = VOP_READLINK(vp, uiop, cred); +out: + vput(vp); + if (error) + m_freem(mp3); + nfsm_reply(NFSX_UNSIGNED); + if (uiop->uio_resid > 0) { + len -= uiop->uio_resid; + tlen = nfsm_rndup(len); + nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); + } + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + *tl = txdr_unsigned(len); + mb->m_next = mp3; + nfsm_srvdone; +} + +/* + * nfs read service + */ +nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct iovec *iv; + struct iovec *iv2; + register struct mbuf *m; + register struct nfsv2_fattr *fp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct mbuf *m2; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + struct vattr va; + off_t off; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + if (nfsd->nd_nqlflag == NQL_NOVAL) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + off = (off_t)fxdr_unsigned(u_long, *tl); + } else { + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + fxdr_hyper(tl, &off); + } + nfsm_srvstrsiz(cnt, NFS_MAXDATA); + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + if (vp->v_type != VREG) { + error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; + vput(vp); + nfsm_reply(0); + } + nqsrv_getl(vp, NQL_READ); + if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) && + (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) { + vput(vp); + nfsm_reply(0); + } + if (error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + if (off >= va.va_size) + cnt = 0; + else if ((off + cnt) > va.va_size) + cnt = nfsm_rndup(va.va_size - off); + nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt)); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + len = left = cnt; + if (cnt > 0) { + /* + * Generate the mbuf list with the uio_iov ref. to it. + */ + i = 0; + m = m2 = mb; + MALLOC(iv, struct iovec *, + ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec), + M_TEMP, M_WAITOK); + iv2 = iv; + while (left > 0) { + siz = min(M_TRAILINGSPACE(m), left); + if (siz > 0) { + m->m_len += siz; + iv->iov_base = bpos; + iv->iov_len = siz; + iv++; + i++; + left -= siz; + } + if (left > 0) { + MGET(m, M_WAIT, MT_DATA); + MCLGET(m, M_WAIT); + m->m_len = 0; + m2->m_next = m; + m2 = m; + bpos = mtod(m, caddr_t); + } + } + uiop->uio_iov = iv2; + uiop->uio_iovcnt = i; + uiop->uio_offset = off; + uiop->uio_resid = cnt; + uiop->uio_rw = UIO_READ; + uiop->uio_segflg = UIO_SYSSPACE; + error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); + off = uiop->uio_offset; + FREE((caddr_t)iv2, M_TEMP); + if (error || (error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp))) { + m_freem(mreq); + vput(vp); + nfsm_reply(0); + } + } else + uiop->uio_resid = 0; + vput(vp); + nfsm_srvfillattr; + len -= uiop->uio_resid; + tlen = nfsm_rndup(len); + if (cnt != tlen || tlen != len) + nfsm_adj(mb, cnt-tlen, tlen-len); + *tl = txdr_unsigned(len); + nfsm_srvdone; +} + +/* + * nfs write service + */ +nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct iovec *ivp; + register struct mbuf *mp; + register struct nfsv2_fattr *fp; + struct iovec iv[NFS_MAXIOVEC]; + struct vattr va; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, siz, len, xfer; + int ioflags = IO_SYNC | IO_NODELOCKED; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct uio io, *uiop = &io; + off_t off; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); + if (nfsd->nd_nqlflag == NQL_NOVAL) { + off = (off_t)fxdr_unsigned(u_long, *++tl); + tl += 2; + } else { + fxdr_hyper(tl, &off); + tl += 2; + if (fxdr_unsigned(u_long, *tl++)) + ioflags |= IO_APPEND; + } + len = fxdr_unsigned(long, *tl); + if (len > NFS_MAXDATA || len <= 0) { + error = EBADRPC; + nfsm_reply(0); + } + if (dpos == (mtod(md, caddr_t)+md->m_len)) { + mp = md->m_next; + if (mp == NULL) { + error = EBADRPC; + nfsm_reply(0); + } + } else { + mp = md; + siz = dpos-mtod(mp, caddr_t); + mp->m_len -= siz; + NFSMADV(mp, siz); + } + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + if (vp->v_type != VREG) { + error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; + vput(vp); + nfsm_reply(0); + } + nqsrv_getl(vp, NQL_WRITE); + if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + uiop->uio_resid = 0; + uiop->uio_rw = UIO_WRITE; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_procp = (struct proc *)0; + /* + * Do up to NFS_MAXIOVEC mbufs of write each iteration of the + * loop until done. + */ + while (len > 0 && uiop->uio_resid == 0) { + ivp = iv; + siz = 0; + uiop->uio_iov = ivp; + uiop->uio_iovcnt = 0; + uiop->uio_offset = off; + while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) { + ivp->iov_base = mtod(mp, caddr_t); + if (len < mp->m_len) + ivp->iov_len = xfer = len; + else + ivp->iov_len = xfer = mp->m_len; +#ifdef notdef + /* Not Yet .. */ + if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0) + ivp->iov_op = NULL; /* what should it be ?? */ + else + ivp->iov_op = NULL; +#endif + uiop->uio_iovcnt++; + ivp++; + len -= xfer; + siz += xfer; + mp = mp->m_next; + } + if (len > 0 && mp == NULL) { + error = EBADRPC; + vput(vp); + nfsm_reply(0); + } + uiop->uio_resid = siz; + if (error = VOP_WRITE(vp, uiop, ioflags, cred)) { + vput(vp); + nfsm_reply(0); + } + off = uiop->uio_offset; + } + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + if (nfsd->nd_nqlflag != NQL_NOVAL) { + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + txdr_hyper(&va.va_filerev, tl); + } + nfsm_srvdone; +} + +/* + * nfs create service + * now does a truncate to 0 length via. setattr if it already exists + */ +nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct nfsv2_fattr *fp; + struct vattr va; + register struct nfsv2_sattr *sp; + register u_long *tl; + struct nameidata nd; + register caddr_t cp; + register long t1; + caddr_t bpos; + int error = 0, rdev, cache, len, tsize; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + nd.ni_cnd.cn_nameiop = 0; + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; + if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + nfsm_reply(0); + VATTR_NULL(&va); + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + /* + * Iff doesn't exist, create it + * otherwise just truncate to 0 length + * should I set the mode too ?? 
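+	 * (As coded below, an existing file only has its size attribute
+	 * honored: sa_size is pushed through VOP_SETATTR() after a
+	 * write-access check, and the mode/owner/time fields are ignored.)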
+ */ + if (nd.ni_vp == NULL) { + va.va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); + if (va.va_type == VNON) + va.va_type = VREG; + va.va_mode = nfstov_mode(sp->sa_mode); + if (nfsd->nd_nqlflag == NQL_NOVAL) + rdev = fxdr_unsigned(long, sp->sa_nfssize); + else + rdev = fxdr_unsigned(long, sp->sa_nqrdev); + if (va.va_type == VREG || va.va_type == VSOCK) { + vrele(nd.ni_startdir); + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va)) + nfsm_reply(0); + FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); + } else if (va.va_type == VCHR || va.va_type == VBLK || + va.va_type == VFIFO) { + if (va.va_type == VCHR && rdev == 0xffffffff) + va.va_type = VFIFO; + if (va.va_type == VFIFO) { +#ifndef FIFO + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + error = ENXIO; + goto out; +#endif /* FIFO */ + } else if (error = suser(cred, (u_short *)0)) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + goto out; + } else + va.va_rdev = (dev_t)rdev; + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va)) { + vrele(nd.ni_startdir); + nfsm_reply(0); + } + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); + nd.ni_cnd.cn_proc = nfsd->nd_procp; + nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred; + if (error = lookup(&nd)) { + free(nd.ni_cnd.cn_pnbuf, M_NAMEI); + nfsm_reply(0); + } + FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); + if (nd.ni_cnd.cn_flags & ISSYMLINK) { + vrele(nd.ni_dvp); + vput(nd.ni_vp); + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + error = EINVAL; + nfsm_reply(0); + } + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + error = ENXIO; + goto out; + } + vp = nd.ni_vp; + } else { + vrele(nd.ni_startdir); + free(nd.ni_cnd.cn_pnbuf, M_NAMEI); + vp = nd.ni_vp; + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nfsd->nd_nqlflag == NQL_NOVAL) { + tsize = fxdr_unsigned(long, sp->sa_nfssize); + if (tsize != -1) + va.va_size = (u_quad_t)tsize; + else + va.va_size = -1; + } else + fxdr_hyper(&sp->sa_nqsize, &va.va_size); + if (va.va_size != -1) { + if (error = nfsrv_access(vp, VWRITE, cred, + (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + nqsrv_getl(vp, NQL_WRITE); + if (error = VOP_SETATTR(vp, &va, cred, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + } + } + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) { + vput(vp); + nfsm_reply(0); + } + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfhtom(fhp); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + return (error); +nfsmout: + if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags) + vrele(nd.ni_startdir); + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vput(nd.ni_vp); + return (error); + +out: + vrele(nd.ni_startdir); + free(nd.ni_cnd.cn_pnbuf, M_NAMEI); + nfsm_reply(0); +} + +/* + * nfs remove service + */ +nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct nameidata nd; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, cache, len; + char *cp2; + struct mbuf *mb, *mreq; + struct vnode 
*vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = DELETE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + nfsm_reply(0); + vp = nd.ni_vp; + if (vp->v_type == VDIR && + (error = suser(cred, (u_short *)0))) + goto out; + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) { + error = EBUSY; + goto out; + } + if (vp->v_flag & VTEXT) + (void) vnode_pager_uncache(vp); +out: + if (!error) { + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + nqsrv_getl(vp, NQL_WRITE); + error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); + } + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * nfs rename service + */ +nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, cache, len, len2; + char *cp2; + struct mbuf *mb, *mreq; + struct nameidata fromnd, tond; + struct vnode *fvp, *tvp, *tdvp; + nfsv2fh_t fnfh, tnfh; + fhandle_t *ffhp, *tfhp; + u_quad_t frev; + uid_t saved_uid; + + ffhp = &fnfh.fh_generic; + tfhp = &tnfh.fh_generic; + fromnd.ni_cnd.cn_nameiop = 0; + tond.ni_cnd.cn_nameiop = 0; + nfsm_srvmtofh(ffhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + /* + * Remember our original uid so that we can reset cr_uid before + * the second nfs_namei() call, in case it is remapped. + */ + saved_uid = cred->cr_uid; + fromnd.ni_cnd.cn_cred = cred; + fromnd.ni_cnd.cn_nameiop = DELETE; + fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; + if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md, + &dpos, nfsd->nd_procp)) + nfsm_reply(0); + fvp = fromnd.ni_vp; + nfsm_srvmtofh(tfhp); + nfsm_strsiz(len2, NFS_MAXNAMLEN); + cred->cr_uid = saved_uid; + tond.ni_cnd.cn_cred = cred; + tond.ni_cnd.cn_nameiop = RENAME; + tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; + if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md, + &dpos, nfsd->nd_procp)) { + VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); + vrele(fromnd.ni_dvp); + vrele(fvp); + goto out1; + } + tdvp = tond.ni_dvp; + tvp = tond.ni_vp; + if (tvp != NULL) { + if (fvp->v_type == VDIR && tvp->v_type != VDIR) { + error = EISDIR; + goto out; + } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { + error = ENOTDIR; + goto out; + } + if (tvp->v_type == VDIR && tvp->v_mountedhere) { + error = EXDEV; + goto out; + } + } + if (fvp->v_type == VDIR && fvp->v_mountedhere) { + error = EBUSY; + goto out; + } + if (fvp->v_mount != tdvp->v_mount) { + error = EXDEV; + goto out; + } + if (fvp == tdvp) + error = EINVAL; + /* + * If source is the same as the destination (that is the + * same vnode with the same name in the same directory), + * then there is nothing to do. 
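+	 * A negative value is stored in error here so that the
+	 * VOP_RENAME() call below is skipped and the cleanup path
+	 * runs instead, since the rename is a no-op in this case.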
+ */ + if (fvp == tvp && fromnd.ni_dvp == tdvp && + fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) + error = -1; +out: + if (!error) { + nqsrv_getl(fromnd.ni_dvp, NQL_WRITE); + nqsrv_getl(tdvp, NQL_WRITE); + if (tvp) + nqsrv_getl(tvp, NQL_WRITE); + error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, + tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + } else { + VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); + vrele(fromnd.ni_dvp); + vrele(fvp); + } + vrele(tond.ni_startdir); + FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); +out1: + vrele(fromnd.ni_startdir); + FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); + nfsm_reply(0); + return (error); + +nfsmout: + if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) { + vrele(tond.ni_startdir); + FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); + } + if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) { + vrele(fromnd.ni_startdir); + FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); + VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); + vrele(fromnd.ni_dvp); + vrele(fvp); + } + return (error); +} + +/* + * nfs link service + */ +nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct nameidata nd; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, len; + char *cp2; + struct mbuf *mb, *mreq; + struct vnode *vp, *xp; + nfsv2fh_t nfh, dnfh; + fhandle_t *fhp, *dfhp; + u_quad_t frev; + + fhp = &nfh.fh_generic; + dfhp = &dnfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvmtofh(dfhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0))) + goto out1; + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + goto out1; + xp = nd.ni_vp; + if (xp != NULL) { + error = EEXIST; + goto out; + } + xp = nd.ni_dvp; + if (vp->v_mount != xp->v_mount) + error = EXDEV; +out: + if (!error) { + nqsrv_getl(vp, NQL_WRITE); + nqsrv_getl(xp, NQL_WRITE); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + } +out1: + vrele(vp); + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * nfs symbolic link service + */ +nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct vattr va; + struct nameidata nd; + register u_long *tl; + register long t1; + struct nfsv2_sattr *sp; + caddr_t bpos; + struct uio io; + struct iovec iv; + int error = 0, cache, len, len2; + char *pathcp, *cp2; + struct mbuf *mb, *mreq; + nfsv2fh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + pathcp = (char *)0; + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + goto out; + nfsm_strsiz(len2, NFS_MAXPATHLEN); + MALLOC(pathcp, caddr_t, len2 + 1, 
M_TEMP, M_WAITOK); + iv.iov_base = pathcp; + iv.iov_len = len2; + io.uio_resid = len2; + io.uio_offset = 0; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + nfsm_mtouio(&io, len2); + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + *(pathcp + len2) = '\0'; + if (nd.ni_vp) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + error = EEXIST; + goto out; + } + VATTR_NULL(&va); + va.va_mode = fxdr_unsigned(u_short, sp->sa_mode); + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, pathcp); +out: + if (pathcp) + FREE(pathcp, M_TEMP); + nfsm_reply(0); + return (error); +nfsmout: + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + if (pathcp) + FREE(pathcp, M_TEMP); + return (error); +} + +/* + * nfs mkdir service + */ +nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + struct vattr va; + register struct nfsv2_fattr *fp; + struct nameidata nd; + register caddr_t cp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, cache, len; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT; + if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + nfsm_reply(0); + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + VATTR_NULL(&va); + va.va_type = VDIR; + va.va_mode = nfstov_mode(*tl++); + vp = nd.ni_vp; + if (vp != NULL) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(vp); + error = EEXIST; + nfsm_reply(0); + } + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va)) + nfsm_reply(0); + vp = nd.ni_vp; + bzero((caddr_t)fhp, sizeof(nfh)); + fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; + if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) { + vput(vp); + nfsm_reply(0); + } + error = VOP_GETATTR(vp, &va, cred, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfhtom(fhp); + nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)); + nfsm_srvfillattr; + return (error); +nfsmout: + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + return (error); +} + +/* + * nfs rmdir service + */ +nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, cache, len; + char *cp2; + struct mbuf *mb, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct nameidata nd; + u_quad_t frev; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_srvstrsiz(len, NFS_MAXNAMLEN); + nd.ni_cnd.cn_cred = cred; + nd.ni_cnd.cn_nameiop = DELETE; + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + if (error = nfs_namei(&nd, fhp, len, 
nfsd->nd_slp, nam, &md, &dpos, + nfsd->nd_procp)) + nfsm_reply(0); + vp = nd.ni_vp; + if (vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + /* + * No rmdir "." please. + */ + if (nd.ni_dvp == vp) { + error = EINVAL; + goto out; + } + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) + error = EBUSY; +out: + if (!error) { + nqsrv_getl(nd.ni_dvp, NQL_WRITE); + nqsrv_getl(vp, NQL_WRITE); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + } else { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); + } + nfsm_reply(0); + nfsm_srvdone; +} + +/* + * nfs readdir service + * - mallocs what it thinks is enough to read + * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR + * - calls VOP_READDIR() + * - loops around building the reply + * if the output generated exceeds count break out of loop + * The nfsm_clget macro is used here so that the reply will be packed + * tightly in mbuf clusters. + * - it only knows that it has encountered eof when the VOP_READDIR() + * reads nothing + * - as such one readdir rpc will return eof false although you are there + * and then the next will return eof + * - it trims out records with d_fileno == 0 + * this doesn't matter for Unix clients, but they might confuse clients + * for other os'. + * NB: It is tempting to set eof to true if the VOP_READDIR() reads less + * than requested, but this may not apply to all filesystems. For + * example, client NFS does not { although it is never remote mounted + * anyhow } + * The alternate call nqnfsrv_readdirlook() does lookups as well. + * PS: The NFS protocol spec. does not clarify what the "count" byte + * argument is a count of.. just name strings and file id's or the + * entire reply rpc or ... + * I tried just file name and id sizes and it confused the Sun client, + * so I am using the full rpc size now. The "paranoia.." comment refers + * to including the status longwords that are not a part of the dir. + * "entry" structures, but are in the rpc. + */ +struct flrep { + u_long fl_cachable; + u_long fl_duration; + u_long fl_frev[2]; + nfsv2fh_t fl_nfh; + u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)]; +}; + +nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register char *bp, *be; + register struct mbuf *mp; + register struct dirent *dp; + register caddr_t cp; + register u_long *tl; + register long t1; + caddr_t bpos; + struct mbuf *mb, *mb2, *mreq, *mp2; + char *cpos, *cend, *cp2, *rbuf; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct uio io; + struct iovec iv; + int len, nlen, rem, xfer, tsiz, i, error = 0; + int siz, cnt, fullsiz, eofflag, rdonly, cache; + u_quad_t frev; + u_long off, *cookiebuf, *cookie; + int ncookies; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); + off = fxdr_unsigned(u_long, *tl++); + cnt = fxdr_unsigned(int, *tl); + siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1)); + if (cnt > NFS_MAXREADDIR) + siz = NFS_MAXREADDIR; + fullsiz = siz; + ncookies = siz / 16; /* Guess at the number of cookies needed. 
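+				 * This is a heuristic: one cookie is needed
+				 * per directory entry, and entries are assumed
+				 * to average 16 bytes or more.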
*/ + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + nqsrv_getl(vp, NQL_READ); + if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + VOP_UNLOCK(vp); + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + MALLOC(cookiebuf, u_long *, ncookies * sizeof(*cookiebuf), M_TEMP, + M_WAITOK); +again: + iv.iov_base = rbuf; + iv.iov_len = fullsiz; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_offset = (off_t)off; + io.uio_resid = fullsiz; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + error = VOP_READDIR(vp, &io, cred, &eofflag, cookiebuf, ncookies); + cookie = cookiebuf; + off = (off_t)io.uio_offset; + if (error) { + vrele(vp); + free((caddr_t)cookiebuf, M_TEMP); + free((caddr_t)rbuf, M_TEMP); + nfsm_reply(0); + } + if (io.uio_resid) { + siz -= io.uio_resid; + + /* + * If nothing read, return eof + * rpc reply + */ + if (siz == 0) { + vrele(vp); + nfsm_reply(2*NFSX_UNSIGNED); + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = nfs_false; + *tl = nfs_true; + FREE((caddr_t)cookiebuf, M_TEMP); + FREE((caddr_t)rbuf, M_TEMP); + return (0); + } + } + + /* + * Check for degenerate cases of nothing useful read. + * If so go try again + */ + cpos = rbuf; + cend = rbuf + siz; + while (cpos < cend) { + dp = (struct dirent *)cpos; + if (dp->d_fileno == 0) { + cpos += dp->d_reclen; + cookie++; + } else + break; + } + if (cpos >= cend) { + siz = fullsiz; + goto again; + } + + len = 3*NFSX_UNSIGNED; /* paranoia, probably can be 0 */ + nfsm_reply(siz); + mp = mp2 = mb; + bp = bpos; + be = bp + M_TRAILINGSPACE(mp); + + /* Loop through the records and build reply */ + while (cpos < cend) { + if (dp->d_fileno != 0) { + nlen = dp->d_namlen; + rem = nfsm_rndup(nlen)-nlen; + len += (4*NFSX_UNSIGNED + nlen + rem); + if (len > cnt) { + eofflag = 0; + break; + } + /* + * Build the directory record xdr from + * the dirent entry. 
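+			 * Each entry on the wire is: a "value follows"
+			 * boolean (nfs_true), the file id, the name length,
+			 * the name padded with NULs to a 4 byte boundary,
+			 * and finally the cookie that resumes the directory
+			 * at the next entry.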
+ */ + nfsm_clget; + *tl = nfs_true; + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(dp->d_fileno); + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(nlen); + bp += NFSX_UNSIGNED; + + /* And loop around copying the name */ + xfer = nlen; + cp = dp->d_name; + while (xfer > 0) { + nfsm_clget; + if ((bp+xfer) > be) + tsiz = be-bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + /* And null pad to a long boundary */ + for (i = 0; i < rem; i++) + *bp++ = '\0'; + nfsm_clget; + + /* Finish off the record */ + *tl = txdr_unsigned(*cookie); + bp += NFSX_UNSIGNED; + } + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookie++; + } + vrele(vp); + nfsm_clget; + *tl = nfs_false; + bp += NFSX_UNSIGNED; + nfsm_clget; + if (eofflag) + *tl = nfs_true; + else + *tl = nfs_false; + bp += NFSX_UNSIGNED; + if (mp != mb) { + if (bp < be) + mp->m_len = bp - mtod(mp, caddr_t); + } else + mp->m_len += bp - bpos; + FREE(cookiebuf, M_TEMP); + FREE(rbuf, M_TEMP); + nfsm_srvdone; +} + +nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register char *bp, *be; + register struct mbuf *mp; + register struct dirent *dp; + register caddr_t cp; + register u_long *tl; + register long t1; + caddr_t bpos; + struct mbuf *mb, *mb2, *mreq, *mp2; + char *cpos, *cend, *cp2, *rbuf; + struct vnode *vp, *nvp; + struct flrep fl; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct uio io; + struct iovec iv; + struct vattr va; + struct nfsv2_fattr *fp; + int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2; + int siz, cnt, fullsiz, eofflag, rdonly, cache; + u_quad_t frev, frev2; + u_long off, *cookiebuf, *cookie; + int ncookies; + + fhp = &nfh.fh_generic; + nfsm_srvmtofh(fhp); + nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); + off = fxdr_unsigned(u_long, *tl++); + cnt = fxdr_unsigned(int, *tl++); + duration2 = fxdr_unsigned(int, *tl); + siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1)); + if (cnt > NFS_MAXREADDIR) + siz = NFS_MAXREADDIR; + fullsiz = siz; + ncookies = siz / 16; /* Guess at the number of cookies needed. */ + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + nfsm_reply(0); + nqsrv_getl(vp, NQL_READ); + if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) { + vput(vp); + nfsm_reply(0); + } + VOP_UNLOCK(vp); + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + MALLOC(cookiebuf, u_long *, ncookies * sizeof(*cookiebuf), M_TEMP, + M_WAITOK); +again: + iv.iov_base = rbuf; + iv.iov_len = fullsiz; + io.uio_iov = &iv; + io.uio_iovcnt = 1; + io.uio_offset = (off_t)off; + io.uio_resid = fullsiz; + io.uio_segflg = UIO_SYSSPACE; + io.uio_rw = UIO_READ; + io.uio_procp = (struct proc *)0; + error = VOP_READDIR(vp, &io, cred, &eofflag, cookiebuf, ncookies); + cookie = cookiebuf; + off = (u_long)io.uio_offset; + if (error) { + vrele(vp); + free((caddr_t)cookiebuf, M_TEMP); + free((caddr_t)rbuf, M_TEMP); + nfsm_reply(0); + } + if (io.uio_resid) { + siz -= io.uio_resid; + + /* + * If nothing read, return eof + * rpc reply + */ + if (siz == 0) { + vrele(vp); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); + *tl++ = nfs_false; + *tl = nfs_true; + FREE((caddr_t)cookiebuf, M_TEMP); + FREE((caddr_t)rbuf, M_TEMP); + return (0); + } + } + + /* + * Check for degenerate cases of nothing useful read. 
+ * If so go try again + */ + cpos = rbuf; + cend = rbuf + siz; + while (cpos < cend) { + dp = (struct dirent *)cpos; + if (dp->d_fileno == 0) { + cpos += dp->d_reclen; + cookie++; + } else + break; + } + if (cpos >= cend) { + siz = fullsiz; + goto again; + } + + len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ + nfsm_reply(siz); + mp = mp2 = mb; + bp = bpos; + be = bp + M_TRAILINGSPACE(mp); + + /* Loop through the records and build reply */ + while (cpos < cend) { + if (dp->d_fileno != 0) { + nlen = dp->d_namlen; + rem = nfsm_rndup(nlen)-nlen; + + /* + * For readdir_and_lookup get the vnode using + * the file number. + */ + if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) + goto invalid; + bzero((caddr_t)&fl.fl_nfh, sizeof (nfsv2fh_t)); + fl.fl_nfh.fh_generic.fh_fsid = + nvp->v_mount->mnt_stat.f_fsid; + if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) { + vput(nvp); + goto invalid; + } + if (duration2) { + (void) nqsrv_getlease(nvp, &duration2, NQL_READ, + nfsd, nam, &cache2, &frev2, cred); + fl.fl_duration = txdr_unsigned(duration2); + fl.fl_cachable = txdr_unsigned(cache2); + txdr_hyper(&frev2, fl.fl_frev); + } else + fl.fl_duration = 0; + if (VOP_GETATTR(nvp, &va, cred, nfsd->nd_procp)) { + vput(nvp); + goto invalid; + } + vput(nvp); + fp = (struct nfsv2_fattr *)&fl.fl_fattr; + nfsm_srvfillattr; + len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH + + NFSX_NQFATTR); + if (len > cnt) { + eofflag = 0; + break; + } + /* + * Build the directory record xdr from + * the dirent entry. + */ + nfsm_clget; + *tl = nfs_true; + bp += NFSX_UNSIGNED; + + /* + * For readdir_and_lookup copy the stuff out. + */ + xfer = sizeof (struct flrep); + cp = (caddr_t)&fl; + while (xfer > 0) { + nfsm_clget; + if ((bp+xfer) > be) + tsiz = be-bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + nfsm_clget; + *tl = txdr_unsigned(dp->d_fileno); + bp += NFSX_UNSIGNED; + nfsm_clget; + *tl = txdr_unsigned(nlen); + bp += NFSX_UNSIGNED; + + /* And loop around copying the name */ + xfer = nlen; + cp = dp->d_name; + while (xfer > 0) { + nfsm_clget; + if ((bp+xfer) > be) + tsiz = be-bp; + else + tsiz = xfer; + bcopy(cp, bp, tsiz); + bp += tsiz; + xfer -= tsiz; + if (xfer > 0) + cp += tsiz; + } + /* And null pad to a long boundary */ + for (i = 0; i < rem; i++) + *bp++ = '\0'; + nfsm_clget; + + /* Finish off the record */ + *tl = txdr_unsigned(*cookie); + bp += NFSX_UNSIGNED; + } +invalid: + cpos += dp->d_reclen; + dp = (struct dirent *)cpos; + cookie++; + } + vrele(vp); + nfsm_clget; + *tl = nfs_false; + bp += NFSX_UNSIGNED; + nfsm_clget; + if (eofflag) + *tl = nfs_true; + else + *tl = nfs_false; + bp += NFSX_UNSIGNED; + if (mp != mb) { + if (bp < be) + mp->m_len = bp - mtod(mp, caddr_t); + } else + mp->m_len += bp - bpos; + FREE(cookiebuf, M_TEMP); + FREE(rbuf, M_TEMP); + nfsm_srvdone; +} + +/* + * nfs statfs service + */ +nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + register struct statfs *sf; + register struct nfsv2_statfs *sfp; + register u_long *tl; + register long t1; + caddr_t bpos; + int error = 0, rdonly, cache, isnq; + char *cp2; + struct mbuf *mb, *mb2, *mreq; + struct vnode *vp; + nfsv2fh_t nfh; + fhandle_t *fhp; + struct statfs statfs; + u_quad_t frev; + + fhp = &nfh.fh_generic; + isnq = (nfsd->nd_nqlflag != NQL_NOVAL); + nfsm_srvmtofh(fhp); + if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) + 
nfsm_reply(0); + sf = &statfs; + error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp); + vput(vp); + nfsm_reply(NFSX_STATFS(isnq)); + nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq)); + sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); + sfp->sf_bsize = txdr_unsigned(sf->f_bsize); + sfp->sf_blocks = txdr_unsigned(sf->f_blocks); + sfp->sf_bfree = txdr_unsigned(sf->f_bfree); + sfp->sf_bavail = txdr_unsigned(sf->f_bavail); + if (isnq) { + sfp->sf_files = txdr_unsigned(sf->f_files); + sfp->sf_ffree = txdr_unsigned(sf->f_ffree); + } + nfsm_srvdone; +} + +/* + * Null operation, used by clients to ping server + */ +/* ARGSUSED */ +nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + caddr_t bpos; + int error = VNOVAL, cache; + struct mbuf *mb, *mreq; + u_quad_t frev; + + nfsm_reply(0); + return (error); +} + +/* + * No operation, used for obsolete procedures + */ +/* ARGSUSED */ +nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq) + struct nfsd *nfsd; + struct mbuf *mrep, *md; + caddr_t dpos; + struct ucred *cred; + struct mbuf *nam, **mrq; +{ + caddr_t bpos; + int error, cache; + struct mbuf *mb, *mreq; + u_quad_t frev; + + if (nfsd->nd_repstat) + error = nfsd->nd_repstat; + else + error = EPROCUNAVAIL; + nfsm_reply(0); + return (error); +} + +/* + * Perform access checking for vnodes obtained from file handles that would + * refer to files already opened by a Unix client. You cannot just use + * vn_writechk() and VOP_ACCESS() for two reasons. + * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case + * 2 - The owner is to be given access irrespective of mode bits so that + * processes that chmod after opening a file don't break. I don't like + * this because it opens a security hole, but since the nfs server opens + * a security hole the size of a barn door anyhow, what the heck. + */ +nfsrv_access(vp, flags, cred, rdonly, p) + register struct vnode *vp; + int flags; + register struct ucred *cred; + int rdonly; + struct proc *p; +{ + struct vattr vattr; + int error; + if (flags & VWRITE) { + /* Just vn_writechk() changed to check rdonly */ + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket or a block or character + * device resident on the file system. + */ + if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (vp->v_type) { + case VREG: case VDIR: case VLNK: + return (EROFS); + } + } + /* + * If there's shared text associated with + * the inode, try to free it up once. If + * we fail, we can't allow writing. + */ + if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp)) + return (ETXTBSY); + } + if (error = VOP_GETATTR(vp, &vattr, cred, p)) + return (error); + if ((error = VOP_ACCESS(vp, flags, cred, p)) && + cred->cr_uid != vattr.va_uid) + return (error); + return (0); +} diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c new file mode 100644 index 00000000000..6479d493d44 --- /dev/null +++ b/sys/nfs/nfs_socket.c @@ -0,0 +1,1984 @@ +/* $NetBSD: nfs_socket.c,v 1.21 1995/08/13 00:00:01 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94 + */ + +/* + * Socket operations for use by nfs + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/vnode.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/syslog.h> +#include <sys/tprintf.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsmount.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsrtt.h> +#include <nfs/nqnfs.h> + +#define TRUE 1 +#define FALSE 0 + +/* + * Estimate rto for an nfs rpc sent via. an unreliable datagram. + * Use the mean and mean deviation of rtt for the appropriate type of rpc + * for the frequent rpcs and a default for the others. + * The justification for doing "other" this way is that these rpcs + * happen so infrequently that timer est. would probably be stale. + * Also, since many of these rpcs are + * non-idempotent, a conservative timeout is desired. + * getattr, lookup - A+2D + * read, write - A+4D + * other - nm_timeo + */ +#define NFS_RTO(n, t) \ + ((t) == 0 ? (n)->nm_timeo : \ + ((t) < 3 ? 
\ + (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ + ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) +#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] +#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] +/* + * External data, mostly RPC constants in XDR form + */ +extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, + rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, + rpc_auth_kerb; +extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; +extern time_t nqnfsstarttime; +extern int nonidempotent[NFS_NPROCS]; + +/* + * Maps errno values to nfs error numbers. + * Use NFSERR_IO as the catch all for ones not specifically defined in + * RFC 1094. + */ +static int nfsrv_errmap[ELAST] = { + NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, + NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, + NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, + NFSERR_IO, +}; + +/* + * Defines which timer to use for the procnum. + * 0 - default + * 1 - getattr + * 2 - lookup + * 3 - read + * 4 - write + */ +static int proct[NFS_NPROCS] = { + 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, +}; + +/* + * There is a congestion window for outstanding rpcs maintained per mount + * point. The cwnd size is adjusted in roughly the way that: + * Van Jacobson, Congestion avoidance and Control, In "Proceedings of + * SIGCOMM '88". ACM, August 1988. + * describes for TCP. The cwnd size is chopped in half on a retransmit timeout + * and incremented by 1/cwnd when each rpc reply is received and a full cwnd + * of rpcs is in progress. + * (The sent count and cwnd are scaled for integer arith.) + * Variants of "slow start" were tried and were found to be too much of a + * performance hit (ave. rtt 3 times larger), + * I suspect due to the large rtt that nfs rpcs have. + */ +#define NFS_CWNDSCALE 256 +#define NFS_MAXCWND (NFS_CWNDSCALE * 32) +static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; +int nfs_sbwait(); +void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock(); +void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease(); +struct mbuf *nfsm_rpchead(); +int nfsrtton = 0; +struct nfsrtt nfsrtt; + +/* + * Initialize sockets and congestion for a new NFS connection. + * We do not free the sockaddr if error. 
+ */ +nfs_connect(nmp, rep) + register struct nfsmount *nmp; + struct nfsreq *rep; +{ + register struct socket *so; + int s, error, rcvreserve, sndreserve; + struct sockaddr *saddr; + struct sockaddr_in *sin; + struct mbuf *m; + u_short tport; + + nmp->nm_so = (struct socket *)0; + saddr = mtod(nmp->nm_nam, struct sockaddr *); + if (error = socreate(saddr->sa_family, + &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) + goto bad; + so = nmp->nm_so; + nmp->nm_soflags = so->so_proto->pr_flags; + + /* + * Some servers require that the client port be a reserved port number. + */ + if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { + MGET(m, M_WAIT, MT_SONAME); + sin = mtod(m, struct sockaddr_in *); + sin->sin_len = m->m_len = sizeof (struct sockaddr_in); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + tport = IPPORT_RESERVED - 1; + sin->sin_port = htons(tport); + while ((error = sobind(so, m)) == EADDRINUSE && + --tport > IPPORT_RESERVED / 2) + sin->sin_port = htons(tport); + m_freem(m); + if (error) + goto bad; + } + + /* + * Protocols that do not require connections may be optionally left + * unconnected for servers that reply from a port other than NFS_PORT. + */ + if (nmp->nm_flag & NFSMNT_NOCONN) { + if (nmp->nm_soflags & PR_CONNREQUIRED) { + error = ENOTCONN; + goto bad; + } + } else { + if (error = soconnect(so, nmp->nm_nam)) + goto bad; + + /* + * Wait for the connection to complete. Cribbed from the + * connect system call but with the wait timing out so + * that interruptible mounts don't hang here for a long time. + */ + s = splsoftnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + (void) tsleep((caddr_t)&so->so_timeo, PSOCK, + "nfscon", 2 * hz); + if ((so->so_state & SS_ISCONNECTING) && + so->so_error == 0 && rep && + (error = nfs_sigintr(nmp, rep, rep->r_procp))) { + so->so_state &= ~SS_ISCONNECTING; + splx(s); + goto bad; + } + } + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto bad; + } + splx(s); + } + if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { + so->so_rcv.sb_timeo = (5 * hz); + so->so_snd.sb_timeo = (5 * hz); + } else { + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_timeo = 0; + } + if (nmp->nm_sotype == SOCK_DGRAM) { + sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; + rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; + } else if (nmp->nm_sotype == SOCK_SEQPACKET) { + sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; + rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; + } else { + if (nmp->nm_sotype != SOCK_STREAM) + panic("nfscon sotype"); + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + MGET(m, M_WAIT, MT_SOOPTS); + *mtod(m, int *) = 1; + m->m_len = sizeof(int); + sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); + } + if (so->so_proto->pr_protocol == IPPROTO_TCP) { + MGET(m, M_WAIT, MT_SOOPTS); + *mtod(m, int *) = 1; + m->m_len = sizeof(int); + sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); + } + sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) + * 2; + rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) + * 2; + } + if (error = soreserve(so, sndreserve, rcvreserve)) + goto bad; + so->so_rcv.sb_flags |= SB_NOINTR; + so->so_snd.sb_flags |= SB_NOINTR; + + /* Initialize other non-zero congestion variables */ + nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = + nmp->nm_srtt[4] = (NFS_TIMEO << 3); + nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = + nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; + nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window 
*/ + nmp->nm_sent = 0; + nmp->nm_timeouts = 0; + return (0); + +bad: + nfs_disconnect(nmp); + return (error); +} + +/* + * Reconnect routine: + * Called when a connection is broken on a reliable protocol. + * - clean up the old socket + * - nfs_connect() again + * - set R_MUSTRESEND for all outstanding requests on mount point + * If this fails the mount point is DEAD! + * nb: Must be called with the nfs_sndlock() set on the mount point. + */ +nfs_reconnect(rep) + register struct nfsreq *rep; +{ + register struct nfsreq *rp; + register struct nfsmount *nmp = rep->r_nmp; + int error; + + nfs_disconnect(nmp); + while (error = nfs_connect(nmp, rep)) { + if (error == EINTR || error == ERESTART) + return (EINTR); + (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); + } + + /* + * Loop through outstanding request list and fix up all requests + * on old socket. + */ + for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) { + if (rp->r_nmp == nmp) + rp->r_flags |= R_MUSTRESEND; + } + return (0); +} + +/* + * NFS disconnect. Clean up and unlink. + */ +void +nfs_disconnect(nmp) + register struct nfsmount *nmp; +{ + register struct socket *so; + + if (nmp->nm_so) { + so = nmp->nm_so; + nmp->nm_so = (struct socket *)0; + soshutdown(so, 2); + soclose(so); + } +} + +/* + * This is the nfs send routine. For connection based socket types, it + * must be called with an nfs_sndlock() on the socket. + * "rep == NULL" indicates that it has been called from a server. + * For the client side: + * - return EINTR if the RPC is terminated, 0 otherwise + * - set R_MUSTRESEND if the send fails for any reason + * - do any cleanup required by recoverable socket errors (???) + * For the server side: + * - return EINTR or ERESTART if interrupted by a signal + * - return EPIPE if a connection is lost for connection based sockets (TCP...) + * - do any cleanup required by recoverable socket errors (???) + */ +nfs_send(so, nam, top, rep) + register struct socket *so; + struct mbuf *nam; + register struct mbuf *top; + struct nfsreq *rep; +{ + struct mbuf *sendnam; + int error, soflags, flags; + + if (rep) { + if (rep->r_flags & R_SOFTTERM) { + m_freem(top); + return (EINTR); + } + if ((so = rep->r_nmp->nm_so) == NULL) { + rep->r_flags |= R_MUSTRESEND; + m_freem(top); + return (0); + } + rep->r_flags &= ~R_MUSTRESEND; + soflags = rep->r_nmp->nm_soflags; + } else + soflags = so->so_proto->pr_flags; + if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) + sendnam = (struct mbuf *)0; + else + sendnam = nam; + if (so->so_type == SOCK_SEQPACKET) + flags = MSG_EOR; + else + flags = 0; + + error = sosend(so, sendnam, (struct uio *)0, top, + (struct mbuf *)0, flags); + if (error) { + if (rep) { + log(LOG_INFO, "nfs send error %d for server %s\n",error, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + /* + * Deal with errors for the client side. + */ + if (rep->r_flags & R_SOFTTERM) + error = EINTR; + else + rep->r_flags |= R_MUSTRESEND; + } else + log(LOG_INFO, "nfsd send error %d\n", error); + + /* + * Handle any recoverable (soft) socket errors here. (???) + */ + if (error != EINTR && error != ERESTART && + error != EWOULDBLOCK && error != EPIPE) + error = 0; + } + return (error); +} + +#ifdef NFSCLIENT +/* + * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all + * done by soreceive(), but for SOCK_STREAM we must deal with the Record + * Mark and consolidate the data into a new mbuf list. + * nb: Sometimes TCP passes the data up to soreceive() in long lists of + * small mbufs. 
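+ *     (nfs_realign() is run on the result below to copy any badly
+ *     aligned mbufs, since the RPC parsing code assumes longword
+ *     aligned data.)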
+ * For SOCK_STREAM we must be very careful to read an entire record once + * we have read any of it, even if the system call has been interrupted. + */ +nfs_receive(rep, aname, mp) + register struct nfsreq *rep; + struct mbuf **aname; + struct mbuf **mp; +{ + register struct socket *so; + struct uio auio; + struct iovec aio; + register struct mbuf *m; + struct mbuf *control; + u_long len; + struct mbuf **getnam; + int error, sotype, rcvflg; + struct proc *p = curproc; /* XXX */ + + /* + * Set up arguments for soreceive() + */ + *mp = (struct mbuf *)0; + *aname = (struct mbuf *)0; + sotype = rep->r_nmp->nm_sotype; + + /* + * For reliable protocols, lock against other senders/receivers + * in case a reconnect is necessary. + * For SOCK_STREAM, first get the Record Mark to find out how much + * more there is to get. + * We must lock the socket against other receivers + * until we have an entire rpc request/reply. + */ + if (sotype != SOCK_DGRAM) { + if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) + return (error); +tryagain: + /* + * Check for fatal errors and resending request. + */ + /* + * Ugh: If a reconnect attempt just happened, nm_so + * would have changed. NULL indicates a failed + * attempt that has essentially shut down this + * mount point. + */ + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { + nfs_sndunlock(&rep->r_nmp->nm_flag); + return (EINTR); + } + if ((so = rep->r_nmp->nm_so) == NULL) { + if (error = nfs_reconnect(rep)) { + nfs_sndunlock(&rep->r_nmp->nm_flag); + return (error); + } + goto tryagain; + } + while (rep->r_flags & R_MUSTRESEND) { + m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); + nfsstats.rpcretries++; + if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { + if (error == EINTR || error == ERESTART || + (error = nfs_reconnect(rep))) { + nfs_sndunlock(&rep->r_nmp->nm_flag); + return (error); + } + goto tryagain; + } + } + nfs_sndunlock(&rep->r_nmp->nm_flag); + if (sotype == SOCK_STREAM) { + aio.iov_base = (caddr_t) &len; + aio.iov_len = sizeof(u_long); + auio.uio_iov = &aio; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; + auio.uio_resid = sizeof(u_long); + auio.uio_procp = p; + do { + rcvflg = MSG_WAITALL; + error = soreceive(so, (struct mbuf **)0, &auio, + (struct mbuf **)0, (struct mbuf **)0, &rcvflg); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + return (EINTR); + } + } while (error == EWOULDBLOCK); + if (!error && auio.uio_resid > 0) { + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + sizeof(u_long) - auio.uio_resid, + sizeof(u_long), + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EPIPE; + } + if (error) + goto errout; + len = ntohl(len) & ~0x80000000; + /* + * This is SERIOUS! We are out of sync with the sender + * and forcing a disconnect/reconnect is all I can do. 
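+			 * The record mark just read is a 32 bit word whose
+			 * high bit flags the last fragment and whose low 31
+			 * bits give the fragment length (hence the mask
+			 * above); a length beyond NFS_MAXPACKET means the
+			 * stream framing has been lost.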
+ */ + if (len > NFS_MAXPACKET) { + log(LOG_ERR, "%s (%d) from nfs server %s\n", + "impossible packet length", + len, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EFBIG; + goto errout; + } + auio.uio_resid = len; + do { + rcvflg = MSG_WAITALL; + error = soreceive(so, (struct mbuf **)0, + &auio, mp, (struct mbuf **)0, &rcvflg); + } while (error == EWOULDBLOCK || error == EINTR || + error == ERESTART); + if (!error && auio.uio_resid > 0) { + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + len - auio.uio_resid, len, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = EPIPE; + } + } else { + /* + * NB: Since uio_resid is big, MSG_WAITALL is ignored + * and soreceive() will return when it has either a + * control msg or a data msg. + * We have no use for control msg., but must grab them + * and then throw them away so we know what is going + * on. + */ + auio.uio_resid = len = 100000000; /* Anything Big */ + auio.uio_procp = p; + do { + rcvflg = 0; + error = soreceive(so, (struct mbuf **)0, + &auio, mp, &control, &rcvflg); + if (control) + m_freem(control); + if (error == EWOULDBLOCK && rep) { + if (rep->r_flags & R_SOFTTERM) + return (EINTR); + } + } while (error == EWOULDBLOCK || + (!error && *mp == NULL && control)); + if ((rcvflg & MSG_EOR) == 0) + printf("Egad!!\n"); + if (!error && *mp == NULL) + error = EPIPE; + len -= auio.uio_resid; + } +errout: + if (error && error != EINTR && error != ERESTART) { + m_freem(*mp); + *mp = (struct mbuf *)0; + if (error != EPIPE) + log(LOG_INFO, + "receive error %d from nfs server %s\n", + error, + rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); + if (!error) + error = nfs_reconnect(rep); + if (!error) + goto tryagain; + } + } else { + if ((so = rep->r_nmp->nm_so) == NULL) + return (EACCES); + if (so->so_state & SS_ISCONNECTED) + getnam = (struct mbuf **)0; + else + getnam = aname; + auio.uio_resid = len = 1000000; + auio.uio_procp = p; + do { + rcvflg = 0; + error = soreceive(so, getnam, &auio, mp, + (struct mbuf **)0, &rcvflg); + if (error == EWOULDBLOCK && + (rep->r_flags & R_SOFTTERM)) + return (EINTR); + } while (error == EWOULDBLOCK); + len -= auio.uio_resid; + } + if (error) { + m_freem(*mp); + *mp = (struct mbuf *)0; + } + /* + * Search for any mbufs that are not a multiple of 4 bytes long + * or with m_data not longword aligned. + * These could cause pointer alignment problems, so copy them to + * well aligned mbufs. + */ + nfs_realign(*mp, 5 * NFSX_UNSIGNED); + return (error); +} + +/* + * Implement receipt of reply on a socket. + * We must search through the list of received datagrams matching them + * with outstanding requests using the xid, until ours is found. + */ +/* ARGSUSED */ +nfs_reply(myrep) + struct nfsreq *myrep; +{ + register struct nfsreq *rep; + register struct nfsmount *nmp = myrep->r_nmp; + register long t1; + struct mbuf *mrep, *nam, *md; + u_long rxid, *tl; + caddr_t dpos, cp2; + int error; + + /* + * Loop around until we get our own reply + */ + for (;;) { + /* + * Lock against other receivers so that I don't get stuck in + * sbwait() after someone else has received my reply for me. + * Also necessary for connection based protocols to avoid + * race conditions during a reconnect. 
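+		 * Only the process holding the receive lock reads from the
+		 * socket; anyone whose reply is picked up by another
+		 * receiver finds it already hung off r_mrep when it gets
+		 * the lock.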
+ */ + if (error = nfs_rcvlock(myrep)) + return (error); + /* Already received, bye bye */ + if (myrep->r_mrep != NULL) { + nfs_rcvunlock(&nmp->nm_flag); + return (0); + } + /* + * Get the next Rpc reply off the socket + */ + error = nfs_receive(myrep, &nam, &mrep); + nfs_rcvunlock(&nmp->nm_flag); + if (error) { + + /* + * Ignore routing errors on connectionless protocols?? + */ + if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { + nmp->nm_so->so_error = 0; + if (myrep->r_flags & R_GETONEREP) + return (0); + continue; + } + return (error); + } + if (nam) + m_freem(nam); + + /* + * Get the xid and check that it is an rpc reply + */ + md = mrep; + dpos = mtod(md, caddr_t); + nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); + rxid = *tl++; + if (*tl != rpc_reply) { + if (nmp->nm_flag & NFSMNT_NQNFS) { + if (nqnfs_callback(nmp, mrep, md, dpos)) + nfsstats.rpcinvalid++; + } else { + nfsstats.rpcinvalid++; + m_freem(mrep); + } +nfsmout: + if (myrep->r_flags & R_GETONEREP) + return (0); + continue; + } + + /* + * Loop through the request list to match up the reply + * Iff no match, just drop the datagram + */ + for (rep = nfs_reqq.tqh_first; rep != 0; + rep = rep->r_chain.tqe_next) { + if (rep->r_mrep == NULL && rxid == rep->r_xid) { + /* Found it.. */ + rep->r_mrep = mrep; + rep->r_md = md; + rep->r_dpos = dpos; + if (nfsrtton) { + struct rttl *rt; + + rt = &nfsrtt.rttl[nfsrtt.pos]; + rt->proc = rep->r_procnum; + rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); + rt->sent = nmp->nm_sent; + rt->cwnd = nmp->nm_cwnd; + rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; + rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; + rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; + rt->tstamp = time; + if (rep->r_flags & R_TIMING) + rt->rtt = rep->r_rtt; + else + rt->rtt = 1000000; + nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; + } + /* + * Update congestion window. + * Do the additive increase of + * one rpc/rtt. + */ + if (nmp->nm_cwnd <= nmp->nm_sent) { + nmp->nm_cwnd += + (NFS_CWNDSCALE * NFS_CWNDSCALE + + (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; + if (nmp->nm_cwnd > NFS_MAXCWND) + nmp->nm_cwnd = NFS_MAXCWND; + } + rep->r_flags &= ~R_SENT; + nmp->nm_sent -= NFS_CWNDSCALE; + /* + * Update rtt using a gain of 0.125 on the mean + * and a gain of 0.25 on the deviation. + */ + if (rep->r_flags & R_TIMING) { + /* + * Since the timer resolution of + * NFS_HZ is so course, it can often + * result in r_rtt == 0. Since + * r_rtt == N means that the actual + * rtt is between N+dt and N+2-dt ticks, + * add 1. + */ + t1 = rep->r_rtt + 1; + t1 -= (NFS_SRTT(rep) >> 3); + NFS_SRTT(rep) += t1; + if (t1 < 0) + t1 = -t1; + t1 -= (NFS_SDRTT(rep) >> 2); + NFS_SDRTT(rep) += t1; + } + nmp->nm_timeouts = 0; + break; + } + } + /* + * If not matched to a request, drop it. + * If it's mine, get out. 
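+		 * An unmatched reply is normally a duplicate for a request
+		 * that has already been retired, so dropping it is harmless;
+		 * it is still counted in nfsstats.rpcunexpected.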
+ */ + if (rep == 0) { + nfsstats.rpcunexpected++; + m_freem(mrep); + } else if (rep == myrep) { + if (rep->r_mrep == NULL) + panic("nfsreply nil"); + return (0); + } + if (myrep->r_flags & R_GETONEREP) + return (0); + } +} + +/* + * nfs_request - goes something like this + * - fill in request struct + * - links it into list + * - calls nfs_send() for first transmit + * - calls nfs_receive() to get reply + * - break down rpc header and return with nfs reply pointed to + * by mrep or error + * nb: always frees up mreq mbuf list + */ +nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) + struct vnode *vp; + struct mbuf *mrest; + int procnum; + struct proc *procp; + struct ucred *cred; + struct mbuf **mrp; + struct mbuf **mdp; + caddr_t *dposp; +{ + register struct mbuf *m, *mrep; + register struct nfsreq *rep; + register u_long *tl; + register int i; + struct nfsmount *nmp; + struct mbuf *md, *mheadend; + struct nfsreq *reph; + struct nfsnode *np; + time_t reqtime, waituntil; + caddr_t dpos, cp2; + int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; + int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; + u_long xid; + u_quad_t frev; + char *auth_str; + + nmp = VFSTONFS(vp->v_mount); + MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); + rep->r_nmp = nmp; + rep->r_vp = vp; + rep->r_procp = procp; + rep->r_procnum = procnum; + i = 0; + m = mrest; + while (m) { + i += m->m_len; + m = m->m_next; + } + mrest_len = i; + + /* + * Get the RPC header with authorization. + */ +kerbauth: + auth_str = (char *)0; + if (nmp->nm_flag & NFSMNT_KERB) { + if (failed_auth) { + error = nfs_getauth(nmp, rep, cred, &auth_type, + &auth_str, &auth_len); + if (error) { + free((caddr_t)rep, M_NFSREQ); + m_freem(mrest); + return (error); + } + } else { + auth_type = RPCAUTH_UNIX; + auth_len = 5 * NFSX_UNSIGNED; + } + } else { + auth_type = RPCAUTH_UNIX; + auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ? + nmp->nm_numgrps : cred->cr_ngroups) << 2) + + 5 * NFSX_UNSIGNED; + } + m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, + auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); + if (auth_str) + free(auth_str, M_TEMP); + + /* + * For stream protocols, insert a Sun RPC Record Mark. + */ + if (nmp->nm_sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_long *) = htonl(0x80000000 | + (m->m_pkthdr.len - NFSX_UNSIGNED)); + } + rep->r_mreq = m; + rep->r_xid = xid; +tryagain: + if (nmp->nm_flag & NFSMNT_SOFT) + rep->r_retry = nmp->nm_retry; + else + rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ + rep->r_rtt = rep->r_rexmit = 0; + if (proct[procnum] > 0) + rep->r_flags = R_TIMING; + else + rep->r_flags = 0; + rep->r_mrep = NULL; + + /* + * Do the client side RPC. + */ + nfsstats.rpcrequests++; + /* + * Chain request into list of outstanding requests. Be sure + * to put it LAST so timer finds oldest requests first. + */ + s = splsoftclock(); + TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); + + /* Get send time for nqnfs */ + reqtime = time.tv_sec; + + /* + * If backing off another request or avoiding congestion, don't + * send this one now but let timer do it. If not timing a request, + * do it now. 
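+ * (Concretely: the request is transmitted immediately below only when
+ * a socket exists and either the transport is not UDP, the mount uses
+ * the dumb fixed timer, or there is congestion-window room, i.e.
+ * nm_sent < nm_cwnd; otherwise it just sits on nfs_reqq with
+ * r_rtt == -1 and nfs_timer() sends it once the window opens.)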
+ */ + if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || + (nmp->nm_flag & NFSMNT_DUMBTIMR) || + nmp->nm_sent < nmp->nm_cwnd)) { + splx(s); + if (nmp->nm_soflags & PR_CONNREQUIRED) + error = nfs_sndlock(&nmp->nm_flag, rep); + if (!error) { + m = m_copym(m, 0, M_COPYALL, M_WAIT); + error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); + if (nmp->nm_soflags & PR_CONNREQUIRED) + nfs_sndunlock(&nmp->nm_flag); + } + if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { + nmp->nm_sent += NFS_CWNDSCALE; + rep->r_flags |= R_SENT; + } + } else { + splx(s); + rep->r_rtt = -1; + } + + /* + * Wait for the reply from our send or the timer's. + */ + if (!error || error == EPIPE) + error = nfs_reply(rep); + + /* + * RPC done, unlink the request. + */ + s = splsoftclock(); + TAILQ_REMOVE(&nfs_reqq, rep, r_chain); + splx(s); + + /* + * Decrement the outstanding request count. + */ + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_SENT; /* paranoia */ + nmp->nm_sent -= NFS_CWNDSCALE; + } + + /* + * If there was a successful reply and a tprintf msg. + * tprintf a response. + */ + if (!error && (rep->r_flags & R_TPRINTFMSG)) + nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, + "is alive again"); + mrep = rep->r_mrep; + md = rep->r_md; + dpos = rep->r_dpos; + if (error) { + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * break down the rpc header and check if ok + */ + nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); + if (*tl++ == rpc_msgdenied) { + if (*tl == rpc_mismatch) + error = EOPNOTSUPP; + else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { + if (*tl == rpc_rejectedcred && failed_auth == 0) { + failed_auth++; + mheadend->m_next = (struct mbuf *)0; + m_freem(mrep); + m_freem(rep->r_mreq); + goto kerbauth; + } else + error = EAUTH; + } else + error = EACCES; + m_freem(mrep); + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * skip over the auth_verf, someday we may want to cache auth_short's + * for nfs_reqhead(), but for now just dump it + */ + if (*++tl != 0) { + i = nfsm_rndup(fxdr_unsigned(long, *tl)); + nfsm_adv(i); + } + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + /* 0 == ok */ + if (*tl == 0) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + if (*tl != 0) { + error = fxdr_unsigned(int, *tl); + m_freem(mrep); + if ((nmp->nm_flag & NFSMNT_NQNFS) && + error == NQNFS_TRYLATER) { + error = 0; + waituntil = time.tv_sec + trylater_delay; + while (time.tv_sec < waituntil) + (void) tsleep((caddr_t)&lbolt, + PSOCK, "nqnfstry", 0); + trylater_delay *= nfs_backoff[trylater_cnt]; + if (trylater_cnt < 7) + trylater_cnt++; + goto tryagain; + } + + /* + * If the File Handle was stale, invalidate the + * lookup cache, just in case. 
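+ * (A stale handle generally means the file was removed or the server's
+ * state was rebuilt, so cached name translations reaching this vnode
+ * could keep producing the dead handle; cache_purge() simply forces
+ * the next lookup to go back to the server.)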
+ */ + if (error == ESTALE) + cache_purge(vp); + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + return (error); + } + + /* + * For nqnfs, get any lease in reply + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + if (*tl) { + np = VTONFS(vp); + nqlflag = fxdr_unsigned(int, *tl); + nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); + cachable = fxdr_unsigned(int, *tl++); + reqtime += fxdr_unsigned(int, *tl++); + if (reqtime > time.tv_sec) { + fxdr_hyper(tl, &frev); + nqnfs_clientlease(nmp, np, nqlflag, + cachable, reqtime, frev); + } + } + } + *mrp = mrep; + *mdp = md; + *dposp = dpos; + m_freem(rep->r_mreq); + FREE((caddr_t)rep, M_NFSREQ); + return (0); + } + m_freem(mrep); + m_freem(rep->r_mreq); + free((caddr_t)rep, M_NFSREQ); + error = EPROTONOSUPPORT; +nfsmout: + return (error); +} +#endif /* NFSCLIENT */ + +/* + * Generate the rpc reply header + * siz arg. is used to decide if adding a cluster is worthwhile + */ +nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) + int siz; + struct nfsd *nd; + int err; + int cache; + u_quad_t *frev; + struct mbuf **mrq; + struct mbuf **mbp; + caddr_t *bposp; +{ + register u_long *tl; + register struct mbuf *mreq; + caddr_t bpos; + struct mbuf *mb, *mb2; + + MGETHDR(mreq, M_WAIT, MT_DATA); + mb = mreq; + /* + * If this is a big reply, use a cluster else + * try and leave leading space for the lower level headers. + */ + siz += RPC_REPLYSIZ; + if (siz >= MINCLSIZE) { + MCLGET(mreq, M_WAIT); + } else + mreq->m_data += max_hdr; + tl = mtod(mreq, u_long *); + mreq->m_len = 6*NFSX_UNSIGNED; + bpos = ((caddr_t)tl)+mreq->m_len; + *tl++ = txdr_unsigned(nd->nd_retxid); + *tl++ = rpc_reply; + if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { + *tl++ = rpc_msgdenied; + if (err == NQNFS_AUTHERR) { + *tl++ = rpc_autherr; + *tl = rpc_rejectedcred; + mreq->m_len -= NFSX_UNSIGNED; + bpos -= NFSX_UNSIGNED; + } else { + *tl++ = rpc_mismatch; + *tl++ = txdr_unsigned(2); + *tl = txdr_unsigned(2); + } + } else { + *tl++ = rpc_msgaccepted; + *tl++ = 0; + *tl++ = 0; + switch (err) { + case EPROGUNAVAIL: + *tl = txdr_unsigned(RPC_PROGUNAVAIL); + break; + case EPROGMISMATCH: + *tl = txdr_unsigned(RPC_PROGMISMATCH); + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(2); + *tl = txdr_unsigned(2); /* someday 3 */ + break; + case EPROCUNAVAIL: + *tl = txdr_unsigned(RPC_PROCUNAVAIL); + break; + default: + *tl = 0; + if (err != VNOVAL) { + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + if (err) + *tl = txdr_unsigned(nfsrv_errmap[err - 1]); + else + *tl = 0; + } + break; + }; + } + + /* + * For nqnfs, piggyback lease as requested. + */ + if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { + if (nd->nd_nqlflag) { + nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nd->nd_nqlflag); + *tl++ = txdr_unsigned(cache); + *tl++ = txdr_unsigned(nd->nd_duration); + txdr_hyper(frev, tl); + } else { + if (nd->nd_nqlflag != 0) + panic("nqreph"); + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + *tl = 0; + } + } + *mrq = mreq; + *mbp = mb; + *bposp = bpos; + if (err != 0 && err != VNOVAL) + nfsstats.srvrpc_errs++; + return (0); +} + +/* + * Nfs timer routine + * Scan the nfsreq list and retranmit any requests that have timed out + * To avoid retransmission attempts on STREAM sockets (in the future) make + * sure to set the r_retry field to 0 (implies nm_retry == 0). 
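+ *
+ * Rough timing sketch of the loop below: each callout tick
+ * (hz / NFS_HZ) advances r_rtt on every request being timed; once
+ * r_rtt exceeds
+ *	timeo = (NFS_RTO(...), or nm_timeo for dumb-timer mounts)
+ *		scaled by nfs_backoff[nm_timeouts - 1]
+ * the request is either marked R_SOFTTERM when r_retry is exhausted
+ * or, on UDP with buffer and window room, retransmitted, halving the
+ * congestion window if it had already been sent once.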
+ */ +void +nfs_timer(arg) + void *arg; +{ + register struct nfsreq *rep; + register struct mbuf *m; + register struct socket *so; + register struct nfsmount *nmp; + register int timeo; + static long lasttime = 0; + int s, error; + + s = splsoftnet(); + for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { + nmp = rep->r_nmp; + if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) + continue; + if (nfs_sigintr(nmp, rep, rep->r_procp)) { + rep->r_flags |= R_SOFTTERM; + continue; + } + if (rep->r_rtt >= 0) { + rep->r_rtt++; + if (nmp->nm_flag & NFSMNT_DUMBTIMR) + timeo = nmp->nm_timeo; + else + timeo = NFS_RTO(nmp, proct[rep->r_procnum]); + if (nmp->nm_timeouts > 0) + timeo *= nfs_backoff[nmp->nm_timeouts - 1]; + if (rep->r_rtt <= timeo) + continue; + if (nmp->nm_timeouts < 8) + nmp->nm_timeouts++; + } + /* + * Check for server not responding + */ + if ((rep->r_flags & R_TPRINTFMSG) == 0 && + rep->r_rexmit > nmp->nm_deadthresh) { + nfs_msg(rep->r_procp, + nmp->nm_mountp->mnt_stat.f_mntfromname, + "not responding"); + rep->r_flags |= R_TPRINTFMSG; + } + if (rep->r_rexmit >= rep->r_retry) { /* too many */ + nfsstats.rpctimeouts++; + rep->r_flags |= R_SOFTTERM; + continue; + } + if (nmp->nm_sotype != SOCK_DGRAM) { + if (++rep->r_rexmit > NFS_MAXREXMIT) + rep->r_rexmit = NFS_MAXREXMIT; + continue; + } + if ((so = nmp->nm_so) == NULL) + continue; + + /* + * If there is enough space and the window allows.. + * Resend it + * Set r_rtt to -1 in case we fail to send it now. + */ + rep->r_rtt = -1; + if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && + ((nmp->nm_flag & NFSMNT_DUMBTIMR) || + (rep->r_flags & R_SENT) || + nmp->nm_sent < nmp->nm_cwnd) && + (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ + if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) + error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, + (struct mbuf *)0, (struct mbuf *)0); + else + error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, + nmp->nm_nam, (struct mbuf *)0); + if (error) { + if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) + so->so_error = 0; + } else { + /* + * Iff first send, start timing + * else turn timing off, backoff timer + * and divide congestion window by 2. + */ + if (rep->r_flags & R_SENT) { + rep->r_flags &= ~R_TIMING; + if (++rep->r_rexmit > NFS_MAXREXMIT) + rep->r_rexmit = NFS_MAXREXMIT; + nmp->nm_cwnd >>= 1; + if (nmp->nm_cwnd < NFS_CWNDSCALE) + nmp->nm_cwnd = NFS_CWNDSCALE; + nfsstats.rpcretries++; + } else { + rep->r_flags |= R_SENT; + nmp->nm_sent += NFS_CWNDSCALE; + } + rep->r_rtt = 0; + } + } + } + +#ifdef NFSSERVER + /* + * Call the nqnfs server timer once a second to handle leases. + */ + if (lasttime != time.tv_sec) { + lasttime = time.tv_sec; + nqnfs_serverd(); + } +#endif /* NFSSERVER */ + splx(s); + timeout(nfs_timer, (void *)0, hz / NFS_HZ); +} + +/* + * Test for a termination condition pending on the process. + * This is used for NFSMNT_INT mounts. + */ +nfs_sigintr(nmp, rep, p) + struct nfsmount *nmp; + struct nfsreq *rep; + register struct proc *p; +{ + + if (rep && (rep->r_flags & R_SOFTTERM)) + return (EINTR); + if (!(nmp->nm_flag & NFSMNT_INT)) + return (0); + if (p && p->p_siglist && + (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) & + NFSINT_SIGMASK)) + return (EINTR); + return (0); +} + +/* + * Lock a socket against others. + * Necessary for STREAM sockets to ensure you get an entire rpc request/reply + * and also to avoid race conditions between the processes with nfs requests + * in progress when a reconnect is necessary. 
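+ *
+ * The lock itself is just a flag protocol on the mount's flag word
+ * (a sketch of the code below):
+ *
+ *	while (*flagp & NFSMNT_SNDLOCK) {
+ *		*flagp |= NFSMNT_WANTSND;
+ *		tsleep(flagp, ...);		woken by nfs_sndunlock()
+ *	}
+ *	*flagp |= NFSMNT_SNDLOCK;
+ *
+ * with PCATCH added on interruptible (NFSMNT_INT) mounts so a signal
+ * can break out of the wait.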
+ */ +nfs_sndlock(flagp, rep) + register int *flagp; + struct nfsreq *rep; +{ + struct proc *p; + int slpflag = 0, slptimeo = 0; + + if (rep) { + p = rep->r_procp; + if (rep->r_nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + } else + p = (struct proc *)0; + while (*flagp & NFSMNT_SNDLOCK) { + if (nfs_sigintr(rep->r_nmp, rep, p)) + return (EINTR); + *flagp |= NFSMNT_WANTSND; + (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck", + slptimeo); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + *flagp |= NFSMNT_SNDLOCK; + return (0); +} + +/* + * Unlock the stream socket for others. + */ +void +nfs_sndunlock(flagp) + register int *flagp; +{ + + if ((*flagp & NFSMNT_SNDLOCK) == 0) + panic("nfs sndunlock"); + *flagp &= ~NFSMNT_SNDLOCK; + if (*flagp & NFSMNT_WANTSND) { + *flagp &= ~NFSMNT_WANTSND; + wakeup((caddr_t)flagp); + } +} + +nfs_rcvlock(rep) + register struct nfsreq *rep; +{ + register int *flagp = &rep->r_nmp->nm_flag; + int slpflag, slptimeo = 0; + + if (*flagp & NFSMNT_INT) + slpflag = PCATCH; + else + slpflag = 0; + while (*flagp & NFSMNT_RCVLOCK) { + if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) + return (EINTR); + *flagp |= NFSMNT_WANTRCV; + (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk", + slptimeo); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + *flagp |= NFSMNT_RCVLOCK; + return (0); +} + +/* + * Unlock the stream socket for others. + */ +void +nfs_rcvunlock(flagp) + register int *flagp; +{ + + if ((*flagp & NFSMNT_RCVLOCK) == 0) + panic("nfs rcvunlock"); + *flagp &= ~NFSMNT_RCVLOCK; + if (*flagp & NFSMNT_WANTRCV) { + *flagp &= ~NFSMNT_WANTRCV; + wakeup((caddr_t)flagp); + } +} + +/* + * Check for badly aligned mbuf data areas and + * realign data in an mbuf list by copying the data areas up, as required. + */ +void +nfs_realign(m, hsiz) + register struct mbuf *m; + int hsiz; +{ + register struct mbuf *m2; + register int siz, mlen, olen; + register caddr_t tcp, fcp; + struct mbuf *mnew; + + while (m) { + /* + * This never happens for UDP, rarely happens for TCP + * but frequently happens for iso transport. + */ + if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { + olen = m->m_len; + fcp = mtod(m, caddr_t); + if ((int)fcp & 0x3) { + m->m_flags &= ~M_PKTHDR; + if (m->m_flags & M_EXT) + m->m_data = m->m_ext.ext_buf + + ((m->m_ext.ext_size - olen) & ~0x3); + else + m->m_data = m->m_dat; + } + m->m_len = 0; + tcp = mtod(m, caddr_t); + mnew = m; + m2 = m->m_next; + + /* + * If possible, only put the first invariant part + * of the RPC header in the first mbuf. + */ + mlen = M_TRAILINGSPACE(m); + if (olen <= hsiz && mlen > hsiz) + mlen = hsiz; + + /* + * Loop through the mbuf list consolidating data. + */ + while (m) { + while (olen > 0) { + if (mlen == 0) { + m2->m_flags &= ~M_PKTHDR; + if (m2->m_flags & M_EXT) + m2->m_data = m2->m_ext.ext_buf; + else + m2->m_data = m2->m_dat; + m2->m_len = 0; + mlen = M_TRAILINGSPACE(m2); + tcp = mtod(m2, caddr_t); + mnew = m2; + m2 = m2->m_next; + } + siz = min(mlen, olen); + if (tcp != fcp) + bcopy(fcp, tcp, siz); + mnew->m_len += siz; + mlen -= siz; + olen -= siz; + tcp += siz; + fcp += siz; + } + m = m->m_next; + if (m) { + olen = m->m_len; + fcp = mtod(m, caddr_t); + } + } + + /* + * Finally, set m_len == 0 for any trailing mbufs that have + * been copied out of. + */ + while (m2) { + m2->m_len = 0; + m2 = m2->m_next; + } + return; + } + m = m->m_next; + } +} + +/* + * Parse an RPC request + * - verify it + * - fill in the cred struct. 
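+ * - the words walked below are the standard Sun RPC call header:
+ *   xid, CALL, rpcvers (2), program, version, procedure, then the
+ *   credential as (flavor, length, opaque body) and the verifier in
+ *   the same (flavor, length, body) form.  This summary is only for
+ *   the reader's convenience; the RPC protocol spec and the code
+ *   below are authoritative.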
+ */ +nfs_getreq(nd, has_header) + register struct nfsd *nd; + int has_header; +{ + register int len, i; + register u_long *tl; + register long t1; + struct uio uio; + struct iovec iov; + caddr_t dpos, cp2; + u_long nfsvers, auth_type; + int error = 0, nqnfs = 0; + struct mbuf *mrep, *md; + + mrep = nd->nd_mrep; + md = nd->nd_md; + dpos = nd->nd_dpos; + if (has_header) { + nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); + nd->nd_retxid = fxdr_unsigned(u_long, *tl++); + if (*tl++ != rpc_call) { + m_freem(mrep); + return (EBADRPC); + } + } else { + nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); + } + nd->nd_repstat = 0; + if (*tl++ != rpc_vers) { + nd->nd_repstat = ERPCMISMATCH; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nfsvers = nfs_vers; + if (*tl != nfs_prog) { + if (*tl == nqnfs_prog) { + nqnfs++; + nfsvers = nqnfs_vers; + } else { + nd->nd_repstat = EPROGUNAVAIL; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + } + tl++; + if (*tl++ != nfsvers) { + nd->nd_repstat = EPROGMISMATCH; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + nd->nd_procnum = fxdr_unsigned(u_long, *tl++); + if (nd->nd_procnum == NFSPROC_NULL) + return (0); + if (nd->nd_procnum >= NFS_NPROCS || + (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || + (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { + nd->nd_repstat = EPROCUNAVAIL; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + auth_type = *tl++; + len = fxdr_unsigned(int, *tl++); + if (len < 0 || len > RPCAUTH_MAXSIZ) { + m_freem(mrep); + return (EBADRPC); + } + + /* + * Handle auth_unix or auth_kerb. + */ + if (auth_type == rpc_auth_unix) { + len = fxdr_unsigned(int, *++tl); + if (len < 0 || len > NFS_MAXNAMLEN) { + m_freem(mrep); + return (EBADRPC); + } + nfsm_adv(nfsm_rndup(len)); + nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); + nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); + nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); + len = fxdr_unsigned(int, *tl); + if (len < 0 || len > RPCAUTH_UNIXGIDS) { + m_freem(mrep); + return (EBADRPC); + } + nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); + for (i = 0; i < len; i++) + if (i < NGROUPS) + nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); + else + tl++; + nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len; + } else if (auth_type == rpc_auth_kerb) { + nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); + nd->nd_authlen = fxdr_unsigned(int, *tl); + uio.uio_resid = nfsm_rndup(nd->nd_authlen); + if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { + m_freem(mrep); + return (EBADRPC); + } + uio.uio_offset = 0; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + iov.iov_base = (caddr_t)nd->nd_authstr; + iov.iov_len = RPCAUTH_MAXSIZ; + nfsm_mtouio(&uio, uio.uio_resid); + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + nd->nd_flag |= NFSD_NEEDAUTH; + } + + /* + * Do we have any use for the verifier. + * According to the "Remote Procedure Call Protocol Spec." it + * should be AUTH_NULL, but some clients make it AUTH_UNIX? + * For now, just skip over it + */ + len = fxdr_unsigned(int, *++tl); + if (len < 0 || len > RPCAUTH_MAXSIZ) { + m_freem(mrep); + return (EBADRPC); + } + if (len > 0) { + nfsm_adv(nfsm_rndup(len)); + } + + /* + * For nqnfs, get piggybacked lease request. 
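+ * (a single XDR word giving the requested lease type, 0 meaning no
+ * lease wanted, followed by one more word with the requested duration
+ * when the type is non-zero; see the dissects just below).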
+ */ + if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + nd->nd_nqlflag = fxdr_unsigned(int, *tl); + if (nd->nd_nqlflag) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + nd->nd_duration = fxdr_unsigned(int, *tl); + } else + nd->nd_duration = NQ_MINLEASE; + } else { + nd->nd_nqlflag = NQL_NOVAL; + nd->nd_duration = NQ_MINLEASE; + } + nd->nd_md = md; + nd->nd_dpos = dpos; + return (0); +nfsmout: + return (error); +} + +nfs_msg(p, server, msg) + struct proc *p; + char *server, *msg; +{ + tpr_t tpr; + + if (p) + tpr = tprintf_open(p); + else + tpr = NULL; + tprintf(tpr, "nfs server %s: %s\n", server, msg); + tprintf_close(tpr); +} + +#ifdef NFSSERVER +int nfsrv_null(), + nfsrv_getattr(), + nfsrv_setattr(), + nfsrv_lookup(), + nfsrv_readlink(), + nfsrv_read(), + nfsrv_write(), + nfsrv_create(), + nfsrv_remove(), + nfsrv_rename(), + nfsrv_link(), + nfsrv_symlink(), + nfsrv_mkdir(), + nfsrv_rmdir(), + nfsrv_readdir(), + nfsrv_statfs(), + nfsrv_noop(), + nqnfsrv_readdirlook(), + nqnfsrv_getlease(), + nqnfsrv_vacated(), + nqnfsrv_access(); + +int (*nfsrv_procs[NFS_NPROCS])() = { + nfsrv_null, + nfsrv_getattr, + nfsrv_setattr, + nfsrv_noop, + nfsrv_lookup, + nfsrv_readlink, + nfsrv_read, + nfsrv_noop, + nfsrv_write, + nfsrv_create, + nfsrv_remove, + nfsrv_rename, + nfsrv_link, + nfsrv_symlink, + nfsrv_mkdir, + nfsrv_rmdir, + nfsrv_readdir, + nfsrv_statfs, + nqnfsrv_readdirlook, + nqnfsrv_getlease, + nqnfsrv_vacated, + nfsrv_noop, + nqnfsrv_access, +}; + +/* + * Socket upcall routine for the nfsd sockets. + * The caddr_t arg is a pointer to the "struct nfssvc_sock". + * Essentially do as much as possible non-blocking, else punt and it will + * be called with M_WAIT from an nfsd. + */ +void +nfsrv_rcv(so, arg, waitflag) + struct socket *so; + caddr_t arg; + int waitflag; +{ + register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; + register struct mbuf *m; + struct mbuf *mp, *nam; + struct uio auio; + int flags, error; + + if ((slp->ns_flag & SLP_VALID) == 0) + return; +#ifdef notdef + /* + * Define this to test for nfsds handling this under heavy load. + */ + if (waitflag == M_DONTWAIT) { + slp->ns_flag |= SLP_NEEDQ; goto dorecs; + } +#endif + auio.uio_procp = NULL; + if (so->so_type == SOCK_STREAM) { + /* + * If there are already records on the queue, defer soreceive() + * to an nfsd so that there is feedback to the TCP layer that + * the nfs servers are heavily loaded. + */ + if (slp->ns_rec && waitflag == M_DONTWAIT) { + slp->ns_flag |= SLP_NEEDQ; + goto dorecs; + } + + /* + * Do soreceive(). + */ + auio.uio_resid = 1000000000; + flags = MSG_DONTWAIT; + error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); + if (error || mp == (struct mbuf *)0) { + if (error == EWOULDBLOCK) + slp->ns_flag |= SLP_NEEDQ; + else + slp->ns_flag |= SLP_DISCONN; + goto dorecs; + } + m = mp; + if (slp->ns_rawend) { + slp->ns_rawend->m_next = m; + slp->ns_cc += 1000000000 - auio.uio_resid; + } else { + slp->ns_raw = m; + slp->ns_cc = 1000000000 - auio.uio_resid; + } + while (m->m_next) + m = m->m_next; + slp->ns_rawend = m; + + /* + * Now try and parse record(s) out of the raw stream data. 
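+ * Each record on the stream is preceded by a 4 byte RPC record mark:
+ * the low 31 bits carry the fragment length and the high bit is set
+ * on a record's last fragment, so e.g. a mark of 0x80000074 announces
+ * a final fragment of 0x74 == 116 bytes.  nfsrv_getstream() strips
+ * the marks off ns_raw and queues each complete request on ns_rec.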
+ */ + if (error = nfsrv_getstream(slp, waitflag)) { + if (error == EPERM) + slp->ns_flag |= SLP_DISCONN; + else + slp->ns_flag |= SLP_NEEDQ; + } + } else { + do { + auio.uio_resid = 1000000000; + flags = MSG_DONTWAIT; + error = soreceive(so, &nam, &auio, &mp, + (struct mbuf **)0, &flags); + if (mp) { + nfs_realign(mp, 10 * NFSX_UNSIGNED); + if (nam) { + m = nam; + m->m_next = mp; + } else + m = mp; + if (slp->ns_recend) + slp->ns_recend->m_nextpkt = m; + else + slp->ns_rec = m; + slp->ns_recend = m; + m->m_nextpkt = (struct mbuf *)0; + } + if (error) { + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) + && error != EWOULDBLOCK) { + slp->ns_flag |= SLP_DISCONN; + goto dorecs; + } + } + } while (mp); + } + + /* + * Now try and process the request records, non-blocking. + */ +dorecs: + if (waitflag == M_DONTWAIT && + (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) + nfsrv_wakenfsd(slp); +} + +/* + * Try and extract an RPC request from the mbuf data list received on a + * stream socket. The "waitflag" argument indicates whether or not it + * can sleep. + */ +nfsrv_getstream(slp, waitflag) + register struct nfssvc_sock *slp; + int waitflag; +{ + register struct mbuf *m; + register char *cp1, *cp2; + register int len; + struct mbuf *om, *m2, *recm; + u_long recmark; + + if (slp->ns_flag & SLP_GETSTREAM) + panic("nfs getstream"); + slp->ns_flag |= SLP_GETSTREAM; + for (;;) { + if (slp->ns_reclen == 0) { + if (slp->ns_cc < NFSX_UNSIGNED) { + slp->ns_flag &= ~SLP_GETSTREAM; + return (0); + } + m = slp->ns_raw; + if (m->m_len >= NFSX_UNSIGNED) { + bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); + m->m_data += NFSX_UNSIGNED; + m->m_len -= NFSX_UNSIGNED; + } else { + cp1 = (caddr_t)&recmark; + cp2 = mtod(m, caddr_t); + while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { + while (m->m_len == 0) { + m = m->m_next; + cp2 = mtod(m, caddr_t); + } + *cp1++ = *cp2++; + m->m_data++; + m->m_len--; + } + } + slp->ns_cc -= NFSX_UNSIGNED; + slp->ns_reclen = ntohl(recmark) & ~0x80000000; + if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { + slp->ns_flag &= ~SLP_GETSTREAM; + return (EPERM); + } + } + + /* + * Now get the record part. + */ + if (slp->ns_cc == slp->ns_reclen) { + recm = slp->ns_raw; + slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; + slp->ns_cc = slp->ns_reclen = 0; + } else if (slp->ns_cc > slp->ns_reclen) { + len = 0; + m = slp->ns_raw; + om = (struct mbuf *)0; + while (len < slp->ns_reclen) { + if ((len + m->m_len) > slp->ns_reclen) { + m2 = m_copym(m, 0, slp->ns_reclen - len, + waitflag); + if (m2) { + if (om) { + om->m_next = m2; + recm = slp->ns_raw; + } else + recm = m2; + m->m_data += slp->ns_reclen - len; + m->m_len -= slp->ns_reclen - len; + len = slp->ns_reclen; + } else { + slp->ns_flag &= ~SLP_GETSTREAM; + return (EWOULDBLOCK); + } + } else if ((len + m->m_len) == slp->ns_reclen) { + om = m; + len += m->m_len; + m = m->m_next; + recm = slp->ns_raw; + om->m_next = (struct mbuf *)0; + } else { + om = m; + len += m->m_len; + m = m->m_next; + } + } + slp->ns_raw = m; + slp->ns_cc -= len; + slp->ns_reclen = 0; + } else { + slp->ns_flag &= ~SLP_GETSTREAM; + return (0); + } + nfs_realign(recm, 10 * NFSX_UNSIGNED); + if (slp->ns_recend) + slp->ns_recend->m_nextpkt = recm; + else + slp->ns_rec = recm; + slp->ns_recend = recm; + } +} + +/* + * Parse an RPC header. 
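+ * (nfsrv_dorec() below dequeues one record from ns_rec, splits off the
+ * MT_SONAME address mbuf when the record came from a datagram socket,
+ * and hands the request to nfs_getreq() to be validated.)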
+ */ +nfsrv_dorec(slp, nd) + register struct nfssvc_sock *slp; + register struct nfsd *nd; +{ + register struct mbuf *m; + int error; + + if ((slp->ns_flag & SLP_VALID) == 0 || + (m = slp->ns_rec) == (struct mbuf *)0) + return (ENOBUFS); + if (slp->ns_rec = m->m_nextpkt) + m->m_nextpkt = (struct mbuf *)0; + else + slp->ns_recend = (struct mbuf *)0; + if (m->m_type == MT_SONAME) { + nd->nd_nam = m; + nd->nd_md = nd->nd_mrep = m->m_next; + m->m_next = (struct mbuf *)0; + } else { + nd->nd_nam = (struct mbuf *)0; + nd->nd_md = nd->nd_mrep = m; + } + nd->nd_dpos = mtod(nd->nd_md, caddr_t); + if (error = nfs_getreq(nd, TRUE)) { + m_freem(nd->nd_nam); + return (error); + } + return (0); +} + +/* + * Search for a sleeping nfsd and wake it up. + * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the + * running nfsds will go look for the work in the nfssvc_sock list. + */ +void +nfsrv_wakenfsd(slp) + struct nfssvc_sock *slp; +{ + register struct nfsd *nd; + + if ((slp->ns_flag & SLP_VALID) == 0) + return; + for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nd_chain.tqe_next) { + if (nd->nd_flag & NFSD_WAITING) { + nd->nd_flag &= ~NFSD_WAITING; + if (nd->nd_slp) + panic("nfsd wakeup"); + slp->ns_sref++; + nd->nd_slp = slp; + wakeup((caddr_t)nd); + return; + } + } + slp->ns_flag |= SLP_DOREC; + nfsd_head_flag |= NFSD_CHECKSLP; +} +#endif /* NFSSERVER */ diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c new file mode 100644 index 00000000000..9e2d9ea0a35 --- /dev/null +++ b/sys/nfs/nfs_srvcache.c @@ -0,0 +1,332 @@ +/* $NetBSD: nfs_srvcache.c,v 1.10 1994/12/13 17:17:03 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_srvcache.c 8.2 (Berkeley) 8/18/94 + */ + +/* + * Reference: Chet Juszczak, "Improving the Performance and Correctness + * of an NFS Server", in Proc. Winter 1989 USENIX Conference, + * pages 53-63. San Diego, February 1989. + */ +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/mbuf.h> +#include <sys/malloc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> + +#include <netinet/in.h> +#ifdef ISO +#include <netiso/iso.h> +#endif +#include <nfs/nfsm_subs.h> +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsrvcache.h> +#include <nfs/nqnfs.h> + +long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ; + +#define NFSRCHASH(xid) \ + (&nfsrvhashtbl[((xid) + ((xid) >> 24)) & nfsrvhash]) +LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl; +TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead; +u_long nfsrvhash; + +#define TRUE 1 +#define FALSE 0 + +#define NETFAMILY(rp) \ + (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO) + +/* + * Static array that defines which nfs rpc's are nonidempotent + */ +int nonidempotent[NFS_NPROCS] = { + FALSE, + FALSE, + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, +}; + +/* True iff the rpc reply is an nfs status ONLY! 
*/ +static int repliesstatus[NFS_NPROCS] = { + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE, + TRUE, + TRUE, + TRUE, + FALSE, + TRUE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + FALSE, + TRUE, +}; + +/* + * Initialize the server request cache list + */ +nfsrv_initcache() +{ + + nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash); + TAILQ_INIT(&nfsrvlruhead); +} + +/* + * Look for the request in the cache + * If found then + * return action and optionally reply + * else + * insert it in the cache + * + * The rules are as follows: + * - if in progress, return DROP request + * - if completed within DELAY of the current time, return DROP it + * - if completed a longer time ago return REPLY if the reply was cached or + * return DOIT + * Update/add new request at end of lru list + */ +nfsrv_getcache(nam, nd, repp) + struct mbuf *nam; + register struct nfsd *nd; + struct mbuf **repp; +{ + register struct nfsrvcache *rp; + struct mbuf *mb; + struct sockaddr_in *saddr; + caddr_t bpos; + int ret; + + if (nd->nd_nqlflag != NQL_NOVAL) + return (RC_DOIT); +loop: + for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; + rp = rp->rc_hash.le_next) { + if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && + netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) { + if ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + goto loop; + } + rp->rc_flag |= RC_LOCKED; + /* If not at end of LRU chain, move it there */ + if (rp->rc_lru.tqe_next) { + TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); + TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru); + } + if (rp->rc_state == RC_UNUSED) + panic("nfsrv cache"); + if (rp->rc_state == RC_INPROG) { + nfsstats.srvcache_inproghits++; + ret = RC_DROPIT; + } else if (rp->rc_flag & RC_REPSTATUS) { + nfsstats.srvcache_nonidemdonehits++; + nfs_rephead(0, nd, rp->rc_status, + 0, (u_quad_t *)0, repp, &mb, &bpos); + ret = RC_REPLY; + } else if (rp->rc_flag & RC_REPMBUF) { + nfsstats.srvcache_nonidemdonehits++; + *repp = m_copym(rp->rc_reply, 0, M_COPYALL, + M_WAIT); + ret = RC_REPLY; + } else { + nfsstats.srvcache_idemdonehits++; + rp->rc_state = RC_INPROG; + ret = RC_DOIT; + } + rp->rc_flag &= ~RC_LOCKED; + if (rp->rc_flag & RC_WANTED) { + rp->rc_flag &= ~RC_WANTED; + wakeup((caddr_t)rp); + } + return (ret); + } + } + nfsstats.srvcache_misses++; + if (numnfsrvcache < desirednfsrvcache) { + rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp, + M_NFSD, M_WAITOK); + bzero((char *)rp, sizeof *rp); + numnfsrvcache++; + rp->rc_flag = RC_LOCKED; + } else { + rp = nfsrvlruhead.tqh_first; + while ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + rp = nfsrvlruhead.tqh_first; + } + rp->rc_flag |= RC_LOCKED; + LIST_REMOVE(rp, rc_hash); + TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); + if (rp->rc_flag & RC_REPMBUF) + m_freem(rp->rc_reply); + if (rp->rc_flag & RC_NAM) + MFREE(rp->rc_nam, mb); + rp->rc_flag &= (RC_LOCKED | RC_WANTED); + } + TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru); + rp->rc_state = RC_INPROG; + rp->rc_xid = nd->nd_retxid; + saddr = mtod(nam, struct sockaddr_in *); + switch (saddr->sin_family) { + case AF_INET: + rp->rc_flag |= RC_INETADDR; + rp->rc_inetaddr = saddr->sin_addr.s_addr; + break; + case AF_ISO: + default: + rp->rc_flag |= RC_NAM; + rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT); + break; + }; + rp->rc_proc = nd->nd_procnum; + LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, 
rc_hash); + rp->rc_flag &= ~RC_LOCKED; + if (rp->rc_flag & RC_WANTED) { + rp->rc_flag &= ~RC_WANTED; + wakeup((caddr_t)rp); + } + return (RC_DOIT); +} + +/* + * Update a request cache entry after the rpc has been done + */ +void +nfsrv_updatecache(nam, nd, repvalid, repmbuf) + struct mbuf *nam; + register struct nfsd *nd; + int repvalid; + struct mbuf *repmbuf; +{ + register struct nfsrvcache *rp; + + if (nd->nd_nqlflag != NQL_NOVAL) + return; +loop: + for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; + rp = rp->rc_hash.le_next) { + if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && + netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) { + if ((rp->rc_flag & RC_LOCKED) != 0) { + rp->rc_flag |= RC_WANTED; + (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); + goto loop; + } + rp->rc_flag |= RC_LOCKED; + rp->rc_state = RC_DONE; + /* + * If we have a valid reply update status and save + * the reply for non-idempotent rpc's. + */ + if (repvalid && nonidempotent[nd->nd_procnum]) { + if (repliesstatus[nd->nd_procnum]) { + rp->rc_status = nd->nd_repstat; + rp->rc_flag |= RC_REPSTATUS; + } else { + rp->rc_reply = m_copym(repmbuf, + 0, M_COPYALL, M_WAIT); + rp->rc_flag |= RC_REPMBUF; + } + } + rp->rc_flag &= ~RC_LOCKED; + if (rp->rc_flag & RC_WANTED) { + rp->rc_flag &= ~RC_WANTED; + wakeup((caddr_t)rp); + } + return; + } + } +} + +/* + * Clean out the cache. Called when the last nfsd terminates. + */ +void +nfsrv_cleancache() +{ + register struct nfsrvcache *rp, *nextrp; + + for (rp = nfsrvlruhead.tqh_first; rp != 0; rp = nextrp) { + nextrp = rp->rc_lru.tqe_next; + LIST_REMOVE(rp, rc_hash); + TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); + free(rp, M_NFSD); + } + numnfsrvcache = 0; +} diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c new file mode 100644 index 00000000000..fcb99792838 --- /dev/null +++ b/sys/nfs/nfs_subs.c @@ -0,0 +1,1133 @@ +/* $NetBSD: nfs_subs.c,v 1.21 1995/09/08 13:52:23 ws Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 + */ + +/* + * These functions support the macros and help fiddle mbuf chains for + * the nfs op functions. They do things like create the rpc header and + * copy data between mbuf chains and uio lists. + */ +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/stat.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfsnode.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsmount.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsrtt.h> + +#include <miscfs/specfs/specdev.h> + +#include <netinet/in.h> +#ifdef ISO +#include <netiso/iso.h> +#endif + +#define TRUE 1 +#define FALSE 0 + +/* + * Data items converted to xdr at startup, since they are constant + * This is kinda hokey, but may save a little time doing byte swaps + */ +u_long nfs_procids[NFS_NPROCS]; +u_long nfs_xdrneg1; +u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, + rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred, + rpc_auth_kerb; +u_long nfs_vers, nfs_prog, nfs_true, nfs_false; + +/* And other global data */ +static u_long nfs_xid = 0; +enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON }; +extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +extern int nqnfs_piggy[NFS_NPROCS]; +extern struct nfsrtt nfsrtt; +extern time_t nqnfsstarttime; +extern u_long nqnfs_prog, nqnfs_vers; +extern int nqsrv_clockskew; +extern int nqsrv_writeslack; +extern int nqsrv_maxlease; + +LIST_HEAD(nfsnodehashhead, nfsnode); +extern struct nfsnodehashhead *nfs_hash __P((nfsv2fh_t *)); + +/* + * Create the header for an rpc request packet + * The hsiz is the size of the rest of the nfs request header. + * (just used to decide if a cluster is a good idea) + */ +struct mbuf * +nfsm_reqh(vp, procid, hsiz, bposp) + struct vnode *vp; + u_long procid; + int hsiz; + caddr_t *bposp; +{ + register struct mbuf *mb; + register u_long *tl; + register caddr_t bpos; + struct mbuf *mb2; + struct nfsmount *nmp; + int nqflag; + + MGET(mb, M_WAIT, MT_DATA); + if (hsiz >= MINCLSIZE) + MCLGET(mb, M_WAIT); + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + + /* + * For NQNFS, add lease request. + */ + if (vp) { + nmp = VFSTONFS(vp->v_mount); + if (nmp->nm_flag & NFSMNT_NQNFS) { + nqflag = NQNFS_NEEDLEASE(vp, procid); + if (nqflag) { + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nqflag); + *tl = txdr_unsigned(nmp->nm_leaseterm); + } else { + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + *tl = 0; + } + } + } + /* Finally, return values */ + *bposp = bpos; + return (mb); +} + +/* + * Build the RPC header and fill in the authorization info. + * The authorization string argument is only used when the credentials + * come from outside of the kernel. + * Returns the head of the mbuf list. 
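+ * The header laid down here is, word by word: xid, CALL, RPC version
+ * 2, program, version, procedure, then the credential flavor and
+ * length followed by its body (the auth_unix stamp, hostname, uid,
+ * gid and group list, or the uid plus the opaque Kerberos ticket for
+ * the RPCAUTH_NQNFS flavor), and finally an AUTH_NULL verifier.  This
+ * is only a summary; the code below is authoritative.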
+ */ +struct mbuf * +nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest, + mrest_len, mbp, xidp) + register struct ucred *cr; + int nqnfs; + int procid; + int auth_type; + int auth_len; + char *auth_str; + struct mbuf *mrest; + int mrest_len; + struct mbuf **mbp; + u_long *xidp; +{ + register struct mbuf *mb; + register u_long *tl; + register caddr_t bpos; + register int i; + struct mbuf *mreq, *mb2; + int siz, grpsiz, authsiz; + + authsiz = nfsm_rndup(auth_len); + if (auth_type == RPCAUTH_NQNFS) + authsiz += 2 * NFSX_UNSIGNED; + MGETHDR(mb, M_WAIT, MT_DATA); + if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) { + MCLGET(mb, M_WAIT); + } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) { + MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED); + } else { + MH_ALIGN(mb, 8*NFSX_UNSIGNED); + } + mb->m_len = 0; + mreq = mb; + bpos = mtod(mb, caddr_t); + + /* + * First the RPC header. + */ + nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED); + if (++nfs_xid == 0) + nfs_xid++; + *tl++ = *xidp = txdr_unsigned(nfs_xid); + *tl++ = rpc_call; + *tl++ = rpc_vers; + if (nqnfs) { + *tl++ = txdr_unsigned(NQNFS_PROG); + *tl++ = txdr_unsigned(NQNFS_VER1); + } else { + *tl++ = txdr_unsigned(NFS_PROG); + *tl++ = txdr_unsigned(NFS_VER2); + } + *tl++ = txdr_unsigned(procid); + + /* + * And then the authorization cred. + */ + *tl++ = txdr_unsigned(auth_type); + *tl = txdr_unsigned(authsiz); + switch (auth_type) { + case RPCAUTH_UNIX: + nfsm_build(tl, u_long *, auth_len); + *tl++ = 0; /* stamp ?? */ + *tl++ = 0; /* NULL hostname */ + *tl++ = txdr_unsigned(cr->cr_uid); + *tl++ = txdr_unsigned(cr->cr_gid); + grpsiz = (auth_len >> 2) - 5; + *tl++ = txdr_unsigned(grpsiz); + for (i = 0; i < grpsiz; i++) + *tl++ = txdr_unsigned(cr->cr_groups[i]); + break; + case RPCAUTH_NQNFS: + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(cr->cr_uid); + *tl = txdr_unsigned(auth_len); + siz = auth_len; + while (siz > 0) { + if (M_TRAILINGSPACE(mb) == 0) { + MGET(mb2, M_WAIT, MT_DATA); + if (siz >= MINCLSIZE) + MCLGET(mb2, M_WAIT); + mb->m_next = mb2; + mb = mb2; + mb->m_len = 0; + bpos = mtod(mb, caddr_t); + } + i = min(siz, M_TRAILINGSPACE(mb)); + bcopy(auth_str, bpos, i); + mb->m_len += i; + auth_str += i; + bpos += i; + siz -= i; + } + if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { + for (i = 0; i < siz; i++) + *bpos++ = '\0'; + mb->m_len += siz; + } + break; + }; + nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); + *tl++ = txdr_unsigned(RPCAUTH_NULL); + *tl = 0; + mb->m_next = mrest; + mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len; + mreq->m_pkthdr.rcvif = (struct ifnet *)0; + *mbp = mb; + return (mreq); +} + +/* + * copies mbuf chain to the uio scatter/gather list + */ +nfsm_mbuftouio(mrep, uiop, siz, dpos) + struct mbuf **mrep; + register struct uio *uiop; + int siz; + caddr_t *dpos; +{ + register char *mbufcp, *uiocp; + register int xfer, left, len; + register struct mbuf *mp; + long uiosiz, rem; + int error = 0; + + mp = *mrep; + mbufcp = *dpos; + len = mtod(mp, caddr_t)+mp->m_len-mbufcp; + rem = nfsm_rndup(siz)-siz; + while (siz > 0) { + if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) + return (EFBIG); + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + while (len == 0) { + mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + mbufcp = mtod(mp, caddr_t); + len = mp->m_len; + } + xfer = (left > len) ? len : left; +#ifdef notdef + /* Not Yet.. 
*/ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (mbufcp, uiocp, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(mbufcp, uiocp, xfer); + else + copyout(mbufcp, uiocp, xfer); + left -= xfer; + len -= xfer; + mbufcp += xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + if (uiop->uio_iov->iov_len <= siz) { + uiop->uio_iovcnt--; + uiop->uio_iov++; + } else { + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + } + siz -= uiosiz; + } + *dpos = mbufcp; + *mrep = mp; + if (rem > 0) { + if (len < rem) + error = nfs_adv(mrep, dpos, rem, len); + else + *dpos += rem; + } + return (error); +} + +/* + * copies a uio scatter/gather list to an mbuf chain... + */ +nfsm_uiotombuf(uiop, mq, siz, bpos) + register struct uio *uiop; + struct mbuf **mq; + int siz; + caddr_t *bpos; +{ + register char *uiocp; + register struct mbuf *mp, *mp2; + register int xfer, left, mlen; + int uiosiz, clflg, rem; + char *cp; + + if (siz > MLEN) /* or should it >= MCLBYTES ?? */ + clflg = 1; + else + clflg = 0; + rem = nfsm_rndup(siz)-siz; + mp = mp2 = *mq; + while (siz > 0) { + if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) + return (EINVAL); + left = uiop->uio_iov->iov_len; + uiocp = uiop->uio_iov->iov_base; + if (left > siz) + left = siz; + uiosiz = left; + while (left > 0) { + mlen = M_TRAILINGSPACE(mp); + if (mlen == 0) { + MGET(mp, M_WAIT, MT_DATA); + if (clflg) + MCLGET(mp, M_WAIT); + mp->m_len = 0; + mp2->m_next = mp; + mp2 = mp; + mlen = M_TRAILINGSPACE(mp); + } + xfer = (left > mlen) ? mlen : left; +#ifdef notdef + /* Not Yet.. */ + if (uiop->uio_iov->iov_op != NULL) + (*(uiop->uio_iov->iov_op)) + (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else +#endif + if (uiop->uio_segflg == UIO_SYSSPACE) + bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + else + copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + mp->m_len += xfer; + left -= xfer; + uiocp += xfer; + uiop->uio_offset += xfer; + uiop->uio_resid -= xfer; + } + if (uiop->uio_iov->iov_len <= siz) { + uiop->uio_iovcnt--; + uiop->uio_iov++; + } else { + uiop->uio_iov->iov_base += uiosiz; + uiop->uio_iov->iov_len -= uiosiz; + } + siz -= uiosiz; + } + if (rem > 0) { + if (rem > M_TRAILINGSPACE(mp)) { + MGET(mp, M_WAIT, MT_DATA); + mp->m_len = 0; + mp2->m_next = mp; + } + cp = mtod(mp, caddr_t)+mp->m_len; + for (left = 0; left < rem; left++) + *cp++ = '\0'; + mp->m_len += rem; + *bpos = cp; + } else + *bpos = mtod(mp, caddr_t)+mp->m_len; + *mq = mp; + return (0); +} + +/* + * Help break down an mbuf chain by setting the first siz bytes contiguous + * pointed to by returned val. + * This is used by the macros nfsm_dissect and nfsm_dissecton for tough + * cases. (The macros use the vars. 
dpos and dpos2) + */ +nfsm_disct(mdp, dposp, siz, left, cp2) + struct mbuf **mdp; + caddr_t *dposp; + int siz; + int left; + caddr_t *cp2; +{ + register struct mbuf *mp, *mp2; + register int siz2, xfer; + register caddr_t p; + + mp = *mdp; + while (left == 0) { + *mdp = mp = mp->m_next; + if (mp == NULL) + return (EBADRPC); + left = mp->m_len; + *dposp = mtod(mp, caddr_t); + } + if (left >= siz) { + *cp2 = *dposp; + *dposp += siz; + } else if (mp->m_next == NULL) { + return (EBADRPC); + } else if (siz > MHLEN) { + panic("nfs S too big"); + } else { + MGET(mp2, M_WAIT, MT_DATA); + mp2->m_next = mp->m_next; + mp->m_next = mp2; + mp->m_len -= left; + mp = mp2; + *cp2 = p = mtod(mp, caddr_t); + bcopy(*dposp, p, left); /* Copy what was left */ + siz2 = siz-left; + p += left; + mp2 = mp->m_next; + /* Loop around copying up the siz2 bytes */ + while (siz2 > 0) { + if (mp2 == NULL) + return (EBADRPC); + xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; + if (xfer > 0) { + bcopy(mtod(mp2, caddr_t), p, xfer); + NFSMADV(mp2, xfer); + mp2->m_len -= xfer; + p += xfer; + siz2 -= xfer; + } + if (siz2 > 0) + mp2 = mp2->m_next; + } + mp->m_len = siz; + *mdp = mp2; + *dposp = mtod(mp2, caddr_t); + } + return (0); +} + +/* + * Advance the position in the mbuf chain. + */ +nfs_adv(mdp, dposp, offs, left) + struct mbuf **mdp; + caddr_t *dposp; + int offs; + int left; +{ + register struct mbuf *m; + register int s; + + m = *mdp; + s = left; + while (s < offs) { + offs -= s; + m = m->m_next; + if (m == NULL) + return (EBADRPC); + s = m->m_len; + } + *mdp = m; + *dposp = mtod(m, caddr_t)+offs; + return (0); +} + +/* + * Copy a string into mbufs for the hard cases... + */ +nfsm_strtmbuf(mb, bpos, cp, siz) + struct mbuf **mb; + char **bpos; + char *cp; + long siz; +{ + register struct mbuf *m1, *m2; + long left, xfer, len, tlen; + u_long *tl; + int putsize; + + putsize = 1; + m2 = *mb; + left = M_TRAILINGSPACE(m2); + if (left > 0) { + tl = ((u_long *)(*bpos)); + *tl++ = txdr_unsigned(siz); + putsize = 0; + left -= NFSX_UNSIGNED; + m2->m_len += NFSX_UNSIGNED; + if (left > 0) { + bcopy(cp, (caddr_t) tl, left); + siz -= left; + cp += left; + m2->m_len += left; + left = 0; + } + } + /* Loop around adding mbufs */ + while (siz > 0) { + MGET(m1, M_WAIT, MT_DATA); + if (siz > MLEN) + MCLGET(m1, M_WAIT); + m1->m_len = NFSMSIZ(m1); + m2->m_next = m1; + m2 = m1; + tl = mtod(m1, u_long *); + tlen = 0; + if (putsize) { + *tl++ = txdr_unsigned(siz); + m1->m_len -= NFSX_UNSIGNED; + tlen = NFSX_UNSIGNED; + putsize = 0; + } + if (siz < m1->m_len) { + len = nfsm_rndup(siz); + xfer = siz; + if (xfer < len) + *(tl+(xfer>>2)) = 0; + } else { + xfer = len = m1->m_len; + } + bcopy(cp, (caddr_t) tl, xfer); + m1->m_len = len+tlen; + siz -= xfer; + cp += xfer; + } + *mb = m1; + *bpos = mtod(m1, caddr_t)+m1->m_len; + return (0); +} + +/* + * Called once to initialize data structures... 
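+ * (chiefly the table of RPC/NFS constants pre-converted to network
+ * byte order, e.g. rpc_vers = txdr_unsigned(RPC_VER2), so the fast
+ * paths can compare raw on-the-wire words without byte swapping, plus
+ * the client and server tables, the outstanding-request queue and the
+ * first nfs_timer() callout).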
+ */ +nfs_init() +{ + register int i; + + nfsrtt.pos = 0; + rpc_vers = txdr_unsigned(RPC_VER2); + rpc_call = txdr_unsigned(RPC_CALL); + rpc_reply = txdr_unsigned(RPC_REPLY); + rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); + rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); + rpc_mismatch = txdr_unsigned(RPC_MISMATCH); + rpc_autherr = txdr_unsigned(RPC_AUTHERR); + rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED); + rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); + rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS); + nfs_vers = txdr_unsigned(NFS_VER2); + nfs_prog = txdr_unsigned(NFS_PROG); + nfs_true = txdr_unsigned(TRUE); + nfs_false = txdr_unsigned(FALSE); + nfs_xdrneg1 = txdr_unsigned(-1); + /* Loop thru nfs procids */ + for (i = 0; i < NFS_NPROCS; i++) + nfs_procids[i] = txdr_unsigned(i); +#ifdef NFSCLIENT + /* Ensure async daemons disabled */ + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + nfs_iodwant[i] = (struct proc *)0; + TAILQ_INIT(&nfs_bufq); + nfs_nhinit(); /* Init the nfsnode table */ +#endif /* NFSCLIENT */ +#ifdef NFSSERVER + nfsrv_init(0); /* Init server data structures */ + nfsrv_initcache(); /* Init the server request cache */ +#endif /* NFSSERVER */ + + /* + * Initialize the nqnfs server stuff. + */ + if (nqnfsstarttime == 0) { + nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + + nqsrv_clockskew + nqsrv_writeslack; + NQLOADNOVRAM(nqnfsstarttime); + nqnfs_prog = txdr_unsigned(NQNFS_PROG); + nqnfs_vers = txdr_unsigned(NQNFS_VER1); + CIRCLEQ_INIT(&nqtimerhead); + nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); + } + + /* + * Initialize reply list and start timer + */ + TAILQ_INIT(&nfs_reqq); + nfs_timer(); +} + +#ifdef NFSCLIENT +/* + * Attribute cache routines. + * nfs_loadattrcache() - loads or updates the cache contents from attributes + * that are on the mbuf list + * nfs_getattrcache() - returns valid attributes if found in cache, returns + * error otherwise + */ + +/* + * Load the attribute cache (that lives in the nfsnode entry) with + * the values on the mbuf list and + * Iff vap not NULL + * copy the attributes to *vaper + */ +nfs_loadattrcache(vpp, mdp, dposp, vaper) + struct vnode **vpp; + struct mbuf **mdp; + caddr_t *dposp; + struct vattr *vaper; +{ + register struct vnode *vp = *vpp; + register struct vattr *vap; + register struct nfsv2_fattr *fp; + extern int (**spec_nfsv2nodeop_p)(); + register struct nfsnode *np; + register struct nfsnodehashhead *nhpp; + register long t1; + caddr_t dpos, cp2; + int error = 0, isnq; + struct mbuf *md; + enum vtype vtyp; + u_short vmode; + long rdev; + struct timespec mtime; + struct vnode *nvp; + + md = *mdp; + dpos = *dposp; + t1 = (mtod(md, caddr_t) + md->m_len) - dpos; + isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS); + if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2)) + return (error); + fp = (struct nfsv2_fattr *)cp2; + vtyp = nfstov_type(fp->fa_type); + vmode = fxdr_unsigned(u_short, fp->fa_mode); + if (vtyp == VNON || vtyp == VREG) + vtyp = IFTOVT(vmode); + if (isnq) { + rdev = fxdr_unsigned(long, fp->fa_nqrdev); + fxdr_nqtime(&fp->fa_nqmtime, &mtime); + } else { + rdev = fxdr_unsigned(long, fp->fa_nfsrdev); + fxdr_nfstime(&fp->fa_nfsmtime, &mtime); + } + /* + * If v_type == VNON it is a new node, so fill in the v_type, + * n_mtime fields. Check to see if it represents a special + * device, and if so, check for a possible alias. Once the + * correct vnode has been obtained, fill in the rest of the + * information. 
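+ * (Note that checkalias() may return an already existing device
+ * vnode; in that case the nfsnode is re-attached to that vnode, the
+ * freshly allocated one is released, and *vpp is updated, so callers
+ * must use the vnode handed back through vpp.)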
+ */ + np = VTONFS(vp); + if (vp->v_type == VNON) { + if (vtyp == VCHR && rdev == 0xffffffff) + vp->v_type = vtyp = VFIFO; + else + vp->v_type = vtyp; + if (vp->v_type == VFIFO) { +#ifdef FIFO + extern int (**fifo_nfsv2nodeop_p)(); + vp->v_op = fifo_nfsv2nodeop_p; +#else + return (EOPNOTSUPP); +#endif /* FIFO */ + } + if (vp->v_type == VCHR || vp->v_type == VBLK) { + vp->v_op = spec_nfsv2nodeop_p; + if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) { + /* + * Discard unneeded vnode, but save its nfsnode. + */ + LIST_REMOVE(np, n_hash); + nvp->v_data = vp->v_data; + vp->v_data = NULL; + vp->v_op = spec_vnodeop_p; + vrele(vp); + vgone(vp); + /* + * Reinitialize aliased node. + */ + np->n_vnode = nvp; + nhpp = nfs_hash(&np->n_fh); + LIST_INSERT_HEAD(nhpp, np, n_hash); + *vpp = vp = nvp; + } + } + np->n_mtime = mtime.ts_sec; + } + vap = &np->n_vattr; + vap->va_type = vtyp; + vap->va_mode = (vmode & 07777); + vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); + vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); + vap->va_rdev = (dev_t)rdev; + vap->va_mtime = mtime; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + if (isnq) { + fxdr_hyper(&fp->fa_nqsize, &vap->va_size); + vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize); + fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes); + vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid); + fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime); + vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags); + fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime); + vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen); + fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev); + } else { + vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize); + vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize); + vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE; + vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid); + fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime); + vap->va_flags = 0; + fxdr_nfstime(&fp->fa_nfsctime, &vap->va_ctime); + vap->va_gen = 0; + vap->va_filerev = 0; + } + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else + np->n_size = vap->va_size; + vnode_pager_setsize(vp, (u_long)np->n_size); + } else + np->n_size = vap->va_size; + } + np->n_attrstamp = time.tv_sec; + *dposp = dpos; + *mdp = md; + if (vaper != NULL) { + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); +#ifdef notdef + if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size) + if (np->n_size > vap->va_size) + vaper->va_size = np->n_size; +#endif + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) { + vaper->va_atime.ts_sec = np->n_atim.tv_sec; + vaper->va_atime.ts_nsec = + np->n_atim.tv_usec * 1000; + } + if (np->n_flag & NUPD) { + vaper->va_mtime.ts_sec = np->n_mtim.tv_sec; + vaper->va_mtime.ts_nsec = + np->n_mtim.tv_usec * 1000; + } + } + } + return (0); +} + +/* + * Check the time stamp + * If the cache is valid, copy contents to *vap and return 0 + * otherwise return an error + */ +nfs_getattrcache(vp, vaper) + register struct vnode *vp; + struct vattr *vaper; +{ + register struct nfsnode *np = VTONFS(vp); + register struct vattr *vap; + + if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) { + if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) { + nfsstats.attrcache_misses++; + return (ENOENT); + } + } else if ((time.tv_sec - np->n_attrstamp) >= 
NFS_ATTRTIMEO(np)) { + nfsstats.attrcache_misses++; + return (ENOENT); + } + nfsstats.attrcache_hits++; + vap = &np->n_vattr; + if (vap->va_size != np->n_size) { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else + np->n_size = vap->va_size; + vnode_pager_setsize(vp, (u_long)np->n_size); + } else + np->n_size = vap->va_size; + } + bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); +#ifdef notdef + if ((np->n_flag & NMODIFIED) == 0) { + np->n_size = vaper->va_size; + vnode_pager_setsize(vp, (u_long)np->n_size); + } else if (np->n_size > vaper->va_size) + if (np->n_size > vaper->va_size) + vaper->va_size = np->n_size; +#endif + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) { + vaper->va_atime.ts_sec = np->n_atim.tv_sec; + vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000; + } + if (np->n_flag & NUPD) { + vaper->va_mtime.ts_sec = np->n_mtim.tv_sec; + vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000; + } + } + return (0); +} +#endif + +/* + * Set up nameidata for a lookup() call and do it + */ +nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p) + register struct nameidata *ndp; + fhandle_t *fhp; + int len; + struct nfssvc_sock *slp; + struct mbuf *nam; + struct mbuf **mdp; + caddr_t *dposp; + struct proc *p; +{ + register int i, rem; + register struct mbuf *md; + register char *fromcp, *tocp; + struct vnode *dp; + int error, rdonly; + struct componentname *cnp = &ndp->ni_cnd; + + MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK); + /* + * Copy the name from the mbuf list to ndp->ni_pnbuf + * and set the various ndp fields appropriately. + */ + fromcp = *dposp; + tocp = cnp->cn_pnbuf; + md = *mdp; + rem = mtod(md, caddr_t) + md->m_len - fromcp; + for (i = 0; i < len; i++) { + while (rem == 0) { + md = md->m_next; + if (md == NULL) { + error = EBADRPC; + goto out; + } + fromcp = mtod(md, caddr_t); + rem = md->m_len; + } + if (*fromcp == '\0' || *fromcp == '/') { + error = EINVAL; + goto out; + } + *tocp++ = *fromcp++; + rem--; + } + *tocp = '\0'; + *mdp = md; + *dposp = fromcp; + len = nfsm_rndup(len)-len; + if (len > 0) { + if (rem >= len) + *dposp += len; + else if (error = nfs_adv(mdp, dposp, len, rem)) + goto out; + } + ndp->ni_pathlen = tocp - cnp->cn_pnbuf; + cnp->cn_nameptr = cnp->cn_pnbuf; + /* + * Extract and set starting directory. 
+ */ + if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, + nam, &rdonly)) + goto out; + if (dp->v_type != VDIR) { + vrele(dp); + error = ENOTDIR; + goto out; + } + ndp->ni_startdir = dp; + if (rdonly) + cnp->cn_flags |= (NOCROSSMOUNT | RDONLY); + else + cnp->cn_flags |= NOCROSSMOUNT; + /* + * And call lookup() to do the real work + */ + cnp->cn_proc = p; + if (error = lookup(ndp)) + goto out; + /* + * Check for encountering a symbolic link + */ + if (cnp->cn_flags & ISSYMLINK) { + if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) + vput(ndp->ni_dvp); + else + vrele(ndp->ni_dvp); + vput(ndp->ni_vp); + ndp->ni_vp = NULL; + error = EINVAL; + goto out; + } + /* + * Check for saved name request + */ + if (cnp->cn_flags & (SAVENAME | SAVESTART)) { + cnp->cn_flags |= HASBUF; + return (0); + } +out: + FREE(cnp->cn_pnbuf, M_NAMEI); + return (error); +} + +/* + * A fiddled version of m_adj() that ensures null fill to a long + * boundary and only trims off the back end + */ +void +nfsm_adj(mp, len, nul) + struct mbuf *mp; + register int len; + int nul; +{ + register struct mbuf *m; + register int count, i; + register char *cp; + + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + count = 0; + m = mp; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len > len) { + m->m_len -= len; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. + */ + for (m = mp; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + if (nul > 0) { + cp = mtod(m, caddr_t)+m->m_len-nul; + for (i = 0; i < nul; i++) + *cp++ = '\0'; + } + break; + } + count -= m->m_len; + } + while (m = m->m_next) + m->m_len = 0; +} + +/* + * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) + * - look up fsid in mount list (if not found ret error) + * - get vp and export rights by calling VFS_FHTOVP() + * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon + * - if not lockflag unlock it with VOP_UNLOCK() + */ +nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp) + fhandle_t *fhp; + int lockflag; + struct vnode **vpp; + struct ucred *cred; + struct nfssvc_sock *slp; + struct mbuf *nam; + int *rdonlyp; +{ + register struct mount *mp; + register struct nfsuid *uidp; + register int i; + struct ucred *credanon; + int error, exflags; + + *vpp = (struct vnode *)0; + if ((mp = getvfs(&fhp->fh_fsid)) == NULL) + return (ESTALE); + if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon)) + return (error); + /* + * Check/setup credentials. 
+ */ + if (exflags & MNT_EXKERB) { + for (uidp = NUIDHASH(slp, cred->cr_uid)->lh_first; uidp != 0; + uidp = uidp->nu_hash.le_next) { + if (uidp->nu_uid == cred->cr_uid) + break; + } + if (uidp == 0) { + vput(*vpp); + return (NQNFS_AUTHERR); + } + cred->cr_uid = uidp->nu_cr.cr_uid; + cred->cr_gid = uidp->nu_cr.cr_gid; + for (i = 0; i < uidp->nu_cr.cr_ngroups; i++) + cred->cr_groups[i] = uidp->nu_cr.cr_groups[i]; + cred->cr_ngroups = i; + } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { + cred->cr_uid = credanon->cr_uid; + cred->cr_gid = credanon->cr_gid; + for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) + cred->cr_groups[i] = credanon->cr_groups[i]; + cred->cr_ngroups = i; + } + if (exflags & MNT_EXRDONLY) + *rdonlyp = 1; + else + *rdonlyp = 0; + if (!lockflag) + VOP_UNLOCK(*vpp); + return (0); +} + +/* + * This function compares two net addresses by family and returns TRUE + * if they are the same host. + * If there is any doubt, return FALSE. + * The AF_INET family is handled as a special case so that address mbufs + * don't need to be saved to store "struct in_addr", which is only 4 bytes. + */ +netaddr_match(family, haddr, nam) + int family; + union nethostaddr *haddr; + struct mbuf *nam; +{ + register struct sockaddr_in *inetaddr; + + switch (family) { + case AF_INET: + inetaddr = mtod(nam, struct sockaddr_in *); + if (inetaddr->sin_family == AF_INET && + inetaddr->sin_addr.s_addr == haddr->had_inetaddr) + return (1); + break; +#ifdef ISO + case AF_ISO: + { + register struct sockaddr_iso *isoaddr1, *isoaddr2; + + isoaddr1 = mtod(nam, struct sockaddr_iso *); + isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); + if (isoaddr1->siso_family == AF_ISO && + isoaddr1->siso_nlen > 0 && + isoaddr1->siso_nlen == isoaddr2->siso_nlen && + SAME_ISOADDR(isoaddr1, isoaddr2)) + return (1); + break; + } +#endif /* ISO */ + default: + break; + }; + return (0); +} diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c new file mode 100644 index 00000000000..213062d8030 --- /dev/null +++ b/sys/nfs/nfs_syscalls.c @@ -0,0 +1,885 @@ +/* $NetBSD: nfs_syscalls.c,v 1.16 1995/10/07 06:28:57 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/domain.h> +#include <sys/protosw.h> +#include <sys/namei.h> +#include <sys/syslog.h> + +#include <sys/syscallargs.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#ifdef ISO +#include <netiso/iso.h> +#endif +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsrvcache.h> +#include <nfs/nfsmount.h> +#include <nfs/nfsnode.h> +#include <nfs/nqnfs.h> +#include <nfs/nfsrtt.h> + +/* Global defs. */ +extern u_long nfs_prog, nfs_vers; +extern int (*nfsrv_procs[NFS_NPROCS])(); +extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +extern int nfs_numasync; +extern time_t nqnfsstarttime; +extern int nqsrv_writeslack; +extern int nfsrtton; +struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; +int nuidhash_max = NFS_MAXUIDHASH; +static int nfs_numnfsd = 0; +int nfsd_waiting = 0; +static int notstarted = 1; +static int modify_flag = 0; +static struct nfsdrt nfsdrt; +void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock(); +static void nfsd_rt(); +void nfsrv_slpderef(), nfsrv_init(); + +#define TRUE 1 +#define FALSE 0 + +static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; +/* + * NFS server system calls + * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c + */ + +/* + * Get file handle system call + */ +sys_getfh(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_getfh_args /* { + syscallarg(char *) fname; + syscallarg(fhandle_t *) fhp; + } */ *uap = v; + register struct vnode *vp; + fhandle_t fh; + int error; + struct nameidata nd; + + /* + * Must be super user + */ + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, fname), p); + if (error = namei(&nd)) + return (error); + vp = nd.ni_vp; + bzero((caddr_t)&fh, sizeof(fh)); + fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fh.fh_fid); + vput(vp); + if (error) + return (error); + error = copyout((caddr_t)&fh, (caddr_t)SCARG(uap, fhp), sizeof (fh)); + return (error); +} + +/* + * Nfs server psuedo system call for the nfsd's + * Based on the flag value it either: + * - adds a socket to the selection list + * - remains in the kernel as an nfsd + * - remains in the kernel as an nfsiod + */ +sys_nfssvc(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register 
struct sys_nfssvc_args /* { + syscallarg(int) flag; + syscallarg(caddr_t) argp; + } */ *uap = v; + struct nameidata nd; + struct file *fp; + struct mbuf *nam; + struct nfsd_args nfsdarg; + struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; + struct nfsd_cargs ncd; + struct nfsd *nfsd; + struct nfssvc_sock *slp; + struct nfsuid *nuidp, **nuh; + struct nfsmount *nmp; + int error; + + /* + * Must be super user + */ + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + while (nfssvc_sockhead_flag & SLP_INIT) { + nfssvc_sockhead_flag |= SLP_WANTINIT; + (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); + } + if (SCARG(uap, flag) & NFSSVC_BIOD) { +#ifndef NFSCLIENT + error = ENOSYS; +#else /* !NFSCLIENT */ + error = nfssvc_iod(p); +#endif /* !NFSCLIENT */ + } else if (SCARG(uap, flag) & NFSSVC_MNTD) { +#ifndef NFSCLIENT + error = ENOSYS; +#else /* !NFSCLIENT */ + if (error = + copyin(SCARG(uap, argp), (caddr_t)&ncd, sizeof (ncd))) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + ncd.ncd_dirp, p); + if (error = namei(&nd)) + return (error); + if ((nd.ni_vp->v_flag & VROOT) == 0) + error = EINVAL; + nmp = VFSTONFS(nd.ni_vp->v_mount); + vput(nd.ni_vp); + if (error) + return (error); + if ((nmp->nm_flag & NFSMNT_MNTD) && + (SCARG(uap, flag) & NFSSVC_GOTAUTH) == 0) + return (0); + nmp->nm_flag |= NFSMNT_MNTD; + error = nqnfs_clientd(nmp, p->p_ucred, &ncd, SCARG(uap, flag), + SCARG(uap, argp), p); +#endif /* !NFSCLIENT */ + } else if (SCARG(uap, flag) & NFSSVC_ADDSOCK) { +#ifndef NFSSERVER + error = ENOSYS; +#else /* !NFSSERVER */ + if (error = copyin(SCARG(uap, argp), (caddr_t)&nfsdarg, + sizeof(nfsdarg))) + return (error); + if (error = getsock(p->p_fd, nfsdarg.sock, &fp)) + return (error); + /* + * Get the client address for connected sockets. + */ + if (nfsdarg.name == NULL || nfsdarg.namelen == 0) + nam = (struct mbuf *)0; + else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen, + MT_SONAME)) + return (error); + error = nfssvc_addsock(fp, nam); +#endif /* !NFSSERVER */ + } else { +#ifndef NFSSERVER + error = ENOSYS; +#else /* !NFSSERVER */ + if (error = copyin(SCARG(uap, argp), (caddr_t)nsd, + sizeof (*nsd))) + return (error); + if ((SCARG(uap, flag) & NFSSVC_AUTHIN) && + (nfsd = nsd->nsd_nfsd) && + (nfsd->nd_slp->ns_flag & SLP_VALID)) { + slp = nfsd->nd_slp; + + /* + * First check to see if another nfsd has already + * added this credential. + */ + for (nuidp = NUIDHASH(slp, nsd->nsd_uid)->lh_first; + nuidp != 0; nuidp = nuidp->nu_hash.le_next) { + if (nuidp->nu_uid == nsd->nsd_uid) + break; + } + if (nuidp == 0) { + /* + * Nope, so we will. 
+ */ + if (slp->ns_numuids < nuidhash_max) { + slp->ns_numuids++; + nuidp = (struct nfsuid *) + malloc(sizeof (struct nfsuid), M_NFSUID, + M_WAITOK); + } else + nuidp = (struct nfsuid *)0; + if ((slp->ns_flag & SLP_VALID) == 0) { + if (nuidp) + free((caddr_t)nuidp, M_NFSUID); + } else { + if (nuidp == (struct nfsuid *)0) { + nuidp = slp->ns_uidlruhead.tqh_first; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, + nu_lru); + } + nuidp->nu_cr = nsd->nsd_cr; + if (nuidp->nu_cr.cr_ngroups > NGROUPS) + nuidp->nu_cr.cr_ngroups = NGROUPS; + nuidp->nu_cr.cr_ref = 1; + nuidp->nu_uid = nsd->nsd_uid; + TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp, + nu_lru); + LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), + nuidp, nu_hash); + } + } + } + if ((SCARG(uap, flag) & NFSSVC_AUTHINFAIL) && + (nfsd = nsd->nsd_nfsd)) + nfsd->nd_flag |= NFSD_AUTHFAIL; + error = nfssvc_nfsd(nsd, SCARG(uap, argp), p); +#endif /* !NFSSERVER */ + } + if (error == EINTR || error == ERESTART) + error = 0; + return (error); +} + +#ifdef NFSSERVER +/* + * Adds a socket to the list for servicing by nfsds. + */ +nfssvc_addsock(fp, mynam) + struct file *fp; + struct mbuf *mynam; +{ + register struct mbuf *m; + register int siz; + register struct nfssvc_sock *slp; + register struct socket *so; + struct nfssvc_sock *tslp; + int error, s; + + so = (struct socket *)fp->f_data; + tslp = (struct nfssvc_sock *)0; + /* + * Add it to the list, as required. + */ + if (so->so_proto->pr_protocol == IPPROTO_UDP) { + tslp = nfs_udpsock; + if (tslp->ns_flag & SLP_VALID) { + m_freem(mynam); + return (EPERM); + } +#ifdef ISO + } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { + tslp = nfs_cltpsock; + if (tslp->ns_flag & SLP_VALID) { + m_freem(mynam); + return (EPERM); + } +#endif /* ISO */ + } + if (so->so_type == SOCK_STREAM) + siz = NFS_MAXPACKET + sizeof (u_long); + else + siz = NFS_MAXPACKET; + if (error = soreserve(so, siz, siz)) { + m_freem(mynam); + return (error); + } + + /* + * Set protocol specific options { for now TCP only } and + * reserve some space. For datagram sockets, this can get called + * repeatedly for the same socket, but that isn't harmful. + */ + if (so->so_type == SOCK_STREAM) { + MGET(m, M_WAIT, MT_SOOPTS); + *mtod(m, int *) = 1; + m->m_len = sizeof(int); + sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); + } + if (so->so_proto->pr_domain->dom_family == AF_INET && + so->so_proto->pr_protocol == IPPROTO_TCP) { + MGET(m, M_WAIT, MT_SOOPTS); + *mtod(m, int *) = 1; + m->m_len = sizeof(int); + sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); + } + so->so_rcv.sb_flags &= ~SB_NOINTR; + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_flags &= ~SB_NOINTR; + so->so_snd.sb_timeo = 0; + if (tslp) + slp = tslp; + else { + slp = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); + slp->ns_uidhashtbl = + hashinit(NUIDHASHSIZ, M_NFSSVC, &slp->ns_uidhash); + TAILQ_INIT(&slp->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); + } + slp->ns_so = so; + slp->ns_nam = mynam; + fp->f_count++; + slp->ns_fp = fp; + s = splsoftnet(); + so->so_upcallarg = (caddr_t)slp; + so->so_upcall = nfsrv_rcv; + slp->ns_flag = (SLP_VALID | SLP_NEEDQ); + nfsrv_wakenfsd(slp); + splx(s); + return (0); +} + +/* + * Called by nfssvc() for nfsds. Just loops around servicing rpc requests + * until it is killed by a signal. 
+ */ +nfssvc_nfsd(nsd, argp, p) + struct nfsd_srvargs *nsd; + caddr_t argp; + struct proc *p; +{ + register struct mbuf *m, *nam2; + register int siz; + register struct nfssvc_sock *slp; + register struct socket *so; + register int *solockp; + struct nfsd *nd = nsd->nsd_nfsd; + struct mbuf *mreq, *nam; + struct timeval starttime; + struct nfsuid *uidp; + int error, cacherep, s; + int sotype; + + s = splsoftnet(); + if (nd == (struct nfsd *)0) { + nsd->nsd_nfsd = nd = (struct nfsd *) + malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK); + bzero((caddr_t)nd, sizeof (struct nfsd)); + nd->nd_procp = p; + nd->nd_cr.cr_ref = 1; + TAILQ_INSERT_TAIL(&nfsd_head, nd, nd_chain); + nd->nd_nqlflag = NQL_NOVAL; + nfs_numnfsd++; + } + /* + * Loop getting rpc requests until SIGKILL. + */ + for (;;) { + if ((nd->nd_flag & NFSD_REQINPROG) == 0) { + while (nd->nd_slp == (struct nfssvc_sock *)0 && + (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + nd->nd_flag |= NFSD_WAITING; + nfsd_waiting++; + error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0); + nfsd_waiting--; + if (error) + goto done; + } + if (nd->nd_slp == (struct nfssvc_sock *)0 && + (nfsd_head_flag & NFSD_CHECKSLP) != 0) { + for (slp = nfssvc_sockhead.tqh_first; slp != 0; + slp = slp->ns_chain.tqe_next) { + if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) + == (SLP_VALID | SLP_DOREC)) { + slp->ns_flag &= ~SLP_DOREC; + slp->ns_sref++; + nd->nd_slp = slp; + break; + } + } + if (slp == 0) + nfsd_head_flag &= ~NFSD_CHECKSLP; + } + if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0) + continue; + if (slp->ns_flag & SLP_VALID) { + if (slp->ns_flag & SLP_DISCONN) + nfsrv_zapsock(slp); + else if (slp->ns_flag & SLP_NEEDQ) { + slp->ns_flag &= ~SLP_NEEDQ; + (void) nfs_sndlock(&slp->ns_solock, + (struct nfsreq *)0); + nfsrv_rcv(slp->ns_so, (caddr_t)slp, + M_WAIT); + nfs_sndunlock(&slp->ns_solock); + } + error = nfsrv_dorec(slp, nd); + nd->nd_flag |= NFSD_REQINPROG; + } + } else { + error = 0; + slp = nd->nd_slp; + } + if (error || (slp->ns_flag & SLP_VALID) == 0) { + nd->nd_slp = (struct nfssvc_sock *)0; + nd->nd_flag &= ~NFSD_REQINPROG; + nfsrv_slpderef(slp); + continue; + } + splx(s); + so = slp->ns_so; + sotype = so->so_type; + starttime = time; + if (so->so_proto->pr_flags & PR_CONNREQUIRED) + solockp = &slp->ns_solock; + else + solockp = (int *)0; + /* + * nam == nam2 for connectionless protocols such as UDP + * nam2 == NULL for connection based protocols to disable + * recent request caching. + */ + if (nam2 = nd->nd_nam) { + nam = nam2; + cacherep = RC_CHECKIT; + } else { + nam = slp->ns_nam; + cacherep = RC_DOIT; + } + + /* + * Check to see if authorization is needed. + */ + if (nd->nd_flag & NFSD_NEEDAUTH) { + static int logauth = 0; + + nd->nd_flag &= ~NFSD_NEEDAUTH; + /* + * Check for a mapping already installed. 
+ */ + for (uidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first; + uidp != 0; uidp = uidp->nu_hash.le_next) { + if (uidp->nu_uid == nd->nd_cr.cr_uid) + break; + } + if (uidp == 0) { + nsd->nsd_uid = nd->nd_cr.cr_uid; + if (nam2 && logauth++ == 0) + log(LOG_WARNING, "Kerberized NFS using UDP\n"); + nsd->nsd_haddr = + mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; + nsd->nsd_authlen = nd->nd_authlen; + if (copyout(nd->nd_authstr, nsd->nsd_authstr, + nd->nd_authlen) == 0 && + copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0) + return (ENEEDAUTH); + cacherep = RC_DROPIT; + } + } + if (cacherep == RC_CHECKIT) + cacherep = nfsrv_getcache(nam2, nd, &mreq); + + /* + * Check for just starting up for NQNFS and send + * fake "try again later" replies to the NQNFS clients. + */ + if (notstarted && nqnfsstarttime <= time.tv_sec) { + if (modify_flag) { + nqnfsstarttime = time.tv_sec + nqsrv_writeslack; + modify_flag = 0; + } else + notstarted = 0; + } + if (notstarted) { + if (nd->nd_nqlflag == NQL_NOVAL) + cacherep = RC_DROPIT; + else if (nd->nd_procnum != NFSPROC_WRITE) { + nd->nd_procnum = NFSPROC_NOOP; + nd->nd_repstat = NQNFS_TRYLATER; + cacherep = RC_DOIT; + } else + modify_flag = 1; + } else if (nd->nd_flag & NFSD_AUTHFAIL) { + nd->nd_flag &= ~NFSD_AUTHFAIL; + nd->nd_procnum = NFSPROC_NOOP; + nd->nd_repstat = NQNFS_AUTHERR; + cacherep = RC_DOIT; + } + + switch (cacherep) { + case RC_DOIT: + error = (*(nfsrv_procs[nd->nd_procnum]))(nd, + nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr, + nam, &mreq); + if (nd->nd_cr.cr_ref != 1) { + printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref); + panic("nfssvc cref"); + } + if (error) { + if (nd->nd_procnum != NQNFSPROC_VACATED) + nfsstats.srv_errs++; + if (nam2) { + nfsrv_updatecache(nam2, nd, FALSE, mreq); + m_freem(nam2); + } + break; + } + nfsstats.srvrpccnt[nd->nd_procnum]++; + if (nam2) + nfsrv_updatecache(nam2, nd, TRUE, mreq); + nd->nd_mrep = (struct mbuf *)0; + case RC_REPLY: + m = mreq; + siz = 0; + while (m) { + siz += m->m_len; + m = m->m_next; + } + if (siz <= 0 || siz > NFS_MAXPACKET) { + printf("mbuf siz=%d\n",siz); + panic("Bad nfs svc reply"); + } + m = mreq; + m->m_pkthdr.len = siz; + m->m_pkthdr.rcvif = (struct ifnet *)0; + /* + * For stream protocols, prepend a Sun RPC + * Record Mark. + */ + if (sotype == SOCK_STREAM) { + M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); + *mtod(m, u_long *) = htonl(0x80000000 | siz); + } + if (solockp) + (void) nfs_sndlock(solockp, (struct nfsreq *)0); + if (slp->ns_flag & SLP_VALID) + error = nfs_send(so, nam2, m, (struct nfsreq *)0); + else { + error = EPIPE; + m_freem(m); + } + if (nfsrtton) + nfsd_rt(&starttime, sotype, nd, nam, cacherep); + if (nam2) + MFREE(nam2, m); + if (nd->nd_mrep) + m_freem(nd->nd_mrep); + if (error == EPIPE) + nfsrv_zapsock(slp); + if (solockp) + nfs_sndunlock(solockp); + if (error == EINTR || error == ERESTART) { + nfsrv_slpderef(slp); + s = splsoftnet(); + goto done; + } + break; + case RC_DROPIT: + if (nfsrtton) + nfsd_rt(&starttime, sotype, nd, nam, cacherep); + m_freem(nd->nd_mrep); + m_freem(nam2); + break; + }; + s = splsoftnet(); + if (nfsrv_dorec(slp, nd)) { + nd->nd_flag &= ~NFSD_REQINPROG; + nd->nd_slp = (struct nfssvc_sock *)0; + nfsrv_slpderef(slp); + } + } +done: + TAILQ_REMOVE(&nfsd_head, nd, nd_chain); + splx(s); + free((caddr_t)nd, M_NFSD); + nsd->nsd_nfsd = (struct nfsd *)0; + if (--nfs_numnfsd == 0) + nfsrv_init(TRUE); /* Reinitialize everything */ + return (error); +} + +/* + * Shut down a socket associated with an nfssvc_sock structure. 
+ * Should be called with the send lock set, if required. + * The trick here is to increment the sref at the start, so that the nfsds + * will stop using it and clear ns_flag at the end so that it will not be + * reassigned during cleanup. + */ +nfsrv_zapsock(slp) + register struct nfssvc_sock *slp; +{ + register struct nfsuid *nuidp, *nnuidp; + register int i; + struct socket *so; + struct file *fp; + struct mbuf *m; + + slp->ns_flag &= ~SLP_ALLFLAGS; + if (fp = slp->ns_fp) { + slp->ns_fp = (struct file *)0; + so = slp->ns_so; + so->so_upcall = NULL; + soshutdown(so, 2); + closef(fp, (struct proc *)0); + if (slp->ns_nam) + MFREE(slp->ns_nam, m); + m_freem(slp->ns_raw); + m_freem(slp->ns_rec); + for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; + nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); + free((caddr_t)nuidp, M_NFSUID); + } + } +} + +/* + * Dereference a server socket structure. If it has no more references and + * is no longer valid, you can throw it away. + */ +void +nfsrv_slpderef(slp) + register struct nfssvc_sock *slp; +{ + if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + free((caddr_t)slp, M_NFSSVC); + } +} + +/* + * Initialize the data structures for the server. + * Handshake with any new nfsds starting up to avoid any chance of + * corruption. + */ +void +nfsrv_init(terminating) + int terminating; +{ + register struct nfssvc_sock *slp, *nslp; + + if (nfssvc_sockhead_flag & SLP_INIT) + panic("nfsd init"); + nfssvc_sockhead_flag |= SLP_INIT; + if (terminating) { + for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { + nslp = slp->ns_chain.tqe_next; + if (slp->ns_flag & SLP_VALID) + nfsrv_zapsock(slp); + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + free((caddr_t)slp, M_NFSSVC); + } + nfsrv_cleancache(); /* And clear out server cache */ + } + + TAILQ_INIT(&nfssvc_sockhead); + nfssvc_sockhead_flag &= ~SLP_INIT; + if (nfssvc_sockhead_flag & SLP_WANTINIT) { + nfssvc_sockhead_flag &= ~SLP_WANTINIT; + wakeup((caddr_t)&nfssvc_sockhead); + } + + TAILQ_INIT(&nfsd_head); + nfsd_head_flag &= ~NFSD_CHECKSLP; + + nfs_udpsock = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); + nfs_udpsock->ns_uidhashtbl = + hashinit(NUIDHASHSIZ, M_NFSSVC, &nfs_udpsock->ns_uidhash); + TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); + TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + + nfs_cltpsock = (struct nfssvc_sock *) + malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK); + bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); + nfs_cltpsock->ns_uidhashtbl = + hashinit(NUIDHASHSIZ, M_NFSSVC, &nfs_cltpsock->ns_uidhash); + TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); +} + +/* + * Add entries to the server monitor log.
+ */ +static void +nfsd_rt(startp, sotype, nd, nam, cacherep) + struct timeval *startp; + int sotype; + register struct nfsd *nd; + struct mbuf *nam; + int cacherep; +{ + register struct drt *rt; + + rt = &nfsdrt.drt[nfsdrt.pos]; + if (cacherep == RC_DOIT) + rt->flag = 0; + else if (cacherep == RC_REPLY) + rt->flag = DRT_CACHEREPLY; + else + rt->flag = DRT_CACHEDROP; + if (sotype == SOCK_STREAM) + rt->flag |= DRT_TCP; + if (nd->nd_nqlflag != NQL_NOVAL) + rt->flag |= DRT_NQNFS; + rt->proc = nd->nd_procnum; + if (mtod(nam, struct sockaddr *)->sa_family == AF_INET) + rt->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; + else + rt->ipadr = INADDR_ANY; + rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) + + (time.tv_usec - startp->tv_usec); + rt->tstamp = time; + nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ; +} +#endif /* NFSSERVER */ + +#ifdef NFSCLIENT +/* + * Asynchronous I/O daemons for client nfs. + * They do read-ahead and write-behind operations on the block I/O cache. + * Never returns unless it fails or gets killed. + */ +nfssvc_iod(p) + struct proc *p; +{ + register struct buf *bp; + register int i, myiod; + int error = 0; + + /* + * Assign my position or return error if too many already running + */ + myiod = -1; + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + if (nfs_asyncdaemon[i] == 0) { + nfs_asyncdaemon[i]++; + myiod = i; + break; + } + if (myiod == -1) + return (EBUSY); + nfs_numasync++; + /* + * Just loop around doing our stuff until SIGKILL + */ + for (;;) { + while (nfs_bufq.tqh_first == NULL && error == 0) { + nfs_iodwant[myiod] = p; + error = tsleep((caddr_t)&nfs_iodwant[myiod], + PWAIT | PCATCH, "nfsidl", 0); + } + while ((bp = nfs_bufq.tqh_first) != NULL) { + /* Take one off the front of the list */ + TAILQ_REMOVE(&nfs_bufq, bp, b_freelist); + if (bp->b_flags & B_READ) + (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); + else + (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); + } + if (error) { + nfs_asyncdaemon[myiod] = 0; + nfs_numasync--; + return (error); + } + } +} + +/* + * Get an authorization string for the uid by having the mount_nfs sitting + * on this mount point porpoise out of the kernel and do it. + */ +nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len) + register struct nfsmount *nmp; + struct nfsreq *rep; + struct ucred *cred; + int *auth_type; + char **auth_str; + int *auth_len; +{ + int error = 0; + + while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { + nmp->nm_flag |= NFSMNT_WANTAUTH; + (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK, + "nfsauth1", 2 * hz); + if (error = nfs_sigintr(nmp, rep, rep->r_procp)) { + nmp->nm_flag &= ~NFSMNT_WANTAUTH; + return (error); + } + } + nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH); + nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); + nmp->nm_authuid = cred->cr_uid; + wakeup((caddr_t)&nmp->nm_authstr); + + /* + * And wait for mount_nfs to do its stuff.
+ */ + while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) { + (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK, + "nfsauth2", 2 * hz); + error = nfs_sigintr(nmp, rep, rep->r_procp); + } + if (nmp->nm_flag & NFSMNT_AUTHERR) { + nmp->nm_flag &= ~NFSMNT_AUTHERR; + error = EAUTH; + } + if (error) + free((caddr_t)*auth_str, M_TEMP); + else { + *auth_type = nmp->nm_authtype; + *auth_len = nmp->nm_authlen; + } + nmp->nm_flag &= ~NFSMNT_HASAUTH; + nmp->nm_flag |= NFSMNT_WAITAUTH; + if (nmp->nm_flag & NFSMNT_WANTAUTH) { + nmp->nm_flag &= ~NFSMNT_WANTAUTH; + wakeup((caddr_t)&nmp->nm_authtype); + } + return (error); +} +#endif /* NFSCLIENT */ diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c new file mode 100644 index 00000000000..345ca048869 --- /dev/null +++ b/sys/nfs/nfs_vfsops.c @@ -0,0 +1,785 @@ +/* $NetBSD: nfs_vfsops.c,v 1.38 1995/08/13 00:00:08 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/ioctl.h> +#include <sys/signal.h> +#include <sys/proc.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> + +#include <net/if.h> +#include <net/route.h> +#include <netinet/in.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> +#include <nfs/nfs.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nfsdiskless.h> +#include <nfs/nqnfs.h> + +/* + * nfs vfs operations. 
+ */ +struct vfsops nfs_vfsops = { + MOUNT_NFS, + nfs_mount, + nfs_start, + nfs_unmount, + nfs_root, + nfs_quotactl, + nfs_statfs, + nfs_sync, + nfs_vget, + nfs_fhtovp, + nfs_vptofh, + nfs_init, +}; + +extern u_long nfs_procids[NFS_NPROCS]; +extern u_long nfs_prog, nfs_vers; +void nfs_disconnect __P((struct nfsmount *)); + +static struct mount * +nfs_mount_diskless __P((struct nfs_dlmount *, char *, int, struct vnode **)); + +#define TRUE 1 +#define FALSE 0 + +/* + * nfs statfs call + */ +int +nfs_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct vnode *vp; + register struct nfsv2_statfs *sfp; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + int error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nfsmount *nmp; + struct ucred *cred; + struct nfsnode *np; + + nmp = VFSTONFS(mp); + isnq = (nmp->nm_flag & NFSMNT_NQNFS); + if (error = nfs_nget(mp, &nmp->nm_fh, &np)) + return (error); + vp = NFSTOV(np); + nfsstats.rpccnt[NFSPROC_STATFS]++; + cred = crget(); + cred->cr_ngroups = 0; + nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH); + nfsm_fhtom(vp); + nfsm_request(vp, NFSPROC_STATFS, p, cred); + nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq)); +#ifdef COMPAT_09 + sbp->f_type = 2; +#else + sbp->f_type = 0; +#endif + sbp->f_flags = nmp->nm_flag; + sbp->f_iosize = NFS_MAXDGRAMDATA; + sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize); + sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks); + sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree); + sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail); + if (isnq) { + sbp->f_files = fxdr_unsigned(long, sfp->sf_files); + sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree); + } else { + sbp->f_files = 0; + sbp->f_ffree = 0; + } + if (sbp != &mp->mnt_stat) { + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + nfsm_reqdone; + vrele(vp); + crfree(cred); + return (error); +} + +/* + * Mount a remote root fs via. NFS. It goes like this: + * - Call nfs_boot_init() to fill in the nfs_diskless struct + * (using RARP, bootparam RPC, mountd RPC) + * - hand craft the swap nfs vnode hanging off a fake mount point + * if swdevt[0].sw_dev == NODEV + * - build the rootfs mount point and call mountnfs() to do the rest. + */ +int +nfs_mountroot() +{ + struct nfs_diskless nd; + struct vattr attr; + struct mount *mp; + struct vnode *vp; + struct proc *procp; + struct ucred *cred; + long n; + int error; + + procp = curproc; /* XXX */ + + /* + * XXX time must be non-zero when we init the interface or else + * the arp code will wedge. [Fixed now in if_ether.c] + * However, the NFS attribute cache gives false "hits" when + * time.tv_sec < NFS_ATTRTIMEO(np) so keep this in for now. + */ + if (time.tv_sec < NFS_MAXATTRTIMO) + time.tv_sec = NFS_MAXATTRTIMO; + + /* + * Call nfs_boot_init() to fill in the nfs_diskless struct. + * Side effect: Finds and configures a network interface. + */ + bzero((caddr_t) &nd, sizeof(nd)); + nfs_boot_init(&nd, procp); + + /* + * Create the root mount point. + */ + mp = nfs_mount_diskless(&nd.nd_root, "/", 0, &vp); + printf("root on %s\n", &nd.nd_root.ndm_host); + + /* + * Link it into the mount list. 
+ */ + if (vfs_lock(mp)) + panic("nfs_mountroot: vfs_lock"); + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mp->mnt_vnodecovered = NULLVP; + vfs_unlock(mp); + rootvp = vp; + + /* Get root attributes (for the time). */ + error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp); + if (error) panic("nfs_mountroot: getattr for root"); + n = attr.va_mtime.ts_sec; +#ifdef DEBUG + printf("root time: 0x%x\n", n); +#endif + inittodr(n); + +#ifdef notyet + /* Set up swap credentials. */ + proc0.p_ucred->cr_uid = ntohl(nd.swap_ucred.cr_uid); + proc0.p_ucred->cr_gid = ntohl(nd.swap_ucred.cr_gid); + if ((proc0.p_ucred->cr_ngroups = ntohs(nd.swap_ucred.cr_ngroups)) > + NGROUPS) + proc0.p_ucred->cr_ngroups = NGROUPS; + for (i = 0; i < proc0.p_ucred->cr_ngroups; i++) + proc0.p_ucred->cr_groups[i] = ntohl(nd.swap_ucred.cr_groups[i]); +#endif + + /* + * "Mount" the swap device. + * + * On a "dataless" configuration (swap on disk) we will have: + * (swdevt[0].sw_dev != NODEV) identifying the swap device. + */ + if (bdevvp(swapdev, &swapdev_vp)) + panic("nfs_mountroot: can't setup swap vp"); + if (swdevt[0].sw_dev != NODEV) { + printf("swap on device 0x%x\n", swdevt[0].sw_dev); + return (0); + } + + /* + * If swapping to an nfs node: (swdevt[0].sw_dev == NODEV) + * Create a fake mount point just for the swap vnode so that the + * swap file can be on a different server from the rootfs. + */ + mp = nfs_mount_diskless(&nd.nd_swap, "/swap", 0, &vp); + printf("swap on %s\n", &nd.nd_swap.ndm_host); + + /* + * Since the swap file is not the root dir of a file system, + * hack it to a regular file. + */ + vp->v_type = VREG; + vp->v_flag = 0; + swdevt[0].sw_vp = vp; + + /* + * Find out how large the swap file is. + */ + error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp); + if (error) + panic("nfs_mountroot: getattr for swap"); + n = (long) (attr.va_size >> DEV_BSHIFT); +#ifdef DEBUG + printf("swap size: 0x%x (blocks)\n", n); +#endif + swdevt[0].sw_nblks = n; + + return (0); +} + +/* + * Internal version of mount system call for diskless setup. + */ +static struct mount * +nfs_mount_diskless(ndmntp, mntname, mntflag, vpp) + struct nfs_dlmount *ndmntp; + char *mntname; + int mntflag; + struct vnode **vpp; +{ + struct nfs_args args; + struct mount *mp; + struct mbuf *m; + int error; + + /* Create the mount point. */ + mp = (struct mount *)malloc((u_long)sizeof(struct mount), + M_MOUNT, M_NOWAIT); + if (mp == NULL) + panic("nfs_mountroot: malloc mount for %s", mntname); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &nfs_vfsops; + mp->mnt_flag = mntflag; + + /* Initialize mount args. */ + bzero((caddr_t) &args, sizeof(args)); + args.addr = (struct sockaddr *)&ndmntp->ndm_saddr; + args.addrlen = args.addr->sa_len; + args.sotype = SOCK_DGRAM; + args.fh = (nfsv2fh_t *)ndmntp->ndm_fh; + args.hostname = ndmntp->ndm_host; + args.flags = NFSMNT_RESVPORT; + +#ifdef NFS_BOOT_OPTIONS + args.flags |= NFS_BOOT_OPTIONS; +#endif +#ifdef NFS_BOOT_RWSIZE + /* + * Reduce rsize,wsize for interfaces that consistently + * drop fragments of long UDP messages. (i.e. wd8003). + * You can always change these later via remount. + */ + args.flags |= NFSMNT_WSIZE | NFSMNT_RSIZE; + args.wsize = NFS_BOOT_RWSIZE; + args.rsize = NFS_BOOT_RWSIZE; +#endif + + /* Get mbuf for server sockaddr. 
*/ + m = m_get(M_WAIT, MT_SONAME); + if (m == NULL) + panic("nfs_mountroot: mget soname for %s", mntname); + bcopy((caddr_t)args.addr, mtod(m, caddr_t), + (m->m_len = args.addr->sa_len)); + + if (error = mountnfs(&args, mp, m, mntname, args.hostname, vpp)) + panic("nfs_mountroot: mount %s failed: %d", mntname, error); + + return (mp); +} + +void +nfs_decode_args(nmp, argp) + struct nfsmount *nmp; + struct nfs_args *argp; +{ + int s; + int adjsock; + + s = splsoftnet(); + + /* Re-bind if rsrvd port requested and wasn't on one */ + adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) + && (argp->flags & NFSMNT_RESVPORT); + + /* Update flags atomically. Don't change the lock bits. */ + nmp->nm_flag = + (argp->flags & ~NFSMNT_INTERNAL) | (nmp->nm_flag & NFSMNT_INTERNAL); + splx(s); + + if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { + nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; + if (nmp->nm_timeo < NFS_MINTIMEO) + nmp->nm_timeo = NFS_MINTIMEO; + else if (nmp->nm_timeo > NFS_MAXTIMEO) + nmp->nm_timeo = NFS_MAXTIMEO; + } + + if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { + nmp->nm_retry = argp->retrans; + if (nmp->nm_retry > NFS_MAXREXMIT) + nmp->nm_retry = NFS_MAXREXMIT; + } + + if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { + int osize = nmp->nm_wsize; + nmp->nm_wsize = argp->wsize; + /* Round down to multiple of blocksize */ + nmp->nm_wsize &= ~0x1ff; + if (nmp->nm_wsize <= 0) + nmp->nm_wsize = 512; + else if (nmp->nm_wsize > NFS_MAXDATA) + nmp->nm_wsize = NFS_MAXDATA; + adjsock |= (nmp->nm_wsize != osize); + } + if (nmp->nm_wsize > MAXBSIZE) + nmp->nm_wsize = MAXBSIZE; + + if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { + int osize = nmp->nm_rsize; + nmp->nm_rsize = argp->rsize; + /* Round down to multiple of blocksize */ + nmp->nm_rsize &= ~0x1ff; + if (nmp->nm_rsize <= 0) + nmp->nm_rsize = 512; + else if (nmp->nm_rsize > NFS_MAXDATA) + nmp->nm_rsize = NFS_MAXDATA; + adjsock |= (nmp->nm_rsize != osize); + } + if (nmp->nm_rsize > MAXBSIZE) + nmp->nm_rsize = MAXBSIZE; + + if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 && + argp->maxgrouplist <= NFS_MAXGRPS) + nmp->nm_numgrps = argp->maxgrouplist; + if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && + argp->readahead <= NFS_MAXRAHEAD) + nmp->nm_readahead = argp->readahead; + if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && + argp->leaseterm <= NQ_MAXLEASE) + nmp->nm_leaseterm = argp->leaseterm; + if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && + argp->deadthresh <= NQ_NEVERDEAD) + nmp->nm_deadthresh = argp->deadthresh; + + if (nmp->nm_so && adjsock) { + nfs_disconnect(nmp); + if (nmp->nm_sotype == SOCK_DGRAM) + while (nfs_connect(nmp, (struct nfsreq *)0)) { + printf("nfs_args: retrying connect\n"); + (void) tsleep((caddr_t)&lbolt, + PSOCK, "nfscon", 0); + } + } +} + +/* + * VFS Operations. + * + * mount system call + * It seems a bit dumb to copyinstr() the host and path here and then + * bcopy() them in mountnfs(), but I wanted to detect errors before + * doing the sockargs() call because sockargs() allocates an mbuf and + * an error after that means that I have to release the mbuf.
+ */ +/* ARGSUSED */ +int +nfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error; + struct nfs_args args; + struct mbuf *nam; + struct vnode *vp; + char pth[MNAMELEN], hst[MNAMELEN]; + size_t len; + nfsv2fh_t nfh; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args))) + return (error); + if (mp->mnt_flag & MNT_UPDATE) { + register struct nfsmount *nmp = VFSTONFS(mp); + + if (nmp == NULL) + return (EIO); + nfs_decode_args(nmp, &args); + return (0); + } + if (error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t))) + return (error); + if (error = copyinstr(path, pth, MNAMELEN-1, &len)) + return (error); + bzero(&pth[len], MNAMELEN - len); + if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len)) + return (error); + bzero(&hst[len], MNAMELEN - len); + /* sockargs() call must be after above copyin() calls */ + if (error = sockargs(&nam, (caddr_t)args.addr, + args.addrlen, MT_SONAME)) + return (error); + args.fh = &nfh; + error = mountnfs(&args, mp, nam, pth, hst, &vp); + return (error); +} + +/* + * Common code for mount and mountroot + */ +int +mountnfs(argp, mp, nam, pth, hst, vpp) + register struct nfs_args *argp; + register struct mount *mp; + struct mbuf *nam; + char *pth, *hst; + struct vnode **vpp; +{ + register struct nfsmount *nmp; + struct nfsnode *np; + int error; + + if (mp->mnt_flag & MNT_UPDATE) { + nmp = VFSTONFS(mp); + /* update paths, file handles, etc, here XXX */ + m_freem(nam); + return (0); + } else { + MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount), + M_NFSMNT, M_WAITOK); + bzero((caddr_t)nmp, sizeof (struct nfsmount)); + mp->mnt_data = (qaddr_t)nmp; + } + getnewfsid(mp, makefstype(MOUNT_NFS)); + nmp->nm_mountp = mp; + if ((argp->flags & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) == + (NFSMNT_NQNFS | NFSMNT_MYWRITE)) { + error = EPERM; + goto bad; + } + if (argp->flags & NFSMNT_NQNFS) + /* + * We have to set mnt_maxsymlink to a non-zero value so + * that COMPAT_43 routines will know that we are setting + * the d_type field in directories (and can zero it for + * unsuspecting binaries). + */ + mp->mnt_maxsymlinklen = 1; + nmp->nm_timeo = NFS_TIMEO; + nmp->nm_retry = NFS_RETRANS; + nmp->nm_wsize = NFS_WSIZE; + nmp->nm_rsize = NFS_RSIZE; + nmp->nm_numgrps = NFS_MAXGRPS; + nmp->nm_readahead = NFS_DEFRAHEAD; + nmp->nm_leaseterm = NQ_DEFLEASE; + nmp->nm_deadthresh = NQ_DEADTHRESH; + CIRCLEQ_INIT(&nmp->nm_timerhead); + nmp->nm_inprog = NULLVP; + bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t)); +#ifdef COMPAT_09 + mp->mnt_stat.f_type = 2; +#else + mp->mnt_stat.f_type = 0; +#endif + strncpy(&mp->mnt_stat.f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); + bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); + nmp->nm_nam = nam; + nfs_decode_args(nmp, argp); + + /* Set up the sockets and per-host congestion */ + nmp->nm_sotype = argp->sotype; + nmp->nm_soproto = argp->proto; + + /* + * For Connection based sockets (TCP,...) defer the connect until + * the first request, in case the server is not responding. + */ + if (nmp->nm_sotype == SOCK_DGRAM && + (error = nfs_connect(nmp, (struct nfsreq *)0))) + goto bad; + + /* + * This is silly, but it has to be set so that vinifod() works. + * We do not want to do an nfs_statfs() here since we can get + * stuck on a dead server and we are holding a lock on the mount + * point. 
+ */ + mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA; + /* + * A reference count is needed on the nfsnode representing the + * remote root. If this object is not persistent, then backward + * traversals of the mount point (i.e. "..") will not work if + * the nfsnode gets flushed out of the cache. Ufs does not have + * this problem, because one can identify root inodes by their + * number == ROOTINO (2). + */ + if (error = nfs_nget(mp, &nmp->nm_fh, &np)) + goto bad; + *vpp = NFSTOV(np); + + return (0); +bad: + nfs_disconnect(nmp); + free((caddr_t)nmp, M_NFSMNT); + m_freem(nam); + return (error); +} + +/* + * unmount system call + */ +int +nfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct nfsmount *nmp; + struct nfsnode *np; + struct vnode *vp; + int error, flags = 0; + extern int doforce; + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + nmp = VFSTONFS(mp); + /* + * Goes something like this.. + * - Check for activity on the root vnode (other than ourselves). + * - Call vflush() to clear out vnodes for this file system, + * except for the root vnode. + * - Decrement reference on the vnode representing remote root. + * - Close the socket + * - Free up the data structures + */ + /* + * We need to decrement the ref. count on the nfsnode representing + * the remote root. See comment in mountnfs(). The VFS unmount() + * has done vput on this vnode, otherwise we would get deadlock! + */ + if (error = nfs_nget(mp, &nmp->nm_fh, &np)) + return(error); + vp = NFSTOV(np); + if (vp->v_usecount > 2) { + vput(vp); + return (EBUSY); + } + + /* + * Must handshake with nqnfs_clientd() if it is active. + */ + nmp->nm_flag |= NFSMNT_DISMINPROG; + while (nmp->nm_inprog != NULLVP) + (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); + if (error = vflush(mp, vp, flags)) { + vput(vp); + nmp->nm_flag &= ~NFSMNT_DISMINPROG; + return (error); + } + + /* + * We are now committed to the unmount. + * For NQNFS, let the server daemon free the nfsmount structure. + */ + if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) + nmp->nm_flag |= NFSMNT_DISMNT; + + /* + * There are two reference counts to get rid of here. + */ + vrele(vp); + vrele(vp); + vgone(vp); + nfs_disconnect(nmp); + m_freem(nmp->nm_nam); + + if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) + free((caddr_t)nmp, M_NFSMNT); + return (0); +} + +/* + * Return root of a filesystem + */ +int +nfs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + register struct vnode *vp; + struct nfsmount *nmp; + struct nfsnode *np; + int error; + + nmp = VFSTONFS(mp); + if (error = nfs_nget(mp, &nmp->nm_fh, &np)) + return (error); + vp = NFSTOV(np); + vp->v_type = VDIR; + vp->v_flag = VROOT; + *vpp = vp; + return (0); +} + +extern int syncprt; + +/* + * Flush out the buffer cache + */ +/* ARGSUSED */ +int +nfs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + int error, allerror = 0; + + /* + * Force stale buffer cache information to be flushed. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. 
+ */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL) + continue; + if (vget(vp, 1)) + goto loop; + if (error = VOP_FSYNC(vp, cred, waitfor, p)) + allerror = error; + vput(vp); + } + return (allerror); +} + +/* + * NFS flat namespace lookup. + * Currently unsupported. + */ +/* ARGSUSED */ +int +nfs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + + return (EOPNOTSUPP); +} + +/* + * At this point, this should never happen + */ +/* ARGSUSED */ +int +nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + + return (EINVAL); +} + +/* + * Vnode pointer to File handle, should never happen either + */ +/* ARGSUSED */ +int +nfs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + + return (EINVAL); +} + +/* + * Vfs start routine, a no-op. + */ +/* ARGSUSED */ +int +nfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +/* + * Do operations associated with quotas, not supported + */ +/* ARGSUSED */ +int +nfs_quotactl(mp, cmd, uid, arg, p) + struct mount *mp; + int cmd; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + + return (EOPNOTSUPP); +} diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c new file mode 100644 index 00000000000..49c09403b47 --- /dev/null +++ b/sys/nfs/nfs_vnops.c @@ -0,0 +1,2563 @@ +/* $NetBSD: nfs_vnops.c,v 1.51 1995/10/09 11:25:30 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)nfs_vnops.c 8.10 (Berkeley) 8/11/94 + */ + +/* + * vnode op calls for sun nfs version 2 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/conf.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/map.h> +#include <sys/dirent.h> +#include <sys/lockf.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#include <nfs/rpcv2.h> +#include <nfs/nfsv2.h> +#include <nfs/nfs.h> +#include <nfs/nfsnode.h> +#include <nfs/nfsmount.h> +#include <nfs/xdr_subs.h> +#include <nfs/nfsm_subs.h> +#include <nfs/nqnfs.h> + +/* Defs */ +#define TRUE 1 +#define FALSE 0 + +/* + * Global vfs data structures for nfs + */ +int (**nfsv2_vnodeop_p)(); +struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, nfs_lookup }, /* lookup */ + { &vop_create_desc, nfs_create }, /* create */ + { &vop_mknod_desc, nfs_mknod }, /* mknod */ + { &vop_open_desc, nfs_open }, /* open */ + { &vop_close_desc, nfs_close }, /* close */ + { &vop_access_desc, nfs_access }, /* access */ + { &vop_getattr_desc, nfs_getattr }, /* getattr */ + { &vop_setattr_desc, nfs_setattr }, /* setattr */ + { &vop_read_desc, nfs_read }, /* read */ + { &vop_write_desc, nfs_write }, /* write */ + { &vop_lease_desc, nfs_lease_check }, /* lease */ + { &vop_ioctl_desc, nfs_ioctl }, /* ioctl */ + { &vop_select_desc, nfs_select }, /* select */ + { &vop_mmap_desc, nfs_mmap }, /* mmap */ + { &vop_fsync_desc, nfs_fsync }, /* fsync */ + { &vop_seek_desc, nfs_seek }, /* seek */ + { &vop_remove_desc, nfs_remove }, /* remove */ + { &vop_link_desc, nfs_link }, /* link */ + { &vop_rename_desc, nfs_rename }, /* rename */ + { &vop_mkdir_desc, nfs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, nfs_rmdir }, /* rmdir */ + { &vop_symlink_desc, nfs_symlink }, /* symlink */ + { &vop_readdir_desc, nfs_readdir }, /* readdir */ + { &vop_readlink_desc, nfs_readlink }, /* readlink */ + { &vop_abortop_desc, nfs_abortop }, /* abortop */ + { &vop_inactive_desc, nfs_inactive }, /* inactive */ + { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */ + { &vop_lock_desc, nfs_lock }, /* lock */ + { &vop_unlock_desc, nfs_unlock }, /* unlock */ + { &vop_bmap_desc, nfs_bmap }, /* bmap */ + { &vop_strategy_desc, nfs_strategy }, /* strategy */ + { &vop_print_desc, nfs_print }, /* print */ + { &vop_islocked_desc, nfs_islocked }, /* islocked */ + { &vop_pathconf_desc, nfs_pathconf }, /* pathconf */ + { &vop_advlock_desc, nfs_advlock }, /* advlock */ + { &vop_blkatoff_desc, nfs_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, nfs_valloc }, /* valloc */ + { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, nfs_vfree }, /* vfree */ + { &vop_truncate_desc, nfs_truncate }, /* truncate */ + { &vop_update_desc, nfs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc nfsv2_vnodeop_opv_desc = + { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; + +/* + * Special device vnode ops + */ +int (**spec_nfsv2nodeop_p)(); +struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, 
nfsspec_close }, /* close */ + { &vop_access_desc, nfsspec_access }, /* access */ + { &vop_getattr_desc, nfs_getattr }, /* getattr */ + { &vop_setattr_desc, nfs_setattr }, /* setattr */ + { &vop_read_desc, nfsspec_read }, /* read */ + { &vop_write_desc, nfsspec_write }, /* write */ + { &vop_lease_desc, spec_lease_check }, /* lease */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, nfs_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, nfs_inactive }, /* inactive */ + { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */ + { &vop_lock_desc, nfs_lock }, /* lock */ + { &vop_unlock_desc, nfs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, nfs_print }, /* print */ + { &vop_islocked_desc, nfs_islocked }, /* islocked */ + { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, spec_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, nfs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = + { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries }; + +#ifdef FIFO +int (**fifo_nfsv2nodeop_p)(); +struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, nfsfifo_close }, /* close */ + { &vop_access_desc, nfsspec_access }, /* access */ + { &vop_getattr_desc, nfs_getattr }, /* getattr */ + { &vop_setattr_desc, nfs_setattr }, /* setattr */ + { &vop_read_desc, nfsfifo_read }, /* read */ + { &vop_write_desc, nfsfifo_write }, /* write */ + { &vop_lease_desc, fifo_lease_check }, /* lease */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, nfs_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop */ + { &vop_inactive_desc, nfs_inactive }, /* inactive */ + { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */ + { &vop_lock_desc, 
nfs_lock }, /* lock */ + { &vop_unlock_desc, nfs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_badop }, /* strategy */ + { &vop_print_desc, nfs_print }, /* print */ + { &vop_islocked_desc, nfs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, fifo_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, nfs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = + { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries }; +#endif /* FIFO */ + +void nqnfs_clientlease(); + +/* + * Global variables + */ +extern u_long nfs_procids[NFS_NPROCS]; +extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false; +struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +int nfs_numasync = 0; +#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) + +/* + * nfs null call from vfs. + */ +int +nfs_null(vp, cred, procp) + struct vnode *vp; + struct ucred *cred; + struct proc *procp; +{ + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb; + + nfsm_reqhead(vp, NFSPROC_NULL, 0); + nfsm_request(vp, NFSPROC_NULL, procp, cred); + nfsm_reqdone; + return (error); +} + +/* + * nfs access vnode op. + * For nfs, just return ok. File accesses may fail later. + * For nqnfs, use the access rpc to check accessibility. If file modes are + * changed on the server, accesses might still fail later. + */ +int +nfs_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register u_long *tl; + register caddr_t cp; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + /* + * For nqnfs, do an access rpc, otherwise you are stuck emulating + * ufs_access() locally using the vattr. This may not be correct, + * since the server may apply other access criteria such as + * client uid-->server uid mapping that we do not know about, but + * this is better than just returning anything that is lying about + * in the cache. + */ + if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { + nfsstats.rpccnt[NQNFSPROC_ACCESS]++; + nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); + if (ap->a_mode & VREAD) + *tl++ = nfs_true; + else + *tl++ = nfs_false; + if (ap->a_mode & VWRITE) + *tl++ = nfs_true; + else + *tl++ = nfs_false; + if (ap->a_mode & VEXEC) + *tl = nfs_true; + else + *tl = nfs_false; + nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred); + nfsm_reqdone; + return (error); + } else + return (nfsspec_access(ap)); +} + +/* + * nfs open vnode op + * Check to see if the type is ok + * and that deletion is not in progress. + * For paged in text files, you will need to flush the page cache + * if consistency is lost. 
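+ * (Below, an NQNFS mount revalidates its read lease first, while a plain
+ * NFS mount compares the cached modification time against the server's and
+ * invalidates stale buffers with nfs_vinvalbuf() before the open succeeds.)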
+ */ +/* ARGSUSED */ +int +nfs_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct vattr vattr; + int error; + + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) + return (EACCES); + if (vp->v_flag & VTEXT) { + /* + * Get a valid lease. If cached data is stale, flush it. + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKINVALID(vp, np, NQL_READ)) { + do { + error = nqnfs_getlease(vp, NQL_READ, ap->a_cred, ap->a_p); + } while (error == NQNFS_EXPIRED); + if (error) + return (error); + if (np->n_lrev != np->n_brev || + (np->n_flag & NQNFSNONCACHE)) { + if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, + ap->a_p, 1)) == EINTR) + return (error); + (void) vnode_pager_uncache(vp); + np->n_brev = np->n_lrev; + } + } + } else { + if (np->n_flag & NMODIFIED) { + if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, + ap->a_p, 1)) == EINTR) + return (error); + (void) vnode_pager_uncache(vp); + np->n_attrstamp = 0; + np->n_direofoffset = 0; + if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p)) + return (error); + np->n_mtime = vattr.va_mtime.ts_sec; + } else { + if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p)) + return (error); + if (np->n_mtime != vattr.va_mtime.ts_sec) { + np->n_direofoffset = 0; + if ((error = nfs_vinvalbuf(vp, V_SAVE, + ap->a_cred, ap->a_p, 1)) == EINTR) + return (error); + (void) vnode_pager_uncache(vp); + np->n_mtime = vattr.va_mtime.ts_sec; + } + } + } + } else if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) + np->n_attrstamp = 0; /* For Open/Close consistency */ + return (0); +} + +/* + * nfs close vnode op + * For reg files, invalidate any buffer cache entries. + */ +/* ARGSUSED */ +int +nfs_close(ap) + struct vop_close_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + int error = 0; + + if (vp->v_type == VREG) { + if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && + (np->n_flag & NMODIFIED)) { + error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); + np->n_attrstamp = 0; + } + if (np->n_flag & NWRITEERR) { + np->n_flag &= ~NWRITEERR; + error = np->n_error; + } + } + return (error); +} + +/* + * nfs getattr call from vfs. + */ +int +nfs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register caddr_t cp; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + /* + * Update local times for special files. + */ + if (np->n_flag & (NACC | NUPD)) + np->n_flag |= NCHG; + /* + * First look in the cache. + */ + if (nfs_getattrcache(vp, ap->a_vap) == 0) + return (0); + nfsstats.rpccnt[NFSPROC_GETATTR]++; + nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH); + nfsm_fhtom(vp); + nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); + nfsm_loadattr(vp, ap->a_vap); + nfsm_reqdone; + return (error); +} + +/* + * nfs setattr call. 
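+ * Fields the caller leaves unset are sent as VNOVAL so the server ignores
+ * them.  A size change flushes the buffer cache first and remembers the old
+ * size; if the SETATTR rpc then fails, the saved size is put back.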
+ */ +int +nfs_setattr(ap) + struct vop_setattr_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct nfsv2_sattr *sp; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + u_long *tl; + int error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct vattr *vap = ap->a_vap; + u_quad_t frev, tsize; + + if (vap->va_size != VNOVAL) { + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VCHR: + case VBLK: + if (vap->va_mtime.ts_sec == VNOVAL && + vap->va_atime.ts_sec == VNOVAL && + vap->va_mode == (u_short)VNOVAL && + vap->va_uid == VNOVAL && + vap->va_gid == VNOVAL) + return (0); + vap->va_size = VNOVAL; + break; + default: + if (np->n_flag & NMODIFIED) { + if (vap->va_size == 0) + error = nfs_vinvalbuf(vp, 0, + ap->a_cred, ap->a_p, 1); + else + error = nfs_vinvalbuf(vp, V_SAVE, + ap->a_cred, ap->a_p, 1); + if (error) + return (error); + } + tsize = np->n_size; + np->n_size = np->n_vattr.va_size = vap->va_size; + vnode_pager_setsize(vp, (u_long)np->n_size); + } + } else if ((vap->va_mtime.ts_sec != VNOVAL || + vap->va_atime.ts_sec != VNOVAL) && (np->n_flag & NMODIFIED)) { + error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); + if (error == EINTR) + return (error); + } + nfsstats.rpccnt[NFSPROC_SETATTR]++; + isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq)); + nfsm_fhtom(vp); + nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq)); + if (vap->va_mode == (u_short)-1) + sp->sa_mode = VNOVAL; + else + sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode); + if (vap->va_uid == (uid_t)-1) + sp->sa_uid = VNOVAL; + else + sp->sa_uid = txdr_unsigned(vap->va_uid); + if (vap->va_gid == (gid_t)-1) + sp->sa_gid = VNOVAL; + else + sp->sa_gid = txdr_unsigned(vap->va_gid); + if (isnq) { + txdr_hyper(&vap->va_size, &sp->sa_nqsize); + txdr_nqtime(&vap->va_atime, &sp->sa_nqatime); + txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime); + sp->sa_nqflags = txdr_unsigned(vap->va_flags); + sp->sa_nqrdev = VNOVAL; + } else { + sp->sa_nfssize = txdr_unsigned(vap->va_size); + txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime); + txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime); + } + nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred); + nfsm_loadattr(vp, (struct vattr *)0); + if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKCACHABLE(vp, NQL_WRITE)) { + nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); + fxdr_hyper(tl, &frev); + if (frev > np->n_brev) + np->n_brev = frev; + } + nfsm_reqdone; + if (error) { + np->n_size = np->n_vattr.va_size = tsize; + vnode_pager_setsize(vp, (u_long)np->n_size); + } + return (error); +} + +/* + * nfs lookup call, one step at a time... 
+ * First look in cache + * If not found, unlock the directory nfsnode and do the rpc + */ +int +nfs_lookup(ap) + struct vop_lookup_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct componentname *cnp = ap->a_cnp; + register struct vnode *dvp = ap->a_dvp; + register struct vnode **vpp = ap->a_vpp; + register int flags = cnp->cn_flags; + register struct vnode *vdp; + register u_long *tl; + register caddr_t cp; + register long t1, t2; + struct nfsmount *nmp; + caddr_t bpos, dpos, cp2; + time_t reqtime; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vnode *newvp; + long len; + nfsv2fh_t *fhp; + struct nfsnode *np; + int lockparent, wantparent, error = 0; + int nqlflag, cachable; + u_quad_t frev; + + *vpp = NULL; + if (dvp->v_type != VDIR) + return (ENOTDIR); + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + nmp = VFSTONFS(dvp->v_mount); + np = VTONFS(dvp); + if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { + struct vattr vattr; + int vpid; + + vdp = *vpp; + vpid = vdp->v_id; + /* + * See the comment starting `Step through' in ufs/ufs_lookup.c + * for an explanation of the locking protocol + */ + if (dvp == vdp) { + VREF(vdp); + error = 0; + } else + error = vget(vdp, 1); + if (!error) { + if (vpid == vdp->v_id) { + if (nmp->nm_flag & NFSMNT_NQNFS) { + if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) { + cachehit: + nfsstats.lookupcache_hits++; + if (cnp->cn_nameiop != LOOKUP && + (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + return (0); + } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) { + if (np->n_lrev != np->n_brev || + (np->n_flag & NMODIFIED)) { + np->n_direofoffset = 0; + cache_purge(dvp); + error = nfs_vinvalbuf(dvp, 0, + cnp->cn_cred, cnp->cn_proc, + 1); + if (error == EINTR) + return (error); + np->n_brev = np->n_lrev; + } else + goto cachehit; + } + } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) && + vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) + goto cachehit; + cache_purge(vdp); + } + vrele(vdp); + } + *vpp = NULLVP; + } + error = 0; + nfsstats.lookupcache_misses++; + nfsstats.rpccnt[NFSPROC_LOOKUP]++; + len = cnp->cn_namelen; + nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)); + + /* + * For nqnfs optionally piggyback a getlease request for the name + * being looked up. + */ + if (nmp->nm_flag & NFSMNT_NQNFS) { + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) && + ((cnp->cn_flags & MAKEENTRY) && + (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)))) + *tl = txdr_unsigned(nmp->nm_leaseterm); + else + *tl = 0; + } + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + reqtime = time.tv_sec; + nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); +nfsmout: + if (error) { + if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && + (flags & ISLASTCN) && error == ENOENT) + error = EJUSTRETURN; + if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + return (error); + } + if (nmp->nm_flag & NFSMNT_NQNFS) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + if (*tl) { + nqlflag = fxdr_unsigned(int, *tl); + nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); + cachable = fxdr_unsigned(int, *tl++); + reqtime += fxdr_unsigned(int, *tl++); + fxdr_hyper(tl, &frev); + } else + nqlflag = 0; + } + nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH); + + /* + * Handle RENAME case... 
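+ * (Return the looked-up vnode with SAVENAME set but without making a name
+ * cache entry, and fail with EISDIR if the name resolves to the directory
+ * itself.)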
+ */ + if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { + if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) { + m_freem(mrep); + return (EISDIR); + } + if (error = nfs_nget(dvp->v_mount, fhp, &np)) { + m_freem(mrep); + return (error); + } + newvp = NFSTOV(np); + if (error = + nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) { + vrele(newvp); + m_freem(mrep); + return (error); + } + *vpp = newvp; + m_freem(mrep); + cnp->cn_flags |= SAVENAME; + return (0); + } + + if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) { + VREF(dvp); + newvp = dvp; + } else { + if (error = nfs_nget(dvp->v_mount, fhp, &np)) { + m_freem(mrep); + return (error); + } + newvp = NFSTOV(np); + } + if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) { + vrele(newvp); + m_freem(mrep); + return (error); + } + m_freem(mrep); + *vpp = newvp; + if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + if ((cnp->cn_flags & MAKEENTRY) && + (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { + if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) + np->n_ctime = np->n_vattr.va_ctime.ts_sec; + else if (nqlflag && reqtime > time.tv_sec) + nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime, + frev); + cache_enter(dvp, *vpp, cnp); + } + return (0); +} + +/* + * nfs read call. + * Just call nfs_bioread() to do the work. + */ +int +nfs_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (vp->v_type != VREG) + return (EPERM); + return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); +} + +/* + * nfs readlink call + */ +int +nfs_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (vp->v_type != VLNK) + return (EPERM); + return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* + * Do a readlink rpc. + * Called by nfs_doio() from below the buffer cache. + */ +int +nfs_readlinkrpc(vp, uiop, cred) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; +{ + register u_long *tl; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + long len; + + nfsstats.rpccnt[NFSPROC_READLINK]++; + nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH); + nfsm_fhtom(vp); + nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); + nfsm_strsiz(len, NFS_MAXPATHLEN); + nfsm_mtouio(uiop, len); + nfsm_reqdone; + return (error); +} + +/* + * nfs read rpc call + * Ditto above + */ +int +nfs_readrpc(vp, uiop, cred) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; +{ + register u_long *tl; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nfsmount *nmp; + long len, retlen, tsiz; + + nmp = VFSTONFS(vp->v_mount); + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > 0xffffffff && + (nmp->nm_flag & NFSMNT_NQNFS) == 0) + return (EFBIG); + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_READ]++; + len = (tsiz > nmp->nm_rsize) ? 
nmp->nm_rsize : tsiz; + nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, NFSX_UNSIGNED*3); + if (nmp->nm_flag & NFSMNT_NQNFS) { + txdr_hyper(&uiop->uio_offset, tl); + *(tl + 2) = txdr_unsigned(len); + } else { + *tl++ = txdr_unsigned(uiop->uio_offset); + *tl++ = txdr_unsigned(len); + *tl = 0; + } + nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); + nfsm_loadattr(vp, (struct vattr *)0); + nfsm_strsiz(retlen, nmp->nm_rsize); + nfsm_mtouio(uiop, retlen); + m_freem(mrep); + if (retlen < len) + tsiz = 0; + else + tsiz -= len; + } +nfsmout: + return (error); +} + +/* + * nfs write call + */ +int +nfs_writerpc(vp, uiop, cred, ioflags) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; + int ioflags; +{ + register u_long *tl; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nfsmount *nmp; + struct nfsnode *np = VTONFS(vp); + u_quad_t frev; + long len, tsiz; + + nmp = VFSTONFS(vp->v_mount); + tsiz = uiop->uio_resid; + if (uiop->uio_offset + tsiz > 0xffffffff && + (nmp->nm_flag & NFSMNT_NQNFS) == 0) + return (EFBIG); + while (tsiz > 0) { + nfsstats.rpccnt[NFSPROC_WRITE]++; + len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz; + nfsm_reqhead(vp, NFSPROC_WRITE, + NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len)); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4); + if (nmp->nm_flag & NFSMNT_NQNFS) { + txdr_hyper(&uiop->uio_offset, tl); + tl += 2; + *tl++ = 0; + *tl = txdr_unsigned(len); + } else { + register u_int32_t x; + /* Set both "begin" and "current" to non-garbage. */ + x = txdr_unsigned((u_int32_t)uiop->uio_offset); + *tl++ = x; /* "begin offset" */ + *tl++ = x; /* "current offset" */ + x = txdr_unsigned(len); + *tl++ = x; /* total to this offset */ + *tl = x; /* size of this write */ + } + nfsm_uiotom(uiop, len); + nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); + nfsm_loadattr(vp, (struct vattr *)0); + if (nmp->nm_flag & NFSMNT_MYWRITE) + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec; + else if ((nmp->nm_flag & NFSMNT_NQNFS) && + NQNFS_CKCACHABLE(vp, NQL_WRITE)) { + nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); + fxdr_hyper(tl, &frev); + if (frev > np->n_brev) + np->n_brev = frev; + } + m_freem(mrep); + tsiz -= len; + } +nfsmout: + if (error) + uiop->uio_resid = tsiz; + return (error); +} + +/* + * nfs mknod call + * This is a kludge. Use a create rpc but with the IFMT bits of the mode + * set to specify the file type and the size field for rdev. 
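+ * (vtonfs_mode() folds the file type into sa_mode, the device number goes
+ * in sa_nfssize (or sa_nqrdev for NQNFS), and a FIFO is encoded with an
+ * rdev of all ones.)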
+ */ +/* ARGSUSED */ +int +nfs_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_long *tl; + register caddr_t cp; + register long t1, t2; + struct vnode *newvp; + struct vattr vattr; + char *cp2; + caddr_t bpos, dpos; + int error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + u_long rdev; + + if (vap->va_type == VCHR || vap->va_type == VBLK) + rdev = txdr_unsigned(vap->va_rdev); +#ifdef FIFO + else if (vap->va_type == VFIFO) + rdev = 0xffffffff; +#endif /* FIFO */ + else { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return (EOPNOTSUPP); + } + if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return (error); + } + newvp = NULLVP; + nfsstats.rpccnt[NFSPROC_CREATE]++; + isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsm_reqhead(dvp, NFSPROC_CREATE, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq)); + sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode); + sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); + sp->sa_gid = txdr_unsigned(vattr.va_gid); + if (isnq) { + sp->sa_nqrdev = rdev; + sp->sa_nqflags = 0; + txdr_nqtime(&vap->va_atime, &sp->sa_nqatime); + txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime); + } else { + sp->sa_nfssize = rdev; + txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime); + txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime); + } + nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); + nfsm_mtofh(dvp, newvp); + nfsm_reqdone; + if (!error && (cnp->cn_flags & MAKEENTRY)) + cache_enter(dvp, newvp, cnp); + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + vrele(dvp); + if (newvp != NULLVP) + vrele(newvp); + return (error); +} + +/* + * nfs file create call + */ +int +nfs_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_long *tl; + register caddr_t cp; + register long t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vattr vattr; + + if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return (error); + } + nfsstats.rpccnt[NFSPROC_CREATE]++; + isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsm_reqhead(dvp, NFSPROC_CREATE, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq)); + sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode); + sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); + sp->sa_gid = txdr_unsigned(vattr.va_gid); + if (isnq) { + u_quad_t qval = 0; + + txdr_hyper(&qval, &sp->sa_nqsize); + sp->sa_nqrdev = -1; + sp->sa_nqflags = 0; + txdr_nqtime(&vap->va_atime, &sp->sa_nqatime); + txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime); + } else { + 
sp->sa_nfssize = 0; + txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime); + txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime); + } + nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); + nfsm_mtofh(dvp, *ap->a_vpp); + nfsm_reqdone; + if (!error && (cnp->cn_flags & MAKEENTRY)) + cache_enter(dvp, *ap->a_vpp, cnp); + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + vrele(dvp); + return (error); +} + +/* + * nfs file remove call + * To try and make nfs semantics closer to ufs semantics, a file that has + * other processes using the vnode is renamed instead of removed and then + * removed later on the last close. + * - If v_usecount > 1 + * If a rename is not already in the works + * call nfs_sillyrename() to set it up + * else + * do the remove rpc + */ +int +nfs_remove(ap) + struct vop_remove_args /* { + struct vnodeop_desc *a_desc; + struct vnode * a_dvp; + struct vnode * a_vp; + struct componentname * a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register struct nfsnode *np = VTONFS(vp); + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vattr vattr; + + if (vp->v_usecount > 1) { + if (!np->n_sillyrename) + error = nfs_sillyrename(dvp, vp, cnp); + else if (VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) + == 0 && vattr.va_nlink > 1) + /* + * If we already have a silly name but there are more + * than one links, just proceed with the NFS remove + * request, as the bits will remain available (modulo + * network races). This avoids silently ignoring the + * attempted removal of a non-silly entry. + */ + goto doit; + } else { + doit: + /* + * Purge the name cache so that the chance of a lookup for + * the name succeeding while the remove is in progress is + * minimized. Without node locking it can still happen, such + * that an I/O op returns ESTALE, but since you get this if + * another host removes the file.. + */ + cache_purge(vp); + /* + * Throw away biocache buffers. Mainly to avoid + * unnecessary delayed writes. + */ + error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); + if (error == EINTR) + return (error); + /* Do the rpc */ + nfsstats.rpccnt[NFSPROC_REMOVE]++; + nfsm_reqhead(dvp, NFSPROC_REMOVE, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred); + nfsm_reqdone; + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + /* + * Kludge City: If the first reply to the remove rpc is lost.. + * the reply to the retransmitted request will be ENOENT + * since the file was in fact removed + * Therefore, we cheat and return success. 
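+ * (The remove rpc is not idempotent, so a retransmission over UDP can find
+ * the name already gone even though the original request succeeded.)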
+ */ + if (error == ENOENT) + error = 0; + } + np->n_attrstamp = 0; + vrele(dvp); + vrele(vp); + return (error); +} + +/* + * nfs file remove rpc called from nfs_inactive + */ +int +nfs_removeit(sp) + register struct sillyrename *sp; +{ + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + nfsstats.rpccnt[NFSPROC_REMOVE]++; + nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen)); + nfsm_fhtom(sp->s_dvp); + nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN); + nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred); + nfsm_reqdone; + VTONFS(sp->s_dvp)->n_flag |= NMODIFIED; + VTONFS(sp->s_dvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs file rename call + */ +int +nfs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + register struct vnode *fvp = ap->a_fvp; + register struct vnode *tvp = ap->a_tvp; + register struct vnode *fdvp = ap->a_fdvp; + register struct vnode *tdvp = ap->a_tdvp; + register struct componentname *tcnp = ap->a_tcnp; + register struct componentname *fcnp = ap->a_fcnp; + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + /* Check for cross-device rename */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + + nfsstats.rpccnt[NFSPROC_RENAME]++; + nfsm_reqhead(fdvp, NFSPROC_RENAME, + (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+ + nfsm_rndup(fcnp->cn_namelen)); /* or fcnp->cn_cred?*/ + nfsm_fhtom(fdvp); + nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_fhtom(tdvp); + nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred); + nfsm_reqdone; + VTONFS(fdvp)->n_flag |= NMODIFIED; + VTONFS(fdvp)->n_attrstamp = 0; + VTONFS(tdvp)->n_flag |= NMODIFIED; + VTONFS(tdvp)->n_attrstamp = 0; + if (fvp->v_type == VDIR) { + if (tvp != NULL && tvp->v_type == VDIR) + cache_purge(tdvp); + cache_purge(fdvp); + } +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + /* + * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 
+ */ + if (error == ENOENT) + error = 0; + return (error); +} + +/* + * nfs file rename rpc called from nfs_remove() above + */ +int +nfs_renameit(sdvp, scnp, sp) + struct vnode *sdvp; + struct componentname *scnp; + register struct sillyrename *sp; +{ + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + nfsstats.rpccnt[NFSPROC_RENAME]++; + nfsm_reqhead(sdvp, NFSPROC_RENAME, + (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+ + nfsm_rndup(sp->s_namlen)); + nfsm_fhtom(sdvp); + nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_fhtom(sdvp); + nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN); + nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred); + nfsm_reqdone; + FREE(scnp->cn_pnbuf, M_NAMEI); + VTONFS(sdvp)->n_flag |= NMODIFIED; + VTONFS(sdvp)->n_attrstamp = 0; + return (error); +} + +/* + * nfs hard link create call + */ +int +nfs_link(ap) + struct vop_link_args /* { + struct vnode *a_vp; + struct vnode *a_tdvp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *tdvp = ap->a_tdvp; + register struct componentname *cnp = ap->a_cnp; + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + if (vp->v_mount != tdvp->v_mount) { + /*VOP_ABORTOP(vp, cnp);*/ + if (tdvp == vp) + vrele(vp); + else + vput(vp); + return (EXDEV); + } + + /* + * Push all writes to the server, so that the attribute cache + * doesn't get "out of sync" with the server. + * XXX There should be a better way! + */ + VOP_FSYNC(tdvp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); + + nfsstats.rpccnt[NFSPROC_LINK]++; + nfsm_reqhead(tdvp, NFSPROC_LINK, + NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)); + nfsm_fhtom(tdvp); + nfsm_fhtom(vp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); + nfsm_reqdone; + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(tdvp)->n_attrstamp = 0; + VTONFS(vp)->n_flag |= NMODIFIED; + VTONFS(vp)->n_attrstamp = 0; + vrele(vp); + /* + * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
+ */ + if (error == EEXIST) + error = 0; + return (error); +} + +/* + * nfs symbolic link create call + */ +/* start here */ +int +nfs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct nfsv2_sattr *sp; + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int slen, error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + nfsstats.rpccnt[NFSPROC_SYMLINK]++; + slen = strlen(ap->a_target); + isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+ + nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); + nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq)); + sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode); + sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); + sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); + if (isnq) { + quad_t qval = -1; + + txdr_hyper(&qval, &sp->sa_nqsize); + sp->sa_nqflags = 0; + txdr_nqtime(&vap->va_atime, &sp->sa_nqatime); + txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime); + } else { + sp->sa_nfssize = -1; + txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime); + txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime); + } + nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); + nfsm_reqdone; + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + vrele(dvp); + /* + * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
+ */ + if (error == EEXIST) + error = 0; + return (error); +} + +/* + * nfs make dir call + */ +int +nfs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct vnode **vpp = ap->a_vpp; + register struct nfsv2_sattr *sp; + register u_long *tl; + register caddr_t cp; + register long t1, t2; + register int len; + caddr_t bpos, dpos, cp2; + int error = 0, firsttry = 1, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct vattr vattr; + + if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return (error); + } + len = cnp->cn_namelen; + isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsstats.rpccnt[NFSPROC_MKDIR]++; + nfsm_reqhead(dvp, NFSPROC_MKDIR, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq)); + sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode); + sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); + sp->sa_gid = txdr_unsigned(vattr.va_gid); + if (isnq) { + quad_t qval = -1; + + txdr_hyper(&qval, &sp->sa_nqsize); + sp->sa_nqflags = 0; + txdr_nqtime(&vap->va_atime, &sp->sa_nqatime); + txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime); + } else { + sp->sa_nfssize = -1; + txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime); + txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime); + } + nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); + nfsm_mtofh(dvp, *vpp); + nfsm_reqdone; + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + /* + * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry + * if we can succeed in looking up the directory. + * "firsttry" is necessary since the macros may "goto nfsmout" which + * is above the if on errors. 
(Ugh) + */ + if (error == EEXIST && firsttry) { + firsttry = 0; + error = 0; + nfsstats.rpccnt[NFSPROC_LOOKUP]++; + *vpp = NULL; + nfsm_reqhead(dvp, NFSPROC_LOOKUP, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); + nfsm_mtofh(dvp, *vpp); + if ((*vpp)->v_type != VDIR) { + vput(*vpp); + error = EEXIST; + } + m_freem(mrep); + } + FREE(cnp->cn_pnbuf, M_NAMEI); + vrele(dvp); + return (error); +} + +/* + * nfs remove directory call + */ +int +nfs_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register u_long *tl; + register caddr_t cp; + register long t2; + caddr_t bpos, dpos; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + + if (dvp == vp) { + vrele(dvp); + vrele(dvp); + FREE(cnp->cn_pnbuf, M_NAMEI); + return (EINVAL); + } + nfsstats.rpccnt[NFSPROC_RMDIR]++; + nfsm_reqhead(dvp, NFSPROC_RMDIR, + NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)); + nfsm_fhtom(dvp); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); + nfsm_reqdone; + FREE(cnp->cn_pnbuf, M_NAMEI); + VTONFS(dvp)->n_flag |= NMODIFIED; + VTONFS(dvp)->n_attrstamp = 0; + cache_purge(dvp); + cache_purge(vp); + vrele(vp); + vrele(dvp); + /* + * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. + */ + if (error == ENOENT) + error = 0; + return (error); +} + +/* + * nfs readdir call + * Although cookie is defined as opaque, I translate it to/from net byte + * order so that it looks more sensible. This appears consistent with the + * Ultrix implementation of NFS. + */ +int +nfs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + u_long *a_cookies; + int a_ncookies; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct uio *uio = ap->a_uio; + int tresid, error; + struct vattr vattr; + + /* + * XXX + * We don't support cookies here, yet. + */ + if (ap->a_ncookies) + return (EINVAL); + + if (vp->v_type != VDIR) + return (EPERM); + /* + * First, check for hit on the EOF offset cache + */ + if (uio->uio_offset != 0 && uio->uio_offset == np->n_direofoffset && + (np->n_flag & NMODIFIED) == 0) { + if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { + if (NQNFS_CKCACHABLE(vp, NQL_READ)) { + nfsstats.direofcache_hits++; + return (0); + } + } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && + np->n_mtime == vattr.va_mtime.ts_sec) { + nfsstats.direofcache_hits++; + return (0); + } + } + + /* + * Call nfs_bioread() to do the real work. + */ + tresid = uio->uio_resid; + error = nfs_bioread(vp, uio, 0, ap->a_cred); + + if (!error && uio->uio_resid == tresid) + nfsstats.direofcache_misses++; + return (error); +} + +/* + * Readdir rpc call. + * Called from below the buffer cache by nfs_doio(). 
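+ * Each wire entry (fileno, name, cookie, more flag) is doctored in place
+ * into a struct dirent, requests are rounded down to a multiple of
+ * NFS_DIRBLKSIZ, and the offset at EOF is cached in n_direofoffset.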
+ */ +int +nfs_readdirrpc(vp, uiop, cred) + register struct vnode *vp; + struct uio *uiop; + struct ucred *cred; +{ + register long len; + register struct dirent *dp; + register u_long *tl; + register caddr_t cp; + register long t1; + long tlen, lastlen; + caddr_t bpos, dpos, cp2; + int error = 0; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct mbuf *md2; + caddr_t dpos2; + int siz; + int more_dirs = 1; + u_long off, savoff; + struct dirent *savdp; + struct nfsmount *nmp; + struct nfsnode *np = VTONFS(vp); + long tresid, extra; + + nmp = VFSTONFS(vp->v_mount); + extra = uiop->uio_resid & (NFS_DIRBLKSIZ - 1); + uiop->uio_resid -= extra; + tresid = uiop->uio_resid; + /* + * Loop around doing readdir rpc's of size uio_resid or nm_rsize, + * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ. + * The stopping criteria is EOF or buffer full. + */ + while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) { + nfsstats.rpccnt[NFSPROC_READDIR]++; + nfsm_reqhead(vp, NFSPROC_READDIR, + NFSX_FH + 2 * NFSX_UNSIGNED); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); + off = (u_long)uiop->uio_offset; + *tl++ = txdr_unsigned(off); + *tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ? + nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1)); + nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); + siz = 0; + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + more_dirs = fxdr_unsigned(int, *tl); + + /* Save the position so that we can do nfsm_mtouio() later */ + dpos2 = dpos; + md2 = md; + + /* loop thru the dir entries, doctoring them to 4bsd form */ +#ifdef lint + dp = (struct dirent *)0; +#endif /* lint */ + while (more_dirs && siz < uiop->uio_resid) { + savoff = off; /* Hold onto offset and dp */ + savdp = dp; + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + dp = (struct dirent *)tl; + dp->d_fileno = fxdr_unsigned(u_long, *tl++); + len = fxdr_unsigned(int, *tl); + if (len <= 0 || len > NFS_MAXNAMLEN) { + error = EBADRPC; + m_freem(mrep); + goto nfsmout; + } + dp->d_namlen = (u_char)len; + dp->d_type = DT_UNKNOWN; + nfsm_adv(len); /* Point past name */ + tlen = nfsm_rndup(len); + /* + * This should not be necessary, but some servers have + * broken XDR such that these bytes are not null filled. + */ + if (tlen != len) { + *dpos = '\0'; /* Null-terminate */ + nfsm_adv(tlen - len); + len = tlen; + } + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + off = fxdr_unsigned(u_long, *tl); + *tl++ = 0; /* Ensures null termination of name */ + more_dirs = fxdr_unsigned(int, *tl); + dp->d_reclen = len + 4 * NFSX_UNSIGNED; + siz += dp->d_reclen; + } + /* + * If at end of rpc data, get the eof boolean + */ + if (!more_dirs) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + more_dirs = (fxdr_unsigned(int, *tl) == 0); + + /* + * If at EOF, cache directory offset + */ + if (!more_dirs) + np->n_direofoffset = off; + } + /* + * If there is too much to fit in the data buffer, use savoff and + * savdp to trim off the last record. + * --> we are not at eof + */ + if (siz > uiop->uio_resid) { + off = savoff; + siz -= dp->d_reclen; + dp = savdp; + more_dirs = 0; /* Paranoia */ + } + if (siz > 0) { + lastlen = dp->d_reclen; + md = md2; + dpos = dpos2; + nfsm_mtouio(uiop, siz); + uiop->uio_offset = (off_t)off; + } else + more_dirs = 0; /* Ugh, never happens, but in case.. */ + m_freem(mrep); + } + /* + * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ + * by increasing d_reclen for the last record. 
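+ * (Callers consume directory data in whole NFS_DIRBLKSIZ blocks, so any
+ * slop left in the final block is absorbed by the last entry.)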
+ */ + if (uiop->uio_resid < tresid) { + len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1); + if (len > 0) { + dp = (struct dirent *) + (uiop->uio_iov->iov_base - lastlen); + dp->d_reclen += len; + uiop->uio_iov->iov_base += len; + uiop->uio_iov->iov_len -= len; + uiop->uio_resid -= len; + } + } +nfsmout: + uiop->uio_resid += extra; + return (error); +} + +/* + * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc(). + */ +int +nfs_readdirlookrpc(vp, uiop, cred) + struct vnode *vp; + register struct uio *uiop; + struct ucred *cred; +{ + register int len; + register struct dirent *dp; + register u_long *tl; + register caddr_t cp; + register long t1; + caddr_t bpos, dpos, cp2; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct nameidata nami, *ndp = &nami; + struct componentname *cnp = &ndp->ni_cnd; + u_long off, endoff, fileno; + time_t reqtime, ltime; + struct nfsmount *nmp; + struct nfsnode *np; + struct vnode *newvp; + nfsv2fh_t *fhp; + u_quad_t frev; + int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i; + int cachable; + + if (uiop->uio_iovcnt != 1) + panic("nfs rdirlook"); + nmp = VFSTONFS(vp->v_mount); + tresid = uiop->uio_resid; + ndp->ni_dvp = vp; + newvp = NULLVP; + /* + * Loop around doing readdir rpc's of size uio_resid or nm_rsize, + * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ. + * The stopping criteria is EOF or buffer full. + */ + while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) { + nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++; + nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK, + NFSX_FH + 3 * NFSX_UNSIGNED); + nfsm_fhtom(vp); + nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); + off = (u_long)uiop->uio_offset; + *tl++ = txdr_unsigned(off); + *tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ? + nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1)); + if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) + *tl = txdr_unsigned(nmp->nm_leaseterm); + else + *tl = 0; + reqtime = time.tv_sec; + nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred); + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + more_dirs = fxdr_unsigned(int, *tl); + + /* loop thru the dir entries, doctoring them to 4bsd form */ + bigenough = 1; + while (more_dirs && bigenough) { + doit = 1; + nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); + if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) { + cachable = fxdr_unsigned(int, *tl++); + ltime = reqtime + fxdr_unsigned(int, *tl++); + fxdr_hyper(tl, &frev); + } + nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH); + if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) { + VREF(vp); + newvp = vp; + np = VTONFS(vp); + } else { + if (error = nfs_nget(vp->v_mount, fhp, &np)) + doit = 0; + newvp = NFSTOV(np); + } + if (error = nfs_loadattrcache(&newvp, &md, &dpos, + (struct vattr *)0)) + doit = 0; + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + fileno = fxdr_unsigned(u_long, *tl++); + len = fxdr_unsigned(int, *tl); + if (len <= 0 || len > NFS_MAXNAMLEN) { + error = EBADRPC; + m_freem(mrep); + goto nfsmout; + } + tlen = (len + 4) & ~0x3; + if ((tlen + DIRHDSIZ) > uiop->uio_resid) + bigenough = 0; + if (bigenough && doit) { + dp = (struct dirent *)uiop->uio_iov->iov_base; + dp->d_fileno = fileno; + dp->d_namlen = len; + dp->d_reclen = tlen + DIRHDSIZ; + dp->d_type = + IFTODT(VTTOIF(np->n_vattr.va_type)); + uiop->uio_resid -= DIRHDSIZ; + uiop->uio_iov->iov_base += DIRHDSIZ; + uiop->uio_iov->iov_len -= DIRHDSIZ; + cnp->cn_nameptr = uiop->uio_iov->iov_base; + cnp->cn_namelen = len; + ndp->ni_vp = newvp; + nfsm_mtouio(uiop, len); + cp = uiop->uio_iov->iov_base; + 
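+				/* NUL-pad the name out to the next 4-byte boundary, terminating it */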
tlen -= len; + for (i = 0; i < tlen; i++) + *cp++ = '\0'; + uiop->uio_iov->iov_base += tlen; + uiop->uio_iov->iov_len -= tlen; + uiop->uio_resid -= tlen; + cnp->cn_hash = 0; + for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) + cnp->cn_hash += (unsigned char)*cp * i; + if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) && + ltime > time.tv_sec) + nqnfs_clientlease(nmp, np, NQL_READ, + cachable, ltime, frev); + if (cnp->cn_namelen <= NCHNAMLEN) + cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); + } else { + nfsm_adv(nfsm_rndup(len)); + } + if (newvp != NULLVP) { + vrele(newvp); + newvp = NULLVP; + } + nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); + if (bigenough) + endoff = off = fxdr_unsigned(u_long, *tl++); + else + endoff = fxdr_unsigned(u_long, *tl++); + more_dirs = fxdr_unsigned(int, *tl); + } + /* + * If at end of rpc data, get the eof boolean + */ + if (!more_dirs) { + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + more_dirs = (fxdr_unsigned(int, *tl) == 0); + + /* + * If at EOF, cache directory offset + */ + if (!more_dirs) + VTONFS(vp)->n_direofoffset = endoff; + } + if (uiop->uio_resid < tresid) + uiop->uio_offset = (off_t)off; + else + more_dirs = 0; + m_freem(mrep); + } + /* + * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ + * by increasing d_reclen for the last record. + */ + if (uiop->uio_resid < tresid) { + len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1); + if (len > 0) { + dp->d_reclen += len; + uiop->uio_iov->iov_base += len; + uiop->uio_iov->iov_len -= len; + uiop->uio_resid -= len; + } + } +nfsmout: + if (newvp != NULLVP) + vrele(newvp); + return (error); +} +static char hextoasc[] = "0123456789abcdef"; + +/* + * Silly rename. To make the NFS filesystem that is stateless look a little + * more like the "ufs" a remove of an active vnode is translated to a rename + * to a funny looking filename that is removed by nfs_inactive on the + * nfsnode. There is the potential for another process on a different client + * to create the same funny name between the nfs_lookitup() fails and the + * nfs_rename() completes, but... + */ +int +nfs_sillyrename(dvp, vp, cnp) + struct vnode *dvp, *vp; + struct componentname *cnp; +{ + register struct nfsnode *np; + register struct sillyrename *sp; + int error; + short pid; + + cache_purge(dvp); + np = VTONFS(vp); +#ifdef SILLYSEPARATE + MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), + M_NFSREQ, M_WAITOK); +#else + sp = &np->n_silly; +#endif + sp->s_cred = crdup(cnp->cn_cred); + sp->s_dvp = dvp; + VREF(dvp); + + /* Fudge together a funny name */ + pid = cnp->cn_proc->p_pid; + bcopy(".nfsAxxxx4.4", sp->s_name, 13); + sp->s_namlen = 12; + sp->s_name[8] = hextoasc[pid & 0xf]; + sp->s_name[7] = hextoasc[(pid >> 4) & 0xf]; + sp->s_name[6] = hextoasc[(pid >> 8) & 0xf]; + sp->s_name[5] = hextoasc[(pid >> 12) & 0xf]; + + /* Try lookitups until we get one that isn't there */ + while (nfs_lookitup(sp, (nfsv2fh_t *)0, cnp->cn_proc) == 0) { + sp->s_name[4]++; + if (sp->s_name[4] > 'z') { + error = EINVAL; + goto bad; + } + } + if (error = nfs_renameit(dvp, cnp, sp)) + goto bad; + nfs_lookitup(sp, &np->n_fh, cnp->cn_proc); + np->n_sillyrename = sp; + return (0); +bad: + vrele(sp->s_dvp); + crfree(sp->s_cred); +#ifdef SILLYSEPARATE + free((caddr_t)sp, M_NFSREQ); +#endif + return (error); +} + +/* + * Look up a file name for silly rename stuff. + * Just like nfs_lookup() except that it doesn't load returned values + * into the nfsnode table. 
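+ * nfs_sillyrename() uses it both to probe for an unused .nfsAxxxx name and,
+ * after the rename, to fetch the renamed file's handle.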
+ * If fhp != NULL it copies the returned file handle out + */ +int +nfs_lookitup(sp, fhp, procp) + register struct sillyrename *sp; + nfsv2fh_t *fhp; + struct proc *procp; +{ + register struct vnode *vp = sp->s_dvp; + register u_long *tl; + register caddr_t cp; + register long t1, t2; + caddr_t bpos, dpos, cp2; + int error = 0, isnq; + struct mbuf *mreq, *mrep, *md, *mb, *mb2; + long len; + + isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS); + nfsstats.rpccnt[NFSPROC_LOOKUP]++; + len = sp->s_namlen; + nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)); + if (isnq) { + nfsm_build(tl, u_long *, NFSX_UNSIGNED); + *tl = 0; + } + nfsm_fhtom(vp); + nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN); + nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred); + if (fhp != NULL) { + if (isnq) + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); + nfsm_dissect(cp, caddr_t, NFSX_FH); + bcopy(cp, (caddr_t)fhp, NFSX_FH); + } + nfsm_reqdone; + return (error); +} + +/* + * Kludge City.. + * - make nfs_bmap() essentially a no-op that does no translation + * - do nfs_strategy() by faking physical I/O with nfs_readrpc/nfs_writerpc + * after mapping the physical addresses into Kernel Virtual space in the + * nfsiobuf area. + * (Maybe I could use the process's page mapping, but I was concerned that + * Kernel Write might not be enabled and also figured copyout() would do + * a lot more work than bcopy() and also it currently happens in the + * context of the swapper process (2). + */ +int +nfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + + if (ap->a_vpp != NULL) + *ap->a_vpp = vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); + return (0); +} + +/* + * Strategy routine. + * For async requests when nfsiod(s) are running, queue the request by + * calling nfs_asyncio(), otherwise just all nfs_doio() to do the + * request. + */ +int +nfs_strategy(ap) + struct vop_strategy_args *ap; +{ + register struct buf *bp = ap->a_bp; + struct ucred *cr; + struct proc *p; + int error = 0; + + if ((bp->b_flags & (B_PHYS|B_ASYNC)) == (B_PHYS|B_ASYNC)) + panic("nfs physio/async"); + if (bp->b_flags & B_ASYNC) + p = (struct proc *)0; + else + p = curproc; /* XXX */ + if (bp->b_flags & B_READ) + cr = bp->b_rcred; + else + cr = bp->b_wcred; + /* + * If the op is asynchronous and an i/o daemon is waiting + * queue the request, wake it up and wait for completion + * otherwise just do it ourselves. + */ + if ((bp->b_flags & B_ASYNC) == 0 || + nfs_asyncio(bp, NOCRED)) + error = nfs_doio(bp, cr, p); + return (error); +} + +/* + * Mmap a file + * + * NB Currently unsupported. + */ +/* ARGSUSED */ +int +nfs_mmap(ap) + struct vop_mmap_args /* { + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (EINVAL); +} + +/* + * Flush all the blocks associated with a vnode. + * Walk through the buffer pool and push any dirty pages + * associated with the vnode. 
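+ * Dirty buffers are written asynchronously; for MNT_WAIT the routine then
+ * sleeps on v_numoutput until the writes drain and loops again if more
+ * dirty buffers have appeared.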
+ */ +/* ARGSUSED */ +int +nfs_fsync(ap) + struct vop_fsync_args /* { + struct vnodeop_desc *a_desc; + struct vnode * a_vp; + struct ucred * a_cred; + int a_waitfor; + struct proc * a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + register struct buf *bp; + struct buf *nbp; + struct nfsmount *nmp; + int s, error = 0, slptimeo = 0, slpflag = 0; + + nmp = VFSTONFS(vp->v_mount); + if (nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (bp->b_flags & B_BUSY) { + if (ap->a_waitfor != MNT_WAIT) + continue; + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), + "nfsfsync", slptimeo); + splx(s); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + goto loop; + } + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfs_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + bp->b_flags |= B_ASYNC; + VOP_BWRITE(bp); + goto loop; + } + splx(s); + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + error = tsleep((caddr_t)&vp->v_numoutput, + slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + } + if (vp->v_dirtyblkhd.lh_first) { +#ifdef DIAGNOSTIC + vprint("nfs_fsync: dirty", vp); +#endif + goto loop; + } + } + if (np->n_flag & NWRITEERR) { + error = np->n_error; + np->n_flag &= ~NWRITEERR; + } + return (error); +} + +/* + * Return POSIX pathconf information applicable to nfs. + * + * Currently the NFS protocol does not support getting such + * information from the remote server. + */ +/* ARGSUSED */ +nfs_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + register_t *a_retval; + } */ *ap; +{ + + return (EINVAL); +} + +/* + * NFS advisory byte-level locks. + */ +int +nfs_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + return (lf_advlock(&np->n_lockf, np->n_size, ap->a_id, ap->a_op, + ap->a_fl, ap->a_flags)); +} + +/* + * Print out the contents of an nfsnode. + */ +int +nfs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + + printf("tag VT_NFS, fileid %d fsid 0x%x", + np->n_vattr.va_fileid, np->n_vattr.va_fsid); +#ifdef FIFO + if (vp->v_type == VFIFO) + fifo_printinfo(vp); +#endif /* FIFO */ + printf("\n"); +} + +/* + * NFS directory offset lookup. + * Currently unsupported. + */ +int +nfs_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + + return (EOPNOTSUPP); +} + +/* + * NFS flat namespace allocation. + * Currently unsupported. + */ +int +nfs_valloc(ap) + struct vop_valloc_args /* { + struct vnode *a_pvp; + int a_mode; + struct ucred *a_cred; + struct vnode **a_vpp; + } */ *ap; +{ + + return (EOPNOTSUPP); +} + +/* + * NFS flat namespace free. + * Currently unsupported. 
+ */ +int +nfs_vfree(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ + + return (EOPNOTSUPP); +} + +/* + * NFS file truncation. + */ +int +nfs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + /* Use nfs_setattr */ + printf("nfs_truncate: need to implement!!"); + return (EOPNOTSUPP); +} + +/* + * NFS update. + */ +int +nfs_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_ta; + struct timeval *a_tm; + int a_waitfor; + } */ *ap; +{ + + /* Use nfs_setattr */ + printf("nfs_update: need to implement!!"); + return (EOPNOTSUPP); +} + +/* + * nfs special file access vnode op. + * Essentially just get vattr and then imitate iaccess() since the device is + * local to the client. + */ +int +nfsspec_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vattr va; + int error; + + if (error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) + return (error); + + return (vaccess(va.va_mode, va.va_uid, va.va_gid, ap->a_mode, + ap->a_cred)); +} + +/* + * Read wrapper for special devices. + */ +int +nfsspec_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set access flag. + */ + np->n_flag |= NACC; + np->n_atim = time; + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for special devices. + */ +int +nfsspec_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set update flag. + */ + np->n_flag |= NUPD; + np->n_mtim = time; + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for special devices. + * + * Update the times on the nfsnode then do device close. + */ +int +nfsspec_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + struct vattr vattr; + + if (np->n_flag & (NACC | NUPD)) { + np->n_flag |= NCHG; + if (vp->v_usecount == 1 && + (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + VATTR_NULL(&vattr); + if (np->n_flag & NACC) { + vattr.va_atime.ts_sec = np->n_atim.tv_sec; + vattr.va_atime.ts_nsec = + np->n_atim.tv_usec * 1000; + } + if (np->n_flag & NUPD) { + vattr.va_mtime.ts_sec = np->n_mtim.tv_sec; + vattr.va_mtime.ts_nsec = + np->n_mtim.tv_usec * 1000; + } + (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + } + } + return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); +} + +#ifdef FIFO +/* + * Read wrapper for fifos. + */ +int +nfsfifo_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + extern int (**fifo_vnodeop_p)(); + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set access flag. + */ + np->n_flag |= NACC; + np->n_atim = time; + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for fifos. 
+ */ +int +nfsfifo_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + extern int (**fifo_vnodeop_p)(); + register struct nfsnode *np = VTONFS(ap->a_vp); + + /* + * Set update flag. + */ + np->n_flag |= NUPD; + np->n_mtim = time; + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for fifos. + * + * Update the times on the nfsnode then do fifo close. + */ +int +nfsfifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct nfsnode *np = VTONFS(vp); + struct vattr vattr; + extern int (**fifo_vnodeop_p)(); + + if (np->n_flag & (NACC | NUPD)) { + if (np->n_flag & NACC) + np->n_atim = time; + if (np->n_flag & NUPD) + np->n_mtim = time; + np->n_flag |= NCHG; + if (vp->v_usecount == 1 && + (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + VATTR_NULL(&vattr); + if (np->n_flag & NACC) { + vattr.va_atime.ts_sec = np->n_atim.tv_sec; + vattr.va_atime.ts_nsec = + np->n_atim.tv_usec * 1000; + } + if (np->n_flag & NUPD) { + vattr.va_mtime.ts_sec = np->n_mtim.tv_sec; + vattr.va_mtime.ts_nsec = + np->n_mtim.tv_usec * 1000; + } + (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + } + } + return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); +} +#endif /* FIFO */ diff --git a/sys/nfs/nfsdiskless.h b/sys/nfs/nfsdiskless.h new file mode 100644 index 00000000000..c4dc461f775 --- /dev/null +++ b/sys/nfs/nfsdiskless.h @@ -0,0 +1,60 @@ +/* $NetBSD: nfsdiskless.h,v 1.7 1994/06/29 06:42:31 cgd Exp $ */ + +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsdiskless.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Structure that must be initialized for a diskless nfs client. + * This structure is used by nfs_mountroot() to set up the root and swap + * vnodes plus do a partial ifconfig(8) and route(8) so that the critical + * net interface can communicate with the server. + * Whether or not the swap area is nfs mounted is determined + * by the value in swdevt[0]. (equal to NODEV --> swap over nfs) + * Currently only works for AF_INET protocols. + * NB: All fields are stored in net byte order to avoid hassles with + * client/server byte ordering differences. + */ +struct nfs_dlmount { + struct sockaddr_in ndm_saddr; /* Address of file server */ + char ndm_host[MNAMELEN]; /* Host name for mount pt */ + u_char ndm_fh[NFS_FHSIZE]; /* The file's file handle */ +}; +struct nfs_diskless { + struct nfs_dlmount nd_root; /* Mount info for root */ + struct nfs_dlmount nd_swap; /* Mount info for swap */ +}; diff --git a/sys/nfs/nfsm_subs.h b/sys/nfs/nfsm_subs.h new file mode 100644 index 00000000000..44fac3fd5c7 --- /dev/null +++ b/sys/nfs/nfsm_subs.h @@ -0,0 +1,270 @@ +/* $NetBSD: nfsm_subs.h,v 1.6 1995/05/23 06:25:30 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93 + */ + +/* + * These macros do strange and peculiar things to mbuf chains for + * the assistance of the nfs code. To attempt to use them for any + * other purpose will be dangerous. (they make weird assumptions) + */ + +/* + * First define what the actual subs. return + */ +extern struct mbuf *nfsm_reqh(); + +#define M_HASCL(m) ((m)->m_flags & M_EXT) +#define NFSMINOFF(m) \ + if (M_HASCL(m)) \ + (m)->m_data = (m)->m_ext.ext_buf; \ + else if ((m)->m_flags & M_PKTHDR) \ + (m)->m_data = (m)->m_pktdat; \ + else \ + (m)->m_data = (m)->m_dat +#define NFSMADV(m, s) (m)->m_data += (s) +#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ + (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) + +/* + * Now for the macros that do the simple stuff and call the functions + * for the hard stuff. + * These macros use several vars. declared in nfsm_reqhead and these + * vars. must not be used elsewhere unless you are careful not to corrupt + * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries + * that may be used so long as the value is not expected to retained + * after a macro. + * I know, this is kind of dorkey, but it makes the actual op functions + * fairly clean and deals with the mess caused by the xdr discriminating + * unions. 
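One detail these macros handle over and over is XDR padding: opaque data such as file names always travels in whole 4-byte words, which is what the nfsm_rndup() round-up (used in nfs_lookitup() above and defined just below) accounts for. A small stand-alone, user-space check of that rule, not kernel code:

#include <assert.h>
#include <stdio.h>

/* Same rounding as the nfsm_rndup() macro: round a byte count up to
 * the next multiple of 4, the XDR word size. */
#define nfsm_rndup(a)	(((a)+3)&(~0x3))

int
main(void)
{
	assert(nfsm_rndup(1) == 4);
	assert(nfsm_rndup(4) == 4);
	assert(nfsm_rndup(5) == 8);
	/* A 5-byte name costs one 4-byte length word plus 8 padded bytes. */
	printf("%d\n", 4 + nfsm_rndup(5));
	return (0);
}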
+ */ + +#define nfsm_build(a,c,s) \ + { if ((s) > M_TRAILINGSPACE(mb)) { \ + MGET(mb2, M_WAIT, MT_DATA); \ + if ((s) > MLEN) \ + panic("build > MLEN"); \ + mb->m_next = mb2; \ + mb = mb2; \ + mb->m_len = 0; \ + bpos = mtod(mb, caddr_t); \ + } \ + (a) = (c)(bpos); \ + mb->m_len += (s); \ + bpos += (s); } + +#define nfsm_dissect(a,c,s) \ + { t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + (a) = (c)(dpos); \ + dpos += (s); \ + } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \ + m_freem(mrep); \ + goto nfsmout; \ + } else { \ + (a) = (c)cp2; \ + } } + +#define nfsm_fhtom(v) \ + nfsm_build(cp,caddr_t,NFSX_FH); \ + bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH) + +#define nfsm_srvfhtom(f) \ + nfsm_build(cp,caddr_t,NFSX_FH); \ + bcopy((caddr_t)(f), cp, NFSX_FH) + +#define nfsm_mtofh(d,v) \ + { struct nfsnode *np; nfsv2fh_t *fhp; \ + nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \ + if (error = nfs_nget((d)->v_mount, fhp, &np)) { \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = NFSTOV(np); \ + nfsm_loadattr(v, (struct vattr *)0); \ + } + +#define nfsm_loadattr(v,a) \ + { struct vnode *tvp = (v); \ + if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \ + m_freem(mrep); \ + goto nfsmout; \ + } \ + (v) = tvp; } + +#define nfsm_strsiz(s,m) \ + { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \ + m_freem(mrep); \ + error = EBADRPC; \ + goto nfsmout; \ + } } + +#define nfsm_srvstrsiz(s,m) \ + { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \ + if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \ + error = EBADRPC; \ + nfsm_reply(0); \ + } } + +#define nfsm_mtouio(p,s) \ + if ((s) > 0 && \ + (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \ + m_freem(mrep); \ + goto nfsmout; \ + } + +#define nfsm_uiotom(p,s) \ + if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \ + m_freem(mreq); \ + goto nfsmout; \ + } + +#define nfsm_reqhead(v,a,s) \ + mb = mreq = nfsm_reqh((v),(a),(s),&bpos) + +#define nfsm_reqdone m_freem(mrep); \ + nfsmout: + +#define nfsm_rndup(a) (((a)+3)&(~0x3)) + +#define nfsm_request(v, t, p, c) \ + if (error = nfs_request((v), mreq, (t), (p), \ + (c), &mrep, &md, &dpos)) \ + goto nfsmout + +#define nfsm_strtom(a,s,m) \ + if ((s) > (m)) { \ + m_freem(mreq); \ + error = ENAMETOOLONG; \ + goto nfsmout; \ + } \ + t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \ + if (t2 <= M_TRAILINGSPACE(mb)) { \ + nfsm_build(tl,u_long *,t2); \ + *tl++ = txdr_unsigned(s); \ + *(tl+((t2>>2)-2)) = 0; \ + bcopy((caddr_t)(a), (caddr_t)tl, (s)); \ + } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \ + m_freem(mreq); \ + goto nfsmout; \ + } + +#define nfsm_srvdone \ + nfsmout: \ + return(error) + +#define nfsm_reply(s) \ + { \ + nfsd->nd_repstat = error; \ + if (error) \ + (void) nfs_rephead(0, nfsd, error, cache, &frev, \ + mrq, &mb, &bpos); \ + else \ + (void) nfs_rephead((s), nfsd, error, cache, &frev, \ + mrq, &mb, &bpos); \ + m_freem(mrep); \ + mreq = *mrq; \ + if (error) \ + return(0); \ + } + +#define nfsm_adv(s) \ + t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + if (t1 >= (s)) { \ + dpos += (s); \ + } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \ + m_freem(mrep); \ + goto nfsmout; \ + } + +#define nfsm_srvmtofh(f) \ + nfsm_dissect(tl, u_long *, NFSX_FH); \ + bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH) + +#define nfsm_clget \ + if (bp >= be) { \ + if (mp == mb) \ + mp->m_len += bp-bpos; \ + MGET(mp, M_WAIT, MT_DATA); \ + MCLGET(mp, M_WAIT); \ + mp->m_len = NFSMSIZ(mp); \ + mp2->m_next = mp; \ + mp2 = mp; \ + bp = mtod(mp, caddr_t); \ + be = 
bp+mp->m_len; \ + } \ + tl = (u_long *)bp + +#define nfsm_srvfillattr \ + fp->fa_type = vtonfs_type(va.va_type); \ + fp->fa_mode = vtonfs_mode(va.va_type, va.va_mode); \ + fp->fa_nlink = txdr_unsigned(va.va_nlink); \ + fp->fa_uid = txdr_unsigned(va.va_uid); \ + fp->fa_gid = txdr_unsigned(va.va_gid); \ + if (nfsd->nd_nqlflag == NQL_NOVAL) { \ + fp->fa_nfsblocksize = txdr_unsigned(va.va_blocksize); \ + if (va.va_type == VFIFO) \ + fp->fa_nfsrdev = 0xffffffff; \ + else \ + fp->fa_nfsrdev = txdr_unsigned(va.va_rdev); \ + fp->fa_nfsfsid = txdr_unsigned(va.va_fsid); \ + fp->fa_nfsfileid = txdr_unsigned(va.va_fileid); \ + fp->fa_nfssize = txdr_unsigned(va.va_size); \ + fp->fa_nfsblocks = txdr_unsigned(va.va_bytes / NFS_FABLKSIZE); \ + txdr_nfstime(&va.va_atime, &fp->fa_nfsatime); \ + txdr_nfstime(&va.va_mtime, &fp->fa_nfsmtime); \ + txdr_nfstime(&va.va_ctime, &fp->fa_nfsctime); \ + } else { \ + fp->fa_nqblocksize = txdr_unsigned(va.va_blocksize); \ + if (va.va_type == VFIFO) \ + fp->fa_nqrdev = 0xffffffff; \ + else \ + fp->fa_nqrdev = txdr_unsigned(va.va_rdev); \ + fp->fa_nqfsid = txdr_unsigned(va.va_fsid); \ + fp->fa_nqfileid = txdr_unsigned(va.va_fileid); \ + txdr_hyper(&va.va_size, &fp->fa_nqsize); \ + txdr_hyper(&va.va_bytes, &fp->fa_nqbytes); \ + txdr_nqtime(&va.va_atime, &fp->fa_nqatime); \ + txdr_nqtime(&va.va_mtime, &fp->fa_nqmtime); \ + txdr_nqtime(&va.va_ctime, &fp->fa_nqctime); \ + fp->fa_nqflags = txdr_unsigned(va.va_flags); \ + fp->fa_nqgen = txdr_unsigned(va.va_gen); \ + txdr_hyper(&va.va_filerev, &fp->fa_nqfilerev); \ + } + diff --git a/sys/nfs/nfsmount.h b/sys/nfs/nfsmount.h new file mode 100644 index 00000000000..4ad22bcf0ff --- /dev/null +++ b/sys/nfs/nfsmount.h @@ -0,0 +1,128 @@ +/* $NetBSD: nfsmount.h,v 1.8 1995/03/26 20:37:31 jtc Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsmount.h 8.2 (Berkeley) 8/18/94 + */ + +/* + * Mount structure. + * One allocated on every NFS mount. + * Holds NFS specific information for mount. + */ +struct nfsmount { + int nm_flag; /* Flags for soft/hard... */ + struct mount *nm_mountp; /* Vfs structure for this filesystem */ + int nm_numgrps; /* Max. size of groupslist */ + nfsv2fh_t nm_fh; /* File handle of root dir */ + struct socket *nm_so; /* Rpc socket */ + int nm_sotype; /* Type of socket */ + int nm_soproto; /* and protocol */ + int nm_soflags; /* pr_flags for socket protocol */ + struct mbuf *nm_nam; /* Addr of server */ + int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ + int nm_retry; /* Max retries */ + int nm_srtt[4]; /* Timers for rpcs */ + int nm_sdrtt[4]; + int nm_sent; /* Request send count */ + int nm_cwnd; /* Request send window */ + int nm_timeouts; /* Request timeouts */ + int nm_deadthresh; /* Threshold of timeouts-->dead server*/ + int nm_rsize; /* Max size of read rpc */ + int nm_wsize; /* Max size of write rpc */ + int nm_readahead; /* Num. of blocks to readahead */ + int nm_leaseterm; /* Term (sec) for NQNFS lease */ + CIRCLEQ_HEAD(, nfsnode) nm_timerhead; /* Head of lease timer queue */ + struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */ + uid_t nm_authuid; /* Uid for authenticator */ + int nm_authtype; /* Authenticator type */ + int nm_authlen; /* and length */ + char *nm_authstr; /* Authenticator string */ +}; + +#ifdef _KERNEL +/* + * Convert mount ptr to nfsmount ptr. + */ +#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) +#endif /* _KERNEL */ + +/* + * Prototypes for NFS mount operations + */ +int nfs_mount __P(( + struct mount *mp, + char *path, + caddr_t data, + struct nameidata *ndp, + struct proc *p)); +int nfs_start __P(( + struct mount *mp, + int flags, + struct proc *p)); +int nfs_unmount __P(( + struct mount *mp, + int mntflags, + struct proc *p)); +int nfs_root __P(( + struct mount *mp, + struct vnode **vpp)); +int nfs_quotactl __P(( + struct mount *mp, + int cmds, + uid_t uid, + caddr_t arg, + struct proc *p)); +int nfs_statfs __P(( + struct mount *mp, + struct statfs *sbp, + struct proc *p)); +int nfs_sync __P(( + struct mount *mp, + int waitfor, + struct ucred *cred, + struct proc *p)); +int nfs_fhtovp __P(( + struct mount *mp, + struct fid *fhp, + struct mbuf *nam, + struct vnode **vpp, + int *exflagsp, + struct ucred **credanonp)); +int nfs_vptofh __P(( + struct vnode *vp, + struct fid *fhp)); +int nfs_init __P(()); diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h new file mode 100644 index 00000000000..8ee133e85a1 --- /dev/null +++ b/sys/nfs/nfsnode.h @@ -0,0 +1,167 @@ +/* $NetBSD: nfsnode.h,v 1.14 1995/03/26 20:37:32 jtc Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsnode.h 8.6 (Berkeley) 8/18/94 + */ + +/* + * Silly rename structure that hangs off the nfsnode until the name + * can be removed by nfs_inactive() + */ +struct sillyrename { + struct ucred *s_cred; + struct vnode *s_dvp; + long s_namlen; + char s_name[20]; +}; + +/* + * The nfsnode is the nfs equivalent to ufs's inode. Any similarity + * is purely coincidental. + * There is a unique nfsnode allocated for each active file, + * each current directory, each mounted-on file, text file, and the root. + * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) + */ + +struct nfsnode { + LIST_ENTRY(nfsnode) n_hash; /* Hash chain */ + CIRCLEQ_ENTRY(nfsnode) n_timer; /* Nqnfs timer chain */ + nfsv2fh_t n_fh; /* NFS File Handle */ + long n_flag; /* Flag for locking.. */ + struct vnode *n_vnode; /* vnode associated with this node */ + struct vattr n_vattr; /* Vnode attribute cache */ + time_t n_attrstamp; /* Time stamp for cached attributes */ + struct sillyrename *n_sillyrename; /* Ptr to silly rename struct */ + u_quad_t n_size; /* Current size of file */ + int n_error; /* Save write error value */ + u_long n_direofoffset; /* Dir. EOF offset cache */ + time_t n_mtime; /* Prev modify time. */ + time_t n_ctime; /* Prev create time. */ + u_quad_t n_brev; /* Modify rev when cached */ + u_quad_t n_lrev; /* Modify rev for lease */ + time_t n_expiry; /* Lease expiry time */ + struct lockf *n_lockf; /* Advisory lock records */ + struct sillyrename n_silly; /* Silly rename struct */ + struct timeval n_atim; /* Special file times */ + struct timeval n_mtim; +}; + +/* + * Flags for n_flag + */ +#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. 
*/ +#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */ +#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ +#define NWRITEERR 0x0008 /* Flag write errors so close will know */ +#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */ +#define NQNFSWRITE 0x0040 /* Write lease */ +#define NQNFSEVICTED 0x0080 /* Has been evicted */ +#define NACC 0x0100 /* Special file accessed */ +#define NUPD 0x0200 /* Special file updated */ +#define NCHG 0x0400 /* Special file times changed */ + +/* + * Convert between nfsnode pointers and vnode pointers + */ +#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) +#define NFSTOV(np) ((struct vnode *)(np)->n_vnode) + +/* + * Queue head for nfsiod's + */ +TAILQ_HEAD(, buf) nfs_bufq; + +#ifdef _KERNEL +/* + * Prototypes for NFS vnode operations + */ +int nfs_lookup __P((struct vop_lookup_args *)); +int nfs_create __P((struct vop_create_args *)); +int nfs_mknod __P((struct vop_mknod_args *)); +int nfs_open __P((struct vop_open_args *)); +int nfs_close __P((struct vop_close_args *)); +int nfsspec_close __P((struct vop_close_args *)); +#ifdef FIFO +int nfsfifo_close __P((struct vop_close_args *)); +#endif +int nfs_access __P((struct vop_access_args *)); +int nfsspec_access __P((struct vop_access_args *)); +int nfs_getattr __P((struct vop_getattr_args *)); +int nfs_setattr __P((struct vop_setattr_args *)); +int nfs_read __P((struct vop_read_args *)); +int nfs_write __P((struct vop_write_args *)); +#define nfs_lease_check ((int (*) __P((struct vop_lease_args *)))nullop) +int nfsspec_read __P((struct vop_read_args *)); +int nfsspec_write __P((struct vop_write_args *)); +#ifdef FIFO +int nfsfifo_read __P((struct vop_read_args *)); +int nfsfifo_write __P((struct vop_write_args *)); +#endif +#define nfs_ioctl ((int (*) __P((struct vop_ioctl_args *)))enoioctl) +#define nfs_select ((int (*) __P((struct vop_select_args *)))seltrue) +int nfs_mmap __P((struct vop_mmap_args *)); +int nfs_fsync __P((struct vop_fsync_args *)); +#define nfs_seek ((int (*) __P((struct vop_seek_args *)))nullop) +int nfs_remove __P((struct vop_remove_args *)); +int nfs_link __P((struct vop_link_args *)); +int nfs_rename __P((struct vop_rename_args *)); +int nfs_mkdir __P((struct vop_mkdir_args *)); +int nfs_rmdir __P((struct vop_rmdir_args *)); +int nfs_symlink __P((struct vop_symlink_args *)); +int nfs_readdir __P((struct vop_readdir_args *)); +int nfs_readlink __P((struct vop_readlink_args *)); +int nfs_abortop __P((struct vop_abortop_args *)); +int nfs_inactive __P((struct vop_inactive_args *)); +int nfs_reclaim __P((struct vop_reclaim_args *)); +int nfs_lock __P((struct vop_lock_args *)); +int nfs_unlock __P((struct vop_unlock_args *)); +int nfs_bmap __P((struct vop_bmap_args *)); +int nfs_strategy __P((struct vop_strategy_args *)); +int nfs_print __P((struct vop_print_args *)); +int nfs_islocked __P((struct vop_islocked_args *)); +int nfs_pathconf __P((struct vop_pathconf_args *)); +int nfs_advlock __P((struct vop_advlock_args *)); +int nfs_blkatoff __P((struct vop_blkatoff_args *)); +int nfs_vget __P((struct mount *, ino_t, struct vnode **)); +int nfs_valloc __P((struct vop_valloc_args *)); +#define nfs_reallocblks \ + ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp) +int nfs_vfree __P((struct vop_vfree_args *)); +int nfs_truncate __P((struct vop_truncate_args *)); +int nfs_update __P((struct vop_update_args *)); +int nfs_bwrite __P((struct vop_bwrite_args *)); +#endif /* _KERNEL */ diff --git a/sys/nfs/nfsrtt.h b/sys/nfs/nfsrtt.h new file mode 100644 
index 00000000000..d455f85128a --- /dev/null +++ b/sys/nfs/nfsrtt.h @@ -0,0 +1,98 @@ +/* $NetBSD: nfsrtt.h,v 1.2 1994/06/29 06:42:37 cgd Exp $ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsrtt.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Definitions for performance monitor. + * The client and server logging are turned on by setting the global + * constant "nfsrtton" to 1. + */ +#define NFSRTTLOGSIZ 128 + +/* + * Circular log of client side rpc activity. Each log entry is for one + * rpc filled in upon completion. (ie. in order of completion) + * The "pos" is the table index for the "next" entry, therefore the + * list goes from nfsrtt.rttl[pos] --> nfsrtt.rttl[pos - 1] in + * chronological order of completion. + */ +struct nfsrtt { + int pos; /* Position in array for next entry */ + struct rttl { + int proc; /* NFS procedure number */ + int rtt; /* Measured round trip time */ + int rto; /* Round Trip Timeout */ + int sent; /* # rpcs in progress */ + int cwnd; /* Send window */ + int srtt; /* Ave Round Trip Time */ + int sdrtt; /* Ave mean deviation of RTT */ + fsid_t fsid; /* Fsid for mount point */ + struct timeval tstamp; /* Timestamp of log entry */ + } rttl[NFSRTTLOGSIZ]; +}; + +/* + * And definitions for server side performance monitor. + * The log organization is the same as above except it is filled in at the + * time the server sends the rpc reply. + */ + +/* + * Bits for the flags field. 
+ */ +#define DRT_NQNFS 0x01 /* Rpc used Nqnfs protocol */ +#define DRT_TCP 0x02 /* Client used TCP transport */ +#define DRT_CACHEREPLY 0x04 /* Reply was from recent request cache */ +#define DRT_CACHEDROP 0x08 /* Rpc request dropped, due to recent reply */ + +/* + * Server log structure + * NB: ipadr == INADDR_ANY indicates a client using a non IP protocol. + * (ISO perhaps?) + */ +struct nfsdrt { + int pos; /* Position of next log entry */ + struct drt { + int flag; /* Bits as defined above */ + int proc; /* NFS procedure number */ + u_long ipadr; /* IP address of client */ + int resptime; /* Response time (usec) */ + struct timeval tstamp; /* Timestamp of log entry */ + } drt[NFSRTTLOGSIZ]; +}; diff --git a/sys/nfs/nfsrvcache.h b/sys/nfs/nfsrvcache.h new file mode 100644 index 00000000000..d6a1417b00c --- /dev/null +++ b/sys/nfs/nfsrvcache.h @@ -0,0 +1,84 @@ +/* $NetBSD: nfsrvcache.h,v 1.8 1994/12/13 17:17:07 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
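The nfsrtt and nfsdrt logs above are plain circular buffers: "pos" always names the slot the next completed rpc will overwrite, so the newest entry sits at pos - 1. A stand-alone sketch of that bookkeeping, with the entry fields trimmed to an illustrative subset:

#include <stdio.h>

#define NFSRTTLOGSIZ	128

/* Illustrative subset of the client-side log entry. */
struct rttl {
	int	proc;	/* NFS procedure number */
	int	rtt;	/* measured round trip time */
};

struct nfsrtt {
	int		pos;			/* next slot to fill */
	struct rttl	rttl[NFSRTTLOGSIZ];
};

/* Record one completed rpc, wrapping around the fixed-size table. */
static void
log_rpc(struct nfsrtt *log, int proc, int rtt)
{
	log->rttl[log->pos].proc = proc;
	log->rttl[log->pos].rtt = rtt;
	log->pos = (log->pos + 1) % NFSRTTLOGSIZ;
}

int
main(void)
{
	struct nfsrtt log = { 0 };
	int i;

	for (i = 0; i < 200; i++)
		log_rpc(&log, i % 18, 10 + i);
	printf("%d\n", log.pos);	/* 200 mod 128 == 72 */
	return (0);
}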
+ * + * @(#)nfsrvcache.h 8.2 (Berkeley) 8/18/94 + */ + +/* + * Definitions for the server recent request cache + */ + +#define NFSRVCACHESIZ 256 + +struct nfsrvcache { + TAILQ_ENTRY(nfsrvcache) rc_lru; /* LRU chain */ + LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */ + u_long rc_xid; /* rpc id number */ + union { + struct mbuf *ru_repmb; /* Reply mbuf list OR */ + int ru_repstat; /* Reply status */ + } rc_un; + union nethostaddr rc_haddr; /* Host address */ + short rc_proc; /* rpc proc number */ + u_char rc_state; /* Current state of request */ + u_char rc_flag; /* Flag bits */ +}; + +#define rc_reply rc_un.ru_repmb +#define rc_status rc_un.ru_repstat +#define rc_inetaddr rc_haddr.had_inetaddr +#define rc_nam rc_haddr.had_nam + +/* Cache entry states */ +#define RC_UNUSED 0 +#define RC_INPROG 1 +#define RC_DONE 2 + +/* Return values */ +#define RC_DROPIT 0 +#define RC_REPLY 1 +#define RC_DOIT 2 +#define RC_CHECKIT 3 + +/* Flag bits */ +#define RC_LOCKED 0x01 +#define RC_WANTED 0x02 +#define RC_REPSTATUS 0x04 +#define RC_REPMBUF 0x08 +#define RC_NQNFS 0x10 +#define RC_INETADDR 0x20 +#define RC_NAM 0x40 diff --git a/sys/nfs/nfsv2.h b/sys/nfs/nfsv2.h new file mode 100644 index 00000000000..2b699cb72bf --- /dev/null +++ b/sys/nfs/nfsv2.h @@ -0,0 +1,262 @@ +/* $NetBSD: nfsv2.h,v 1.9 1994/06/29 06:42:40 cgd Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsv2.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * nfs definitions as per the version 2 specs + */ + +/* + * Constants as defined in the Sun NFS Version 2 spec. 
+ * "NFS: Network File System Protocol Specification" RFC1094 + */ + +#define NFS_PORT 2049 +#define NFS_PROG 100003 +#define NFS_VER2 2 +#define NFS_MAXDGRAMDATA 8192 +#define NFS_MAXDATA 32768 +#define NFS_MAXPATHLEN 1024 +#define NFS_MAXNAMLEN 255 +#define NFS_FHSIZE 32 +#define NFS_MAXPKTHDR 404 +#define NFS_MAXPACKET (NFS_MAXPKTHDR+NFS_MAXDATA) +#define NFS_MINPACKET 20 +#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ + +/* Stat numbers for rpc returns */ +#define NFS_OK 0 +#define NFSERR_PERM 1 +#define NFSERR_NOENT 2 +#define NFSERR_IO 5 +#define NFSERR_NXIO 6 +#define NFSERR_ACCES 13 +#define NFSERR_EXIST 17 +#define NFSERR_NODEV 19 +#define NFSERR_NOTDIR 20 +#define NFSERR_ISDIR 21 +#define NFSERR_FBIG 27 +#define NFSERR_NOSPC 28 +#define NFSERR_ROFS 30 +#define NFSERR_NAMETOL 63 +#define NFSERR_NOTEMPTY 66 +#define NFSERR_DQUOT 69 +#define NFSERR_STALE 70 +#define NFSERR_WFLUSH 99 + +/* Sizes in bytes of various nfs rpc components */ +#define NFSX_FH 32 +#define NFSX_UNSIGNED 4 +#define NFSX_NFSFATTR 68 +#define NFSX_NQFATTR 92 +#define NFSX_NFSSATTR 32 +#define NFSX_NQSATTR 44 +#define NFSX_COOKIE 4 +#define NFSX_NFSSTATFS 20 +#define NFSX_NQSTATFS 28 +#define NFSX_FATTR(isnq) ((isnq) ? NFSX_NQFATTR : NFSX_NFSFATTR) +#define NFSX_SATTR(isnq) ((isnq) ? NFSX_NQSATTR : NFSX_NFSSATTR) +#define NFSX_STATFS(isnq) ((isnq) ? NFSX_NQSTATFS : NFSX_NFSSTATFS) + +/* nfs rpc procedure numbers */ +#define NFSPROC_NULL 0 +#define NFSPROC_GETATTR 1 +#define NFSPROC_SETATTR 2 +#define NFSPROC_NOOP 3 +#define NFSPROC_ROOT NFSPROC_NOOP /* Obsolete */ +#define NFSPROC_LOOKUP 4 +#define NFSPROC_READLINK 5 +#define NFSPROC_READ 6 +#define NFSPROC_WRITECACHE NFSPROC_NOOP /* Obsolete */ +#define NFSPROC_WRITE 8 +#define NFSPROC_CREATE 9 +#define NFSPROC_REMOVE 10 +#define NFSPROC_RENAME 11 +#define NFSPROC_LINK 12 +#define NFSPROC_SYMLINK 13 +#define NFSPROC_MKDIR 14 +#define NFSPROC_RMDIR 15 +#define NFSPROC_READDIR 16 +#define NFSPROC_STATFS 17 + +/* NQ nfs numbers */ +#define NQNFSPROC_READDIRLOOK 18 +#define NQNFSPROC_GETLEASE 19 +#define NQNFSPROC_VACATED 20 +#define NQNFSPROC_EVICTED 21 +#define NQNFSPROC_ACCESS 22 + +#define NFS_NPROCS 23 +/* Conversion macros */ +extern int vttoif_tab[]; +#define vtonfs_mode(t,m) \ + txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ + MAKEIMODE((t), (m))) +#define nfstov_mode(a) (fxdr_unsigned(u_short, (a))&07777) +#define vtonfs_type(a) txdr_unsigned(nfs_type[((long)(a))]) +#define nfstov_type(a) ntov_type[fxdr_unsigned(u_long,(a))&0x7] + +/* File types */ +typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5 } nfstype; + +/* Structs for common parts of the rpc's */ +struct nfsv2_time { + u_long nfs_sec; + u_long nfs_usec; +}; + +struct nqnfs_time { + u_long nq_sec; + u_long nq_nsec; +}; + +/* + * File attributes and setable attributes. These structures cover both + * NFS version 2 and the NQNFS protocol. Note that the union is only + * used to that one pointer can refer to both variants. These structures + * go out on the wire and must be densely packed, so no quad data types + * are used. (all fields are longs or u_longs or structures of same) + * NB: You can't do sizeof(struct nfsv2_fattr), you must use the + * NFSX_FATTR(isnq) macro. 
+ */ +struct nfsv2_fattr { + u_long fa_type; + u_long fa_mode; + u_long fa_nlink; + u_long fa_uid; + u_long fa_gid; + union { + struct { + u_long nfsfa_size; + u_long nfsfa_blocksize; + u_long nfsfa_rdev; + u_long nfsfa_blocks; + u_long nfsfa_fsid; + u_long nfsfa_fileid; + struct nfsv2_time nfsfa_atime; + struct nfsv2_time nfsfa_mtime; + struct nfsv2_time nfsfa_ctime; + } fa_nfsv2; + struct { + struct { + u_long nqfa_qsize[2]; + } nqfa_size; + u_long nqfa_blocksize; + u_long nqfa_rdev; + struct { + u_long nqfa_qbytes[2]; + } nqfa_bytes; + u_long nqfa_fsid; + u_long nqfa_fileid; + struct nqnfs_time nqfa_atime; + struct nqnfs_time nqfa_mtime; + struct nqnfs_time nqfa_ctime; + u_long nqfa_flags; + u_long nqfa_gen; + struct { + u_long nqfa_qfilerev[2]; + } nqfa_filerev; + } fa_nqnfs; + } fa_un; +}; + +/* and some ugly defines for accessing union components */ +#define fa_nfssize fa_un.fa_nfsv2.nfsfa_size +#define fa_nfsblocksize fa_un.fa_nfsv2.nfsfa_blocksize +#define fa_nfsrdev fa_un.fa_nfsv2.nfsfa_rdev +#define fa_nfsblocks fa_un.fa_nfsv2.nfsfa_blocks +#define fa_nfsfsid fa_un.fa_nfsv2.nfsfa_fsid +#define fa_nfsfileid fa_un.fa_nfsv2.nfsfa_fileid +#define fa_nfsatime fa_un.fa_nfsv2.nfsfa_atime +#define fa_nfsmtime fa_un.fa_nfsv2.nfsfa_mtime +#define fa_nfsctime fa_un.fa_nfsv2.nfsfa_ctime +#define fa_nqsize fa_un.fa_nqnfs.nqfa_size +#define fa_nqblocksize fa_un.fa_nqnfs.nqfa_blocksize +#define fa_nqrdev fa_un.fa_nqnfs.nqfa_rdev +#define fa_nqbytes fa_un.fa_nqnfs.nqfa_bytes +#define fa_nqfsid fa_un.fa_nqnfs.nqfa_fsid +#define fa_nqfileid fa_un.fa_nqnfs.nqfa_fileid +#define fa_nqatime fa_un.fa_nqnfs.nqfa_atime +#define fa_nqmtime fa_un.fa_nqnfs.nqfa_mtime +#define fa_nqctime fa_un.fa_nqnfs.nqfa_ctime +#define fa_nqflags fa_un.fa_nqnfs.nqfa_flags +#define fa_nqgen fa_un.fa_nqnfs.nqfa_gen +#define fa_nqfilerev fa_un.fa_nqnfs.nqfa_filerev + +struct nfsv2_sattr { + u_long sa_mode; + u_long sa_uid; + u_long sa_gid; + union { + struct { + u_long nfssa_size; + struct nfsv2_time nfssa_atime; + struct nfsv2_time nfssa_mtime; + } sa_nfsv2; + struct { + struct { + u_long nqsa_qsize[2]; + } nqsa_size; + struct nqnfs_time nqsa_atime; + struct nqnfs_time nqsa_mtime; + u_long nqsa_flags; + u_long nqsa_rdev; + } sa_nqnfs; + } sa_un; +}; + +/* and some ugly defines for accessing the unions */ +#define sa_nfssize sa_un.sa_nfsv2.nfssa_size +#define sa_nfsatime sa_un.sa_nfsv2.nfssa_atime +#define sa_nfsmtime sa_un.sa_nfsv2.nfssa_mtime +#define sa_nqsize sa_un.sa_nqnfs.nqsa_size +#define sa_nqatime sa_un.sa_nqnfs.nqsa_atime +#define sa_nqmtime sa_un.sa_nqnfs.nqsa_mtime +#define sa_nqflags sa_un.sa_nqnfs.nqsa_flags +#define sa_nqrdev sa_un.sa_nqnfs.nqsa_rdev + +struct nfsv2_statfs { + u_long sf_tsize; + u_long sf_bsize; + u_long sf_blocks; + u_long sf_bfree; + u_long sf_bavail; + u_long sf_files; /* Nqnfs only */ + u_long sf_ffree; /* ditto */ +}; diff --git a/sys/nfs/nqnfs.h b/sys/nfs/nqnfs.h new file mode 100644 index 00000000000..f5f07347afc --- /dev/null +++ b/sys/nfs/nqnfs.h @@ -0,0 +1,202 @@ +/* $NetBSD: nqnfs.h,v 1.4 1994/12/13 17:17:08 mycroft Exp $ */ + +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nqnfs.h 8.2 (Berkeley) 8/18/94 + */ + +/* + * Definitions for NQNFS (Not Quite NFS) cache consistency protocol. + */ + +/* Tunable constants */ +#define NQ_CLOCKSKEW 3 /* Clock skew factor (sec) */ +#define NQ_WRITESLACK 5 /* Delay for write cache flushing */ +#define NQ_MAXLEASE 60 /* Max lease duration (sec) */ +#define NQ_MINLEASE 5 /* Min lease duration (sec) */ +#define NQ_DEFLEASE 30 /* Default lease duration (sec) */ +#define NQ_RENEWAL 3 /* Time before expiry (sec) to renew */ +#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ +#define NQ_MAXNUMLEASE 2048 /* Upper bound on number of server leases */ +#define NQ_DEADTHRESH NQ_NEVERDEAD /* Default nm_deadthresh */ +#define NQ_NEVERDEAD 9 /* Greater than max. nm_timeouts */ +#define NQLCHSZ 256 /* Server hash table size */ + +#define NQNFS_PROG 300105 /* As assigned by Sun */ +#define NQNFS_VER1 1 +#define NQNFS_EVICTSIZ 156 /* Size of eviction request in bytes */ + +/* + * Definitions used for saving the "last lease expires" time in Non-volatile + * RAM on the server. The default definitions below assume that NOVRAM is not + * available. + */ +#define NQSTORENOVRAM(t) +#define NQLOADNOVRAM(t) + +/* + * Defn and structs used on the server to maintain state for current leases. + * The list of host(s) that hold the lease are kept as nqhost structures. + * The first one lives in nqlease and any others are held in a linked + * list of nqm structures hanging off of nqlease. + * + * Each nqlease structure is chained into two lists. The first is a list + * ordered by increasing expiry time for nqsrv_timer() and the second is a chain + * hashed on lc_fh. 
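The NQ_* tunables above bound how long a server lets a client cache under a lease. As a purely illustrative sketch (the real policy lives in nfs_nqlease.c and is not reproduced in this header; the helper name is hypothetical), a requested duration might be clamped like this:

#include <stdio.h>

#define NQ_MAXLEASE	60	/* Max lease duration (sec) */
#define NQ_MINLEASE	5	/* Min lease duration (sec) */
#define NQ_DEFLEASE	30	/* Default lease duration (sec) */

/* Hypothetical helper: keep a requested lease term inside the bounds. */
static int
clamp_lease(int requested)
{
	if (requested <= 0)
		return (NQ_DEFLEASE);
	if (requested < NQ_MINLEASE)
		return (NQ_MINLEASE);
	if (requested > NQ_MAXLEASE)
		return (NQ_MAXLEASE);
	return (requested);
}

int
main(void)
{
	printf("%d %d %d\n", clamp_lease(0), clamp_lease(2), clamp_lease(120));
	return (0);
}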
+ */ +#define LC_MOREHOSTSIZ 10 + +struct nqhost { + union { + struct { + u_short udp_flag; + u_short udp_port; + union nethostaddr udp_haddr; + } un_udp; + struct { + u_short connless_flag; + u_short connless_spare; + union nethostaddr connless_haddr; + } un_connless; + struct { + u_short conn_flag; + u_short conn_spare; + struct nfssvc_sock *conn_slp; + } un_conn; + } lph_un; +}; +#define lph_flag lph_un.un_udp.udp_flag +#define lph_port lph_un.un_udp.udp_port +#define lph_haddr lph_un.un_udp.udp_haddr +#define lph_inetaddr lph_un.un_udp.udp_haddr.had_inetaddr +#define lph_claddr lph_un.un_connless.connless_haddr +#define lph_nam lph_un.un_connless.connless_haddr.had_nam +#define lph_slp lph_un.un_conn.conn_slp + +struct nqlease { + LIST_ENTRY(nqlease) lc_hash; /* Fhandle hash list */ + CIRCLEQ_ENTRY(nqlease) lc_timer; /* Timer queue list */ + time_t lc_expiry; /* Expiry time (sec) */ + struct nqhost lc_host; /* Host that got lease */ + struct nqm *lc_morehosts; /* Other hosts that share read lease */ + fsid_t lc_fsid; /* Fhandle */ + char lc_fiddata[MAXFIDSZ]; + struct vnode *lc_vp; /* Soft reference to associated vnode */ +}; +#define lc_flag lc_host.lph_un.un_udp.udp_flag + +/* lc_flag bits */ +#define LC_VALID 0x0001 /* Host address valid */ +#define LC_WRITE 0x0002 /* Write cache */ +#define LC_NONCACHABLE 0x0004 /* Non-cachable lease */ +#define LC_LOCKED 0x0008 /* Locked */ +#define LC_WANTED 0x0010 /* Lock wanted */ +#define LC_EXPIREDWANTED 0x0020 /* Want lease when expired */ +#define LC_UDP 0x0040 /* Host address for udp socket */ +#define LC_CLTP 0x0080 /* Host address for other connectionless */ +#define LC_LOCAL 0x0100 /* Host is server */ +#define LC_VACATED 0x0200 /* Host has vacated lease */ +#define LC_WRITTEN 0x0400 /* Recently wrote to the leased file */ +#define LC_SREF 0x0800 /* Holds a nfssvc_sock reference */ + +struct nqm { + struct nqm *lpm_next; + struct nqhost lpm_hosts[LC_MOREHOSTSIZ]; +}; + +/* + * Flag bits for flags argument to nqsrv_getlease. + */ +#define NQL_READ LEASE_READ /* Read Request */ +#define NQL_WRITE LEASE_WRITE /* Write Request */ +#define NQL_CHECK 0x4 /* Check for lease */ +#define NQL_NOVAL 0xffffffff /* Invalid */ + +/* + * Special value for slp for local server calls. + */ +#define NQLOCALSLP ((struct nfssvc_sock *) -1) + +/* + * Server side macros. + */ +#define nqsrv_getl(v, l) \ + (void) nqsrv_getlease((v), &nfsd->nd_duration, \ + ((nfsd->nd_nqlflag != 0 && nfsd->nd_nqlflag != NQL_NOVAL) ? nfsd->nd_nqlflag : \ + ((l) | NQL_CHECK)), \ + nfsd, nam, &cache, &frev, cred) + +/* + * Client side macros that check for a valid lease. + */ +#define NQNFS_CKINVALID(v, n, f) \ + ((time.tv_sec > (n)->n_expiry && \ + VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \ + || ((f) == NQL_WRITE && ((n)->n_flag & NQNFSWRITE) == 0)) + +#define NQNFS_CKCACHABLE(v, f) \ + ((time.tv_sec <= VTONFS(v)->n_expiry || \ + VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \ + && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \ + ((f) == NQL_READ || (VTONFS(v)->n_flag & NQNFSWRITE))) + +#define NQNFS_NEEDLEASE(v, p) \ + (time.tv_sec > VTONFS(v)->n_expiry ? \ + ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \ + (((time.tv_sec + NQ_RENEWAL) > VTONFS(v)->n_expiry && \ + nqnfs_piggy[p]) ? \ + ((VTONFS(v)->n_flag & NQNFSWRITE) ? \ + NQL_WRITE : nqnfs_piggy[p]) : 0)) + +/* + * List head for timer queue. + */ +CIRCLEQ_HEAD(, nqlease) nqtimerhead; + +/* + * List head for the file handle hash table. 
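The client-side NQNFS_CKINVALID macro above packs the whole cache-validity test into one expression. Restated as a stand-alone user-space sketch, with the "server presumed dead" escape hatch (nm_timeouts vs. nm_deadthresh) left out for brevity:

#include <stdio.h>
#include <time.h>

#define NQL_READ	0x1
#define NQL_WRITE	0x2
#define NQNFSWRITE	0x0040		/* n_flag bit from nfsnode.h */

/* A cached lease is invalid once it has expired, or when a write is
 * attempted while holding only a read lease. */
static int
lease_invalid(time_t now, time_t n_expiry, long n_flag, int req)
{
	if (now > n_expiry)
		return (1);
	if (req == NQL_WRITE && (n_flag & NQNFSWRITE) == 0)
		return (1);
	return (0);
}

int
main(void)
{
	time_t now = time(NULL);

	printf("%d\n", lease_invalid(now, now + 30, NQNFSWRITE, NQL_WRITE)); /* 0 */
	printf("%d\n", lease_invalid(now, now - 1, 0, NQL_READ));            /* 1 */
	return (0);
}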
+ */ +#define NQFHHASH(f) \ + (&nqfhhashtbl[(*((u_long *)(f))) & nqfhhash]) +LIST_HEAD(nqfhhashhead, nqlease) *nqfhhashtbl; +u_long nqfhhash; + +/* + * Nqnfs return status numbers. + */ +#define NQNFS_EXPIRED 500 +#define NQNFS_TRYLATER 501 +#define NQNFS_AUTHERR 502 diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h new file mode 100644 index 00000000000..7b2827fd6d1 --- /dev/null +++ b/sys/nfs/rpcv2.h @@ -0,0 +1,90 @@ +/* $NetBSD: rpcv2.h,v 1.6 1994/06/29 06:42:43 cgd Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)rpcv2.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Definitions for Sun RPC Version 2, from + * "RPC: Remote Procedure Call Protocol Specification" RFC1057 + */ + +/* Version # */ +#define RPC_VER2 2 + +/* Authentication */ +#define RPCAUTH_NULL 0 +#define RPCAUTH_UNIX 1 +#define RPCAUTH_SHORT 2 +#define RPCAUTH_NQNFS 300000 +#define RPCAUTH_MAXSIZ 400 +#define RPCAUTH_UNIXGIDS 16 + +/* Rpc Constants */ +#define RPC_CALL 0 +#define RPC_REPLY 1 +#define RPC_MSGACCEPTED 0 +#define RPC_MSGDENIED 1 +#define RPC_PROGUNAVAIL 1 +#define RPC_PROGMISMATCH 2 +#define RPC_PROCUNAVAIL 3 +#define RPC_GARBAGE 4 /* I like this one */ +#define RPC_MISMATCH 0 +#define RPC_AUTHERR 1 + +/* Authentication failures */ +#define AUTH_BADCRED 1 +#define AUTH_REJECTCRED 2 +#define AUTH_BADVERF 3 +#define AUTH_REJECTVERF 4 +#define AUTH_TOOWEAK 5 /* Give em wheaties */ + +/* Sizes of rpc header parts */ +#define RPC_SIZ 24 +#define RPC_REPLYSIZ 28 + +/* RPC Prog definitions */ +#define RPCPROG_MNT 100005 +#define RPCMNT_VER1 1 +#define RPCMNT_MOUNT 1 +#define RPCMNT_DUMP 2 +#define RPCMNT_UMOUNT 3 +#define RPCMNT_UMNTALL 4 +#define RPCMNT_EXPORT 5 +#define RPCMNT_NAMELEN 255 +#define RPCMNT_PATHLEN 1024 +#define RPCPROG_NFS 100003 diff --git a/sys/nfs/swapnfs.c b/sys/nfs/swapnfs.c new file mode 100644 index 00000000000..aea8c28d232 --- /dev/null +++ b/sys/nfs/swapnfs.c @@ -0,0 +1,63 @@ +/* $NetBSD: swapnfs.c,v 1.9 1995/04/30 07:03:13 cgd Exp $ */ + +/* + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)nfsswapvmunix.c 7.1 (Berkeley) 3/4/91 + */ + +/* + * NFS parameters are now filled in nfs_mountroot() by + * nfs_boot(). 
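The RPC_SIZ constant above (24) corresponds to the six fixed 32-bit words that open every version-2 call: xid, direction, RPC version, program, version, and procedure; the variable-length credential and verifier follow. A user-space sketch (not the kernel's encoder) serializing just that fixed prefix:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RPC_VER2	2
#define RPC_CALL	0
#define RPC_SIZ		24

/* Write the fixed call-header words in XDR (big-endian) order. */
static size_t
rpc_call_header(uint8_t *buf, uint32_t xid, uint32_t prog, uint32_t vers,
    uint32_t proc)
{
	uint32_t words[6];

	words[0] = htonl(xid);
	words[1] = htonl(RPC_CALL);
	words[2] = htonl(RPC_VER2);
	words[3] = htonl(prog);
	words[4] = htonl(vers);
	words[5] = htonl(proc);
	memcpy(buf, words, sizeof(words));
	return (sizeof(words));
}

int
main(void)
{
	uint8_t buf[64];
	size_t n;

	n = rpc_call_header(buf, 1, 100003, 2, 4);	/* NFSv2 LOOKUP */
	printf("%zu == %d\n", n, RPC_SIZ);
	return (0);
}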
+ */ + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/socket.h> +#include <sys/mount.h> + +#include <net/if.h> + +dev_t rootdev = NODEV; +dev_t argdev = NODEV; +dev_t dumpdev = NODEV; + +struct swdevt swdevt[] = { + { NODEV, 0, 0 }, + { NODEV, 0, 0 } +}; + +extern int nfs_mountroot(); +int (*mountroot)() = nfs_mountroot; diff --git a/sys/nfs/xdr_subs.h b/sys/nfs/xdr_subs.h new file mode 100644 index 00000000000..a0dfcc4146e --- /dev/null +++ b/sys/nfs/xdr_subs.h @@ -0,0 +1,83 @@ +/* $NetBSD: xdr_subs.h,v 1.8 1995/01/13 16:15:02 mycroft Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Rick Macklem at The University of Guelph. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)xdr_subs.h 8.1 (Berkeley) 6/10/93 + */ + +/* + * Macros used for conversion to/from xdr representation by nfs... + * These use the MACHINE DEPENDENT routines ntohl, htonl + * As defined by "XDR: External Data Representation Standard" RFC1014 + * + * To simplify the implementation, we use ntohl/htonl even on big-endian + * machines, and count on them being `#define'd away. Some of these + * might be slightly more efficient as quad_t copies on a big-endian, + * but we cannot count on their alignment anyway. 
+ */
+
+#define fxdr_unsigned(t, v)	((t)ntohl((long)(v)))
+#define txdr_unsigned(v)	(htonl((long)(v)))
+
+#define fxdr_nfstime(f, t) { \
+	(t)->ts_sec = ntohl(((struct nfsv2_time *)(f))->nfs_sec); \
+	if (((struct nfsv2_time *)(f))->nfs_usec != 0xffffffff) \
+		(t)->ts_nsec = 1000 * ntohl(((struct nfsv2_time *)(f))->nfs_usec); \
+	else \
+		(t)->ts_nsec = 0; \
+}
+#define txdr_nfstime(f, t) { \
+	((struct nfsv2_time *)(t))->nfs_sec = htonl((f)->ts_sec); \
+	((struct nfsv2_time *)(t))->nfs_usec = htonl((f)->ts_nsec) / 1000; \
+}
+
+#define fxdr_nqtime(f, t) { \
+	(t)->ts_sec = ntohl(((struct nqnfs_time *)(f))->nq_sec); \
+	(t)->ts_nsec = ntohl(((struct nqnfs_time *)(f))->nq_nsec); \
+}
+#define txdr_nqtime(f, t) { \
+	((struct nqnfs_time *)(t))->nq_sec = htonl((f)->ts_sec); \
+	((struct nqnfs_time *)(t))->nq_nsec = htonl((f)->ts_nsec); \
+}
+
+#define fxdr_hyper(f, t) { \
+	((long *)(t))[_QUAD_HIGHWORD] = ntohl(((long *)(f))[0]); \
+	((long *)(t))[_QUAD_LOWWORD] = ntohl(((long *)(f))[1]); \
+}
+#define txdr_hyper(f, t) { \
+	((long *)(t))[0] = htonl(((long *)(f))[_QUAD_HIGHWORD]); \
+	((long *)(t))[1] = htonl(((long *)(f))[_QUAD_LOWWORD]); \
+}
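Editor's note: the xdr_subs.h macros above rely on ntohl/htonl for every 32-bit field and carry a 64-bit "hyper" as two big-endian words, most-significant word first. The userland sketch below only illustrates that wire layout and is not part of the imported source; the names xdr_put_hyper and xdr_get_hyper are hypothetical stand-ins, and the shift-based arithmetic is a simplification of the word-by-word _QUAD_HIGHWORD/_QUAD_LOWWORD copies the kernel macros perform.

/*
 * Illustrative only: shows the byte-order handling the fxdr_/txdr_
 * macros implement, using plain functions so it can be compiled and
 * run outside the kernel.  Names are hypothetical, not from the tree.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>			/* htonl, ntohl */

/* txdr_hyper analogue: host 64-bit value -> two big-endian XDR words */
static void
xdr_put_hyper(uint64_t v, uint32_t wire[2])
{
	wire[0] = htonl((uint32_t)(v >> 32));		/* high word first */
	wire[1] = htonl((uint32_t)(v & 0xffffffffU));	/* then low word */
}

/* fxdr_hyper analogue: two big-endian XDR words -> host 64-bit value */
static uint64_t
xdr_get_hyper(const uint32_t wire[2])
{
	return ((uint64_t)ntohl(wire[0]) << 32) | (uint64_t)ntohl(wire[1]);
}

int
main(void)
{
	uint32_t onwire = htonl(1995);			/* txdr_unsigned analogue */
	uint32_t words[2];
	uint64_t size = 0x0123456789abcdefULL;

	/* fxdr_unsigned analogue: convert back to host byte order */
	printf("unsigned round-trip: %u\n", (unsigned)ntohl(onwire));

	xdr_put_hyper(size, words);
	printf("hyper round-trip matches: %d\n", xdr_get_hyper(words) == size);
	return 0;
}

As the header's own comment notes, the kernel macros copy word-by-word through _QUAD_HIGHWORD/_QUAD_LOWWORD rather than using quad_t loads because the source and destination cannot be assumed to be suitably aligned.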