summaryrefslogtreecommitdiff
path: root/sys/nfs/nfs_kq.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/nfs/nfs_kq.c')
-rw-r--r--sys/nfs/nfs_kq.c359
1 files changed, 359 insertions, 0 deletions
diff --git a/sys/nfs/nfs_kq.c b/sys/nfs/nfs_kq.c
new file mode 100644
index 00000000000..d8919486392
--- /dev/null
+++ b/sys/nfs/nfs_kq.c
@@ -0,0 +1,359 @@
+/* $OpenBSD: nfs_kq.c,v 1.1 2004/07/21 17:30:56 marius Exp $ */
+/* $NetBSD: nfs_kq.c,v 1.7 2003/10/30 01:43:10 simonb Exp $ */
+
+/*-
+ * Copyright (c) 2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#ifdef __NetBSD__
+__KERNEL_RCSID(0, "$NetBSD: nfs_kq.c,v 1.7 2003/10/30 01:43:10 simonb Exp $");
+#endif /* __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/unistd.h>
+#include <sys/file.h>
+#include <sys/kthread.h>
+
+#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsproto.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs_var.h>
+
+/*
+ * One entry on the list of NFS vnodes watched by the kqueue poller.
+ * Entries are created in nfs_kqfilter(), shared between all knotes
+ * attached to the same vnode, and freed in filt_nfsdetach() once the
+ * last user goes away.
+ */
+struct kevq {
+ SLIST_ENTRY(kevq) kev_link; /* linkage on kevlist */
+ struct vnode *vp; /* the watched vnode */
+ u_int usecount; /* number of knotes sharing this entry */
+ u_int flags; /* KEVQ_* bits below */
+#define KEVQ_BUSY 0x01 /* currently being processed */
+#define KEVQ_WANT 0x02 /* want to change this entry */
+ struct timespec omtime; /* old modification time */
+ struct timespec octime; /* old change time */
+ nlink_t onlink; /* old number of references to file */
+};
+/* List head type for the watch list. */
+SLIST_HEAD(kevqlist, kevq);
+
+/* Lock held around walks and modifications of kevlist and its entries. */
+static struct lock nfskevq_lock;
+/*
+ * The poller thread; filled in by kthread_create() in nfs_kqfilter() and
+ * reset to NULL by nfs_kqpoll() just before it exits.
+ */
+static struct proc *pnfskq;
+/* List of currently watched vnodes, one kevq entry per vnode. */
+static struct kevqlist kevlist = SLIST_HEAD_INITIALIZER(kevlist);
+
+/*
+ * Initialize nfskevq_lock, the lock that nfs_kqpoll(), nfs_kqfilter() and
+ * filt_nfsdetach() take around accesses to the watch list.
+ * NOTE(review): the caller is not in this file; presumably this runs once
+ * during NFS initialization, before any kqfilter call -- confirm.
+ */
+void
+nfs_kqinit(void)
+{
+ lockinit(&nfskevq_lock, PSOCK, "nfskqlck", 0, 0);
+}
+
+/*
+ * This quite simplistic routine periodically checks for server changes
+ * of any of the watched files every NFS_MINATTRTIMO/2 seconds.
+ * Only changes in size, modification time, change time and nlinks
+ * are being checked, everything else is ignored.
+ * The routine only calls VOP_GETATTR() when it's likely it would get
+ * some new data, i.e. when the vnode expires from the attrcache. This
+ * should give the same result as periodically running stat(2) from
+ * userland, while keeping CPU/network usage low, and still provide proper
+ * kevent semantics.
+ * The poller thread is created when the first vnode is added to the watch
+ * list, and exits when the watch list is empty. The overhead of thread
+ * creation isn't really important, and neither is the speed of attaching
+ * and detaching a knote.
+ */
+/*
+ * Body of the poller thread started by nfs_kqfilter().
+ *
+ * Each pass walks kevlist with nfskevq_lock held.  For an entry whose
+ * attributes are no longer in the attribute cache, the entry is marked
+ * KEVQ_BUSY and the lock is dropped around VOP_GETATTR(); the fresh
+ * attributes are compared against the values remembered in the entry and
+ * the matching NOTE_* hints are posted with VN_KNOTE().  The lock is then
+ * retaken, KEVQ_BUSY is cleared and anybody who set KEVQ_WANT is woken up.
+ *
+ * When the list is found empty the thread clears pnfskq and exits;
+ * otherwise it sleeps for NFS_MINATTRTIMO * hz / 2 ticks (or until
+ * nfs_kqfilter() kicks it with wakeup()) before the next pass.
+ *
+ * The arg parameter is unused.
+ */
+/* ARGSUSED */
+static void
+nfs_kqpoll(void *arg)
+{
+	struct kevq *ke;
+	struct vattr attr;
+	struct proc *p = pnfskq;
+	u_quad_t osize;
+
+	for (;;) {
+		lockmgr(&nfskevq_lock, LK_EXCLUSIVE, NULL, p);
+		SLIST_FOREACH(ke, &kevlist, kev_link) {
+			struct nfsnode *np = VTONFS(ke->vp);
+
+#ifdef DEBUG
+			printf("nfs_kqpoll on: ");
+			VOP_PRINT(ke->vp);
+#endif
+			/* skip if still in attrcache */
+			if (nfs_getattrcache(ke->vp, &attr) != ENOENT)
+				continue;
+
+			/*
+			 * Mark entry busy, release lock and check
+			 * for changes.  While KEVQ_BUSY is set,
+			 * filt_nfsdetach() waits instead of freeing
+			 * the entry, so ke stays valid without the lock.
+			 */
+			ke->flags |= KEVQ_BUSY;
+			lockmgr(&nfskevq_lock, LK_RELEASE, NULL, p);
+
+			/* save the cached size, nfs_getattr() updates it */
+			osize = np->n_size;
+
+			/*
+			 * Start from a zeroed vattr, as nfs_kqfilter() does.
+			 * The VOP_GETATTR() result is deliberately ignored,
+			 * so without this a failed call would leave the
+			 * comparisons below reading whatever happened to be
+			 * on the stack and posting arbitrary events.
+			 */
+			memset(&attr, 0, sizeof(attr));
+			(void) VOP_GETATTR(ke->vp, &attr, p->p_ucred, p);
+
+			/*
+			 * The following is a bit fragile, but about the
+			 * best we can get.
+			 */
+			if (attr.va_size != osize) {
+				int extended = (attr.va_size > osize);
+
+				VN_KNOTE(ke->vp, NOTE_WRITE
+				    | (extended ? NOTE_EXTEND : 0));
+				ke->omtime = attr.va_mtime;
+			} else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec
+			    || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) {
+				/* same size, but the contents were rewritten */
+				VN_KNOTE(ke->vp, NOTE_WRITE);
+				ke->omtime = attr.va_mtime;
+			}
+
+			if (attr.va_ctime.tv_sec != ke->octime.tv_sec
+			    || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) {
+				VN_KNOTE(ke->vp, NOTE_ATTRIB);
+				ke->octime = attr.va_ctime;
+			}
+
+			if (attr.va_nlink != ke->onlink) {
+				VN_KNOTE(ke->vp, NOTE_LINK);
+				ke->onlink = attr.va_nlink;
+			}
+
+			/* done with the entry; let a waiting detach proceed */
+			lockmgr(&nfskevq_lock, LK_EXCLUSIVE, NULL, p);
+			ke->flags &= ~KEVQ_BUSY;
+			if (ke->flags & KEVQ_WANT) {
+				ke->flags &= ~KEVQ_WANT;
+				wakeup(ke);
+			}
+		}
+
+		if (SLIST_EMPTY(&kevlist)) {
+			/* Nothing more to watch, exit */
+			pnfskq = NULL;
+			lockmgr(&nfskevq_lock, LK_RELEASE, NULL, p);
+			kthread_exit(0);
+		}
+		lockmgr(&nfskevq_lock, LK_RELEASE, NULL, p);
+
+		/* wait a while before checking for changes again */
+		tsleep(pnfskq, PSOCK, "nfskqpw",
+		    NFS_MINATTRTIMO * hz / 2);
+
+	}
+}
+
+/*
+ * Detach routine shared by both NFS filters.
+ *
+ * Unhooks the knote from the vnode's si_note list and then drops this
+ * knote's reference on the vnode's watch-list entry.  If the poller is
+ * currently working on that entry (KEVQ_BUSY), KEVQ_WANT is set and we
+ * sleep on the entry until the poller wakes us up.  The entry is unlinked
+ * and freed when this was its last user.
+ */
+static void
+filt_nfsdetach(struct knote *kn)
+{
+	struct proc *self = curproc;
+	struct vnode *vp = (struct vnode *)kn->kn_hook;
+	struct kevq *entry;
+
+#ifdef notyet
+	/* XXXLUKEM lock the struct? */
+	SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
+#endif
+
+	simple_lock(&vp->v_selectinfo.vsi_lock);
+	SLIST_REMOVE(&vp->v_selectinfo.vsi_selinfo.si_note,
+	    kn, knote, kn_selnext);
+	simple_unlock(&vp->v_selectinfo.vsi_lock);
+
+	lockmgr(&nfskevq_lock, LK_EXCLUSIVE, NULL, self);
+
+	/* Locate the watch-list entry for this vnode, if there is one. */
+	SLIST_FOREACH(entry, &kevlist, kev_link) {
+		if (entry->vp == vp)
+			break;
+	}
+
+	if (entry != NULL) {
+		/* Let the poller finish with the entry before touching it. */
+		while ((entry->flags & KEVQ_BUSY) != 0) {
+			entry->flags |= KEVQ_WANT;
+			lockmgr(&nfskevq_lock, LK_RELEASE, NULL, self);
+			(void) tsleep(entry, PSOCK, "nfskqdet", 0);
+			lockmgr(&nfskevq_lock, LK_EXCLUSIVE, NULL, self);
+		}
+
+		if (entry->usecount <= 1) {
+			/* we were the last user, garbage-collect it */
+			SLIST_REMOVE(&kevlist, entry, kevq, kev_link);
+			FREE(entry, M_KEVENT);
+		} else {
+			/* other knotes still rely on this entry */
+			entry->usecount--;
+		}
+	}
+
+	lockmgr(&nfskevq_lock, LK_RELEASE, NULL, self);
+}
+
+/*
+ * Event routine for EVFILT_READ on an NFS vnode.
+ *
+ * On NOTE_REVOKE the knote is flagged EV_EOF | EV_ONESHOT and reported as
+ * active.  Otherwise kn_data is set to the distance between the cached
+ * file size and the current file offset, and the knote is active whenever
+ * that distance is non-zero.
+ */
+static int
+filt_nfsread(struct knote *kn, long hint)
+{
+	struct vnode *vp = (struct vnode *)kn->kn_hook;
+	struct nfsnode *np = VTONFS(vp);
+	int active;
+
+	if (hint == NOTE_REVOKE) {
+		/*
+		 * The filesystem is gone: signal EOF and have the
+		 * knote scheduled for deletion.
+		 */
+		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+		active = 1;
+	} else {
+		/* XXXLUKEM lock the struct? */
+
+		kn->kn_data = np->n_size - kn->kn_fp->f_offset;
+#ifdef DEBUG
+		printf("nfsread event. %d\n", kn->kn_data);
+#endif
+		active = (kn->kn_data != 0);
+	}
+
+	return (active);
+}
+
+/*
+ * Event routine for EVFILT_VNODE on an NFS vnode.
+ *
+ * A hint that overlaps the requested flags (kn_sfflags) is accumulated
+ * into kn_fflags.  NOTE_REVOKE additionally sets EV_EOF and always
+ * reports the knote as active; any other hint reports it active once at
+ * least one flag has been accumulated.
+ */
+static int
+filt_nfsvnode(struct knote *kn, long hint)
+{
+	if ((kn->kn_sfflags & hint) != 0)
+		kn->kn_fflags |= hint;
+
+	if (hint != NOTE_REVOKE)
+		return (kn->kn_fflags != 0);
+
+	kn->kn_flags |= EV_EOF;
+	return (1);
+}
+
+/*
+ * Filter operation tables installed by nfs_kqfilter(): one for
+ * EVFILT_READ and one for EVFILT_VNODE.  Both share filt_nfsdetach() and
+ * differ only in the event routine.
+ * NOTE(review): the initializers are positional; struct filterops is not
+ * declared in this file, so the field order is presumably
+ * { isfd, attach, detach, event } -- confirm against sys/event.h.
+ */
+static const struct filterops nfsread_filtops =
+ { 1, NULL, filt_nfsdetach, filt_nfsread };
+static const struct filterops nfsvnode_filtops =
+ { 1, NULL, filt_nfsdetach, filt_nfsvnode };
+
+/*
+ * kqfilter entry point for NFS vnodes.
+ *
+ * v points to a struct vop_kqfilter_args carrying the vnode (a_vp) and the
+ * knote (a_kn).  EVFILT_READ and EVFILT_VNODE are supported; any other
+ * filter makes the function return 1 without further action.
+ *
+ * For a supported filter the knote gets its filterops and the vnode as
+ * hook, the poller thread is started if it is not running, the vnode is
+ * put on the watch list (or the use count of its existing entry is
+ * bumped), the poller is woken up, and the knote is linked onto the
+ * vnode's si_note list.
+ *
+ * Returns 0 on success, or the kthread_create() error if the poller could
+ * not be started.
+ */
+int
+nfs_kqfilter(void *v)
+{
+	struct vop_kqfilter_args /* {
+		struct vnode *a_vp;
+		struct knote *a_kn;
+	} */ *ap = v;
+	struct vnode *vp = ap->a_vp;
+	struct knote *kn = ap->a_kn;
+	struct proc *self = curproc;	/* XXX */
+	struct kevq *entry;
+	struct vattr va;
+	int rv = 0;
+
+#ifdef DEBUG
+	printf("nfs_kqfilter(%d) on: ", kn->kn_filter);
+	VOP_PRINT(vp);
+#endif
+
+	/* Pick the filter operations, rejecting unsupported filters. */
+	if (kn->kn_filter == EVFILT_READ)
+		kn->kn_fop = &nfsread_filtops;
+	else if (kn->kn_filter == EVFILT_VNODE)
+		kn->kn_fop = &nfsvnode_filtops;
+	else
+		return (1);
+
+	kn->kn_hook = vp;
+
+	/*
+	 * Get the current attributes up front.  They are only used when
+	 * a new watch-list entry has to be created, but they must be
+	 * fetched without the list lock held.  Thanks to the attrcache
+	 * this is likely cheap.
+	 */
+	memset(&va, 0, sizeof(va));
+	(void) VOP_GETATTR(vp, &va, self->p_ucred, self);
+
+	lockmgr(&nfskevq_lock, LK_EXCLUSIVE, NULL, self);
+
+	/* Start the poller thread unless it is already running. */
+	if (pnfskq == NULL) {
+		rv = kthread_create(nfs_kqpoll, NULL, &pnfskq,
+		    "nfskqpoll");
+		if (rv != 0)
+			goto unlock;
+	}
+
+	/* Is this vnode on the watch list already? */
+	SLIST_FOREACH(entry, &kevlist, kev_link) {
+		if (entry->vp == vp)
+			break;
+	}
+
+	if (entry == NULL) {
+		/* not watched yet: set up a fresh entry */
+		MALLOC(entry, struct kevq *,
+		    sizeof(struct kevq), M_KEVENT, M_WAITOK);
+		entry->vp = vp;
+		entry->usecount = 1;
+		entry->flags = 0;
+		entry->omtime = va.va_mtime;
+		entry->octime = va.va_ctime;
+		entry->onlink = va.va_nlink;
+		SLIST_INSERT_HEAD(&kevlist, entry, kev_link);
+	} else {
+		/* already watched: one more user of the entry */
+		entry->usecount++;
+	}
+
+	/* kick the poller */
+	wakeup(pnfskq);
+
+	simple_lock(&vp->v_selectinfo.vsi_lock);
+	SLIST_INSERT_HEAD(&vp->v_selectinfo.vsi_selinfo.si_note, kn,
+	    kn_selnext);
+	simple_unlock(&vp->v_selectinfo.vsi_lock);
+
+#ifdef notyet
+	/* XXXLUKEM lock the struct? */
+	SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
+#endif
+
+ unlock:
+	lockmgr(&nfskevq_lock, LK_RELEASE, NULL, self);
+
+	return (rv);
+}