author:    Marc Espie <espie@cvs.openbsd.org>  2013-06-01 14:37:33 +0000
committer: Marc Espie <espie@cvs.openbsd.org>  2013-06-01 14:37:33 +0000
commit:    566bd1ae4d70445f5d6c44ec81089ef2e0f45545 (patch)
tree:      86b4e5c080a63f6432336367da2b5dbed057bce8
parent:    f16da5fc4d1309baa29e5e98701668e6b58ce0e0 (diff)
import tmpfs code, originally from NetBSD.
heavy lifting by Pedro Martelletto, timestamp fixes by me.
THIS IS NOT ENABLED YET, AND REQUIRES UVM CHANGES AND REVIEW.
Imported so that work on it can mostly be done in-tree, with less painful
diff exchanges, and so that we have a history of further changes.
okay tedu@, deraadt@
-rw-r--r--  sbin/mount_tmpfs/Makefile          17
-rw-r--r--  sbin/mount_tmpfs/mount_tmpfs.8    140
-rw-r--r--  sbin/mount_tmpfs/mount_tmpfs.c    250
-rw-r--r--  sbin/mount_tmpfs/mount_tmpfs.h     35
-rw-r--r--  sys/tmpfs/tmpfs.h                 383
-rw-r--r--  sys/tmpfs/tmpfs_fifoops.c         130
-rw-r--r--  sys/tmpfs/tmpfs_mem.c             229
-rw-r--r--  sys/tmpfs/tmpfs_specops.c         123
-rw-r--r--  sys/tmpfs/tmpfs_subr.c           1262
-rw-r--r--  sys/tmpfs/tmpfs_vfsops.c          367
-rw-r--r--  sys/tmpfs/tmpfs_vnops.c          2747
-rw-r--r--  sys/tmpfs/tmpfs_vnops.h            84
12 files changed, 5767 insertions, 0 deletions
diff --git a/sbin/mount_tmpfs/Makefile b/sbin/mount_tmpfs/Makefile new file mode 100644 index 00000000000..c1286c77320 --- /dev/null +++ b/sbin/mount_tmpfs/Makefile @@ -0,0 +1,17 @@ +# $NetBSD: Makefile,v 1.5 2009/04/11 07:58:12 lukem Exp $ + +.include <bsd.own.mk> + +PROG= mount_tmpfs +#SRCS= mount_tmpfs.c fattr.c pathadj.c +SRCS= mount_tmpfs.c getmntopts.c +MAN= mount_tmpfs.8 + +MOUNT= ${.CURDIR}/../mount +.PATH: ${MOUNT} + +CPPFLAGS+= -I${MOUNT} +DPADD+= ${LIBUTIL} +LDADD+= -lutil + +.include <bsd.prog.mk> diff --git a/sbin/mount_tmpfs/mount_tmpfs.8 b/sbin/mount_tmpfs/mount_tmpfs.8 new file mode 100644 index 00000000000..195d56b2df8 --- /dev/null +++ b/sbin/mount_tmpfs/mount_tmpfs.8 @@ -0,0 +1,140 @@ +.\" $NetBSD: mount_tmpfs.8,v 1.14 2008/04/30 13:10:53 martin Exp $ +.\" +.\" Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Julio M. Merino Vidal, developed as part of Google's Summer of Code +.\" 2005 program. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd February 13, 2008 +.Dt MOUNT_TMPFS 8 +.Os +.Sh NAME +.Nm mount_tmpfs +.Nd mount an efficient memory file system +.Sh SYNOPSIS +.Nm +.Op Fl g Ar group +.Op Fl m Ar mode +.Op Fl n Ar nodes +.Op Fl o Ar options +.Op Fl s Ar size +.Op Fl u Ar user +.Ar tmpfs +.Ar mount_point +.Sh DESCRIPTION +The +.Nm +command attaches an instance of the efficient memory file system to the +global file system namespace. +The +.Ar tmpfs +parameter only exists for compatibility with the other mount commands and +is ignored. +The directory specified by +.Ar mount_point +is converted to an absolute path before use and its attributes (owner, +group and mode) are inherited unless explicitly overriden by the options +described below. +.Pp +The following options are supported: +.Bl -tag -width XoXoptions +.It Fl g Ar group +Specifies the group name or GID of the root inode of the file system. +Defaults to the mount point's GID. +.It Fl m Ar mode +Specifies the mode (in octal notation) of the root inode of the file system. +Defaults to the mount point's mode. +.It Fl n Ar nodes +Specifies the maximum number of nodes available to the file system. 
+If not specified, the file system chooses a reasonable maximum given its +size at mount time, which can be limited with +.Fl s . +.It Fl o Ar options +Options are specified with a +.Fl o +flag followed by a comma-separated string of options. +See the +.Xr mount 8 +man page for possible options and their meanings. +.It Fl s Ar size +Specifies the total file system size in bytes. +If zero is given (the default), the available amount of memory (including +main memory and swap space) will be used. +Note that four megabytes are always reserved for the system and cannot +be assigned to the file system. +.It Fl u Ar user +Specifies the user name or UID of the root inode of the file system. +Defaults to the mount point's UID. +.El +.Pp +Every option that accepts a numerical value as its argument can take a +trailing +.Sq b +to indicate bytes (the default), a trailing +.Sq k +to indicate kilobytes, a trailing +.Sq M +to indicate megabytes or a trailing +.Sq G +to indicate gigabytes. +Note that both lowercase and uppercase forms of these letters are allowed. +.Sh EXAMPLES +The following command mounts a tmpfs instance over the +.Pa /tmp +directory, inheriting its owner, group and mode settings: +.Pp +.Ic "mount -t tmpfs tmpfs /tmp" +.Pp +The following command mounts a tmpfs instance over the +.Pa /mnt +directory, setting a 20 megabytes limit in space, owned by the +.Sq joe +user and belonging to the +.Sq users +group, with a restricted 0700 mode: +.Pp +.Ic "mount -t tmpfs -o -s20M -o -ujoe -o -gusers -o -m0700 tmpfs /mnt" +.Sh SEE ALSO +.Xr fstab 5 , +.Xr mount 8 +.Sh HISTORY +The +.Nm +utility first appeared in +.Nx 4.0 . +.Sh CAVEATS +The update of mount options (through mount -u) is currently not supported. +.Sh BUGS +File system meta-data is not pageable. +If there is not enough main memory to hold this information, the system may +become unstable or very unresponsive because it will not be able to allocate +required memory. +A malicious user could trigger this condition if he could create lots of +files inside a size-unbounded tmpfs file system. +Limiting the number of nodes per file system +.Pq Fl n +will prevent this; the default value for this setting is also often adjusted +to an adequate value to resolve this. diff --git a/sbin/mount_tmpfs/mount_tmpfs.c b/sbin/mount_tmpfs/mount_tmpfs.c new file mode 100644 index 00000000000..28d43b0af3c --- /dev/null +++ b/sbin/mount_tmpfs/mount_tmpfs.c @@ -0,0 +1,250 @@ +/* $NetBSD: mount_tmpfs.c,v 1.24 2008/08/05 20:57:45 pooka Exp $ */ + +/* + * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if 0 +__RCSID("$NetBSD: mount_tmpfs.c,v 1.24 2008/08/05 20:57:45 pooka Exp $"); +#endif + +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <grp.h> +#include <mntopts.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mount_tmpfs.h" + +/* --------------------------------------------------------------------- */ + +static const struct mntopt mopts[] = { + MOPT_STDOPTS, + MOPT_UPDATE, + { NULL }, +}; + +/* --------------------------------------------------------------------- */ + +static void usage(void) __dead; +static uid_t a_uid(const char *); +static gid_t a_gid(const char *); +static uid_t a_gid(const char *); +static int a_num(const char *, const char *); +static mode_t a_mask(const char *); +static void pathadj(const char *, char *); + +/* --------------------------------------------------------------------- */ + +void +mount_tmpfs_parseargs(int argc, char *argv[], + struct tmpfs_args *args, int *mntflags, + char *canon_dev, char *canon_dir) +{ + int gidset, modeset, uidset; /* Ought to be 'bool'. */ + int ch; + gid_t gid; + uid_t uid; + mode_t mode; + int64_t tmpnumber; + struct stat sb; + + /* Set default values for mount point arguments. */ + memset(args, 0, sizeof(*args)); + args->ta_version = TMPFS_ARGS_VERSION; + args->ta_size_max = 0; + args->ta_nodes_max = 0; + *mntflags = 0; + + gidset = 0; gid = 0; + uidset = 0; uid = 0; + modeset = 0; mode = 0; + + optind = optreset = 1; + while ((ch = getopt(argc, argv, "g:m:n:o:s:u:")) != -1 ) { + switch (ch) { + case 'g': + gid = a_gid(optarg); + gidset = 1; + break; + + case 'm': + mode = a_mask(optarg); + modeset = 1; + break; + + case 'n': + + if (scan_scaled(optarg, &tmpnumber) == -1) + err(EXIT_FAILURE, "failed to parse nodes `%s'", + optarg); + args->ta_nodes_max = tmpnumber; + break; + + case 'o': + getmntopts(optarg, mopts, mntflags); + break; + + case 's': + if (scan_scaled(optarg, &tmpnumber) == -1) + err(EXIT_FAILURE, "failed to parse size `%s'", + optarg); + args->ta_size_max = tmpnumber; + break; + + case 'u': + uid = a_uid(optarg); + uidset = 1; + break; + + case '?': + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + + strlcpy(canon_dev, argv[0], MAXPATHLEN); + pathadj(argv[1], canon_dir); + + if (stat(canon_dir, &sb) == -1) + err(EXIT_FAILURE, "cannot stat `%s'", canon_dir); + + args->ta_root_uid = uidset ? uid : sb.st_uid; + args->ta_root_gid = gidset ? gid : sb.st_gid; + args->ta_root_mode = modeset ? 
mode : sb.st_mode; +} + +/* --------------------------------------------------------------------- */ + +static void +usage(void) +{ + extern char *__progname; + (void)fprintf(stderr, + "Usage: %s [-g group] [-m mode] [-n nodes] [-o options] [-s size]\n" + " [-u user] tmpfs mountpoint\n", __progname); + exit(1); +} + +/* --------------------------------------------------------------------- */ + +int +mount_tmpfs(int argc, char *argv[]) +{ + struct tmpfs_args args; + char canon_dev[MAXPATHLEN], canon_dir[MAXPATHLEN]; + int mntflags; + + mount_tmpfs_parseargs(argc, argv, &args, &mntflags, + canon_dev, canon_dir); + + if (mount(MOUNT_TMPFS, canon_dir, mntflags, &args) == -1) + err(EXIT_FAILURE, "tmpfs on %s", canon_dir); + + return EXIT_SUCCESS; +} + +int +main(int argc, char *argv[]) +{ + + /* setprogname(argv[0]); */ + return mount_tmpfs(argc, argv); +} + +static uid_t +a_uid(const char *s) +{ + struct passwd *pw; + + if ((pw = getpwnam(s)) != NULL) + return pw->pw_uid; + return a_num(s, "user"); +} + +static gid_t +a_gid(const char *s) +{ + struct group *gr; + + if ((gr = getgrnam(s)) != NULL) + return gr->gr_gid; + return a_num(s, "group"); +} + +static int +a_num(const char *s, const char *id_type) +{ + int id; + char *ep; + + id = strtol(s, &ep, 0); + if (*ep || s == ep || id < 0) + errx(1, "unknown %s id: %s", id_type, s); + return id; +} + +static mode_t +a_mask(const char *s) +{ + int rv; + char *ep; + + rv = strtol(s, &ep, 8); + if (s == ep || *ep || rv < 0) + errx(1, "invalid file mode: %s", s); + return rv; +} + +static void +pathadj(const char *input, char *adjusted) +{ + + if (realpath(input, adjusted) == NULL) + warn("Warning: realpath %s", input); + if (strncmp(input, adjusted, MAXPATHLEN)) { + warnx("\"%s\" is a non-resolved or relative path.", input); + warnx("using \"%s\" instead.", adjusted); + } +} diff --git a/sbin/mount_tmpfs/mount_tmpfs.h b/sbin/mount_tmpfs/mount_tmpfs.h new file mode 100644 index 00000000000..10fc2eec9bf --- /dev/null +++ b/sbin/mount_tmpfs/mount_tmpfs.h @@ -0,0 +1,35 @@ +/* $NetBSD: mount_tmpfs.h,v 1.1 2008/08/05 20:57:45 pooka Exp $ */ + +/* + * Copyright (c) 2008 The NetBSD Foundation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _SBIN_MOUNT_TMPFS_MOUNT_TMPFS_H_ +#define _SBIN_MOUNT_TMPFS_MOUNT_TMPFS_H_ + +int mount_tmpfs(int, char **); +void mount_tmpfs_parseargs(int, char **, struct tmpfs_args *, int *, + char *, char *); + +#endif /* _SBIN_MOUNT_TMPFS_MOUNT_TMPFS_H_ */ diff --git a/sys/tmpfs/tmpfs.h b/sys/tmpfs/tmpfs.h new file mode 100644 index 00000000000..207957d7ba1 --- /dev/null +++ b/sys/tmpfs/tmpfs.h @@ -0,0 +1,383 @@ +/* $NetBSD: tmpfs.h,v 1.45 2011/09/27 01:10:43 christos Exp $ */ + +/* + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FS_TMPFS_TMPFS_H_ +#define _FS_TMPFS_TMPFS_H_ + +#if !defined(_KERNEL) && !defined(_KMEMUSER) +#error "not supposed to be exposed to userland" +#endif + +#include <sys/dirent.h> +#include <sys/mount.h> +#include <sys/pool.h> +#include <sys/queue.h> +#include <sys/stdint.h> +#include <sys/rwlock.h> + +/* + * Internal representation of a tmpfs directory entry. + * + * All fields are protected by vnode lock. + */ +typedef struct tmpfs_dirent { + TAILQ_ENTRY(tmpfs_dirent) td_entries; + + /* Pointer to the inode this entry refers to. */ + struct tmpfs_node * td_node; + + /* Name and its length. */ + char * td_name; + uint16_t td_namelen; +} tmpfs_dirent_t; + +TAILQ_HEAD(tmpfs_dir, tmpfs_dirent); + +#if defined(_KERNEL) + +/* TMPFS_MAXNAMLEN can't exceed UINT16_MAX. */ +#define TMPFS_MAXNAMLEN 255 + +#define TMPFS_DIRCOOKIE_DOT 0 +#define TMPFS_DIRCOOKIE_DOTDOT 1 +#define TMPFS_DIRCOOKIE_EOF 2 + +/* + * Each entry in a directory has a cookie that identifies it. Cookies + * supersede offsets within directories, as tmpfs has no offsets as such. + * + * The '.', '..' and the end of directory markers have fixed cookies, + * which cannot collide with the cookies generated by other entries. + * + * The cookies for the other entries are generated based on the memory + * address of their representative meta-data structure. + * + * XXX: Truncating directory cookies to 31 bits now - workaround for + * problem with Linux compat, see PR/32034. 
+ */ +static inline off_t +tmpfs_dircookie(tmpfs_dirent_t *de) +{ + off_t cookie; + + cookie = ((off_t)(uintptr_t)de >> 1) & 0x7FFFFFFF; + KASSERT(cookie != TMPFS_DIRCOOKIE_DOT); + KASSERT(cookie != TMPFS_DIRCOOKIE_DOTDOT); + KASSERT(cookie != TMPFS_DIRCOOKIE_EOF); + + return cookie; +} +#endif + +/* + * Internal representation of a tmpfs file system node -- inode. + * + * This structure is splitted in two parts: one holds attributes common + * to all file types and the other holds data that is only applicable to + * a particular type. + * + * All fields are protected by vnode lock. The vnode association itself + * is protected by tmpfs_node_t::tn_nlock. + */ +typedef struct tmpfs_node { + LIST_ENTRY(tmpfs_node) tn_entries; + + /* + * Each inode has a corresponding vnode. It is a bi-directional + * association. Whenever vnode is allocated, its v_data field is + * set to the inode it reference, and tmpfs_node_t::tn_vnode is + * set to point to the said vnode. + * + * Further attempts to allocate a vnode for this same node will + * result in returning a new reference to the value stored in + * tn_vnode. It may be NULL when the node is unused (that is, + * no vnode has been allocated or it has been reclaimed). + */ + struct rwlock tn_nlock; /* node lock */ + struct lock tn_vlock; /* vnode lock */ + struct vnode * tn_vnode; + + /* Directory entry. Only a hint, since hard link can have multiple. */ + tmpfs_dirent_t * tn_dirent_hint; + + /* The inode type: VBLK, VCHR, VDIR, VFIFO, VLNK, VREG or VSOCK. */ + enum vtype tn_type; + + /* Inode identifier and generation number. */ + ino_t tn_id; + unsigned long tn_gen; + + /* The inode size. */ + off_t tn_size; + + /* Generic node attributes. */ + uid_t tn_uid; + gid_t tn_gid; + mode_t tn_mode; + int tn_flags; + nlink_t tn_links; + struct timespec tn_atime; + struct timespec tn_mtime; + struct timespec tn_ctime; + struct timespec tn_birthtime; + + /* Head of byte-level lock list (used by tmpfs_advlock). */ + struct lockf * tn_lockf; + + union { + /* Type case: VBLK or VCHR. */ + struct { + dev_t tn_rdev; + } tn_dev; + + /* Type case: VDIR. */ + struct { + /* Parent directory (root inode points to itself). */ + struct tmpfs_node * tn_parent; + + /* List of directory entries. */ + struct tmpfs_dir tn_dir; + + /* + * Number and pointer of the last directory entry + * returned by the readdir(3) operation. + */ + off_t tn_readdir_lastn; + struct tmpfs_dirent * tn_readdir_lastp; + } tn_dir; + + /* Type case: VLNK. */ + struct tn_lnk { + /* The link's target. */ + char * tn_link; + } tn_lnk; + + /* Type case: VREG. */ + struct tn_reg { + /* Underlying UVM object to store contents. */ + struct uvm_object * tn_aobj; + size_t tn_aobj_pages; + vaddr_t tn_aobj_pgptr; + voff_t tn_aobj_pgnum; + } tn_reg; + } tn_spec; + +#define tn_uobj tn_spec.tn_reg.tn_aobj +#define tn_pgptr tn_spec.tn_reg.tn_aobj_pgptr +#define tn_pgnum tn_spec.tn_reg.tn_aobj_pgnum + +} tmpfs_node_t; + +#if defined(_KERNEL) + +LIST_HEAD(tmpfs_node_list, tmpfs_node); + +/* Status flags. */ +#define TMPFS_NODE_ACCESSED 0x01 +#define TMPFS_NODE_MODIFIED 0x02 +#define TMPFS_NODE_CHANGED 0x04 + +#define TMPFS_NODE_STATUSALL \ + (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED) + +/* + * Bit indicating vnode reclamation. + * We abuse tmpfs_node_t::tn_gen for that. 
+ */ +#define TMPFS_NODE_GEN_MASK (~0UL >> 1) +#define TMPFS_RECLAIMING_BIT (~TMPFS_NODE_GEN_MASK) + +#define TMPFS_NODE_RECLAIMING(node) \ + (((node)->tn_gen & TMPFS_RECLAIMING_BIT) != 0) + +#define TMPFS_NODE_GEN(node) \ + ((node)->tn_gen & TMPFS_NODE_GEN_MASK) + +/* White-out inode indicator. */ +#define TMPFS_NODE_WHITEOUT ((tmpfs_node_t *)-1) + +/* + * Internal representation of a tmpfs mount point. + */ +typedef struct tmpfs_mount { + /* Limit and number of bytes in use by the file system. */ + uint64_t tm_mem_limit; + uint64_t tm_bytes_used; + struct rwlock tm_acc_lock; + + /* Pointer to the root inode. */ + tmpfs_node_t * tm_root; + + /* Maximum number of possible nodes for this file system. */ + unsigned int tm_nodes_max; + + /* Number of nodes currently allocated. */ + unsigned int tm_nodes_cnt; + + /* List of inodes and the lock protecting it. */ + struct rwlock tm_lock; + struct tmpfs_node_list tm_nodes; +} tmpfs_mount_t; + +/* + * This structure maps a file identifier to a tmpfs node. Used by the + * NFS code. + */ +typedef struct tmpfs_fid { + uint16_t tf_len; + uint16_t tf_pad; + uint32_t tf_gen; + ino_t tf_id; +} tmpfs_fid_t; + +/* + * Prototypes for tmpfs_subr.c. + */ + +int tmpfs_alloc_node(tmpfs_mount_t *, enum vtype, uid_t, gid_t, + mode_t, char *, dev_t, tmpfs_node_t **); +void tmpfs_free_node(tmpfs_mount_t *, tmpfs_node_t *); + +int tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *, + struct componentname *, char *); + +int tmpfs_vnode_get(struct mount *, tmpfs_node_t *, struct vnode **); + +int tmpfs_alloc_dirent(tmpfs_mount_t *, const char *, uint16_t, + tmpfs_dirent_t **); +void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *); +void tmpfs_dir_attach(struct vnode *, tmpfs_dirent_t *, tmpfs_node_t *); +void tmpfs_dir_detach(struct vnode *, tmpfs_dirent_t *); + +tmpfs_dirent_t *tmpfs_dir_lookup(tmpfs_node_t *, struct componentname *); +tmpfs_dirent_t *tmpfs_dir_cached(tmpfs_node_t *); + +int tmpfs_dir_getdotdent(tmpfs_node_t *, struct uio *); +int tmpfs_dir_getdotdotdent(tmpfs_node_t *, struct uio *); +tmpfs_dirent_t *tmpfs_dir_lookupbycookie(tmpfs_node_t *, off_t); +int tmpfs_dir_getdents(tmpfs_node_t *, struct uio *, off_t *); + +int tmpfs_reg_resize(struct vnode *, off_t); +int tmpfs_truncate(struct vnode *, off_t); + +int tmpfs_chflags(struct vnode *, int, struct ucred *, struct proc *); +int tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct proc *); +int tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct proc *); +int tmpfs_chsize(struct vnode *, u_quad_t, struct ucred *, struct proc *); +int tmpfs_chtimes(struct vnode *, const struct timespec *, + const struct timespec *, int, struct ucred *, + struct proc *); +void tmpfs_update(tmpfs_node_t *, int); +int tmpfs_zeropg(tmpfs_node_t *, voff_t, vaddr_t); +int tmpfs_uio_cached(tmpfs_node_t *); +int tmpfs_uiomove(tmpfs_node_t *, struct uio *, vsize_t); +void tmpfs_uio_uncache(tmpfs_node_t *); +void tmpfs_uio_cache(tmpfs_node_t *, voff_t, vaddr_t); +vaddr_t tmpfs_uio_lookup(tmpfs_node_t *, voff_t); + +/* + * Prototypes for tmpfs_mem.c. 
+ */ + +void tmpfs_mntmem_init(tmpfs_mount_t *, uint64_t); +void tmpfs_mntmem_destroy(tmpfs_mount_t *); + +size_t tmpfs_mem_info(int); +uint64_t tmpfs_bytes_max(tmpfs_mount_t *); +uint64_t tmpfs_pages_avail(tmpfs_mount_t *); +int tmpfs_mem_incr(tmpfs_mount_t *, size_t); +void tmpfs_mem_decr(tmpfs_mount_t *, size_t); + +tmpfs_dirent_t *tmpfs_dirent_get(tmpfs_mount_t *); +void tmpfs_dirent_put(tmpfs_mount_t *, tmpfs_dirent_t *); + +tmpfs_node_t * tmpfs_node_get(tmpfs_mount_t *); +void tmpfs_node_put(tmpfs_mount_t *, tmpfs_node_t *); + +char * tmpfs_strname_alloc(tmpfs_mount_t *, size_t); +void tmpfs_strname_free(tmpfs_mount_t *, char *, size_t); +int tmpfs_strname_neqlen(struct componentname *, struct componentname *); + +/* + * Ensures that the node pointed by 'node' is a directory and that its + * contents are consistent with respect to directories. + */ +#define TMPFS_VALIDATE_DIR(node) \ + KASSERT((node)->tn_type == VDIR); \ + KASSERT((node)->tn_size % sizeof(tmpfs_dirent_t) == 0); \ + KASSERT((node)->tn_spec.tn_dir.tn_readdir_lastp == NULL || \ + tmpfs_dircookie((node)->tn_spec.tn_dir.tn_readdir_lastp) == \ + (node)->tn_spec.tn_dir.tn_readdir_lastn); + +/* + * Memory management stuff. + */ + +/* Amount of memory pages to reserve for the system. */ +#define TMPFS_PAGES_RESERVED (4 * 1024 * 1024 / PAGE_SIZE) + +/* + * Routines to convert VFS structures to tmpfs internal ones. + */ + +static inline tmpfs_mount_t * +VFS_TO_TMPFS(struct mount *mp) +{ + tmpfs_mount_t *tmp = mp->mnt_data; + + KASSERT(tmp != NULL); + return tmp; +} + +static inline tmpfs_node_t * +VP_TO_TMPFS_DIR(struct vnode *vp) +{ + tmpfs_node_t *node = vp->v_data; + + KASSERT(node != NULL); + TMPFS_VALIDATE_DIR(node); + return node; +} + +#endif /* defined(_KERNEL) */ + +static __inline tmpfs_node_t * +VP_TO_TMPFS_NODE(struct vnode *vp) +{ + tmpfs_node_t *node = vp->v_data; +#ifdef KASSERT + KASSERT(node != NULL); +#endif + return node; +} + +#endif /* _FS_TMPFS_TMPFS_H_ */ diff --git a/sys/tmpfs/tmpfs_fifoops.c b/sys/tmpfs/tmpfs_fifoops.c new file mode 100644 index 00000000000..cc89d5f6d3b --- /dev/null +++ b/sys/tmpfs/tmpfs_fifoops.c @@ -0,0 +1,130 @@ +/* $NetBSD: tmpfs_fifoops.c,v 1.9 2011/05/24 20:17:49 rmind Exp $ */ + +/* + * Copyright (c) 2005 The NetBSD Foundation, Inc. + * Copyright (c) 2013 Pedro Martelletto + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * tmpfs vnode interface for named pipes. + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_fifoops.c,v 1.9 2011/05/24 20:17:49 rmind Exp $"); +#endif + +#include <sys/param.h> +#include <sys/vnode.h> + +#include <tmpfs/tmpfs.h> +#include <miscfs/fifofs/fifo.h> +#include <tmpfs/tmpfs_vnops.h> + +int tmpfs_fifo_close (void *); +int tmpfs_fifo_read (void *); +int tmpfs_fifo_write (void *); +int tmpfs_fifo_fsync (void *); + +/* + * vnode operations vector used for fifos stored in a tmpfs file system. + */ + +struct vops tmpfs_fifovops = { + .vop_lookup = vop_generic_lookup, + .vop_create = fifo_badop, + .vop_mknod = fifo_badop, + .vop_open = fifo_open, + .vop_close = fifo_close, + .vop_access = tmpfs_access, + .vop_getattr = tmpfs_getattr, + .vop_setattr = tmpfs_setattr, + .vop_read = tmpfs_fifo_read, + .vop_write = tmpfs_fifo_write, + .vop_ioctl = fifo_ioctl, + .vop_poll = fifo_poll, + .vop_kqfilter = fifo_kqfilter, + .vop_revoke = vop_generic_revoke, + .vop_fsync = tmpfs_fifo_fsync, + .vop_remove = fifo_badop, + .vop_link = fifo_badop, + .vop_rename = fifo_badop, + .vop_mkdir = fifo_badop, + .vop_rmdir = fifo_badop, + .vop_symlink = fifo_badop, + .vop_readdir = fifo_badop, + .vop_readlink = fifo_badop, + .vop_abortop = fifo_badop, + .vop_inactive = tmpfs_inactive, + .vop_reclaim = tmpfs_reclaim, + .vop_lock = tmpfs_lock, + .vop_unlock = tmpfs_unlock, + .vop_bmap = vop_generic_bmap, + .vop_strategy = fifo_badop, + .vop_print = tmpfs_print, + .vop_pathconf = fifo_pathconf, + .vop_advlock = fifo_advlock, + .vop_bwrite = tmpfs_bwrite, +}; + +int +tmpfs_fifo_read(void *v) +{ + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_ACCESSED); + return (fifo_read(v)); +} + +int +tmpfs_fifo_write(void *v) +{ + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_MODIFIED); + return (fifo_write(v)); +} + +int +tmpfs_fifo_fsync(void *v) +{ + return (0); +} diff --git a/sys/tmpfs/tmpfs_mem.c b/sys/tmpfs/tmpfs_mem.c new file mode 100644 index 00000000000..d88adb0bcb7 --- /dev/null +++ b/sys/tmpfs/tmpfs_mem.c @@ -0,0 +1,229 @@ +/* $NetBSD: tmpfs_mem.c,v 1.4 2011/05/24 01:09:47 rmind Exp $ */ + +/* + * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Mindaugas Rasiukevicius. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * tmpfs memory allocation routines. + * Implements memory usage accounting and limiting. + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_mem.c,v 1.4 2011/05/24 01:09:47 rmind Exp $"); +#endif + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/pool.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <tmpfs/tmpfs.h> + +extern struct pool tmpfs_dirent_pool; +extern struct pool tmpfs_node_pool; + +void +tmpfs_mntmem_init(struct tmpfs_mount *mp, uint64_t memlimit) +{ + + rw_init(&mp->tm_acc_lock, "tacclk"); + mp->tm_mem_limit = memlimit; + mp->tm_bytes_used = 0; +} + +void +tmpfs_mntmem_destroy(struct tmpfs_mount *mp) +{ + + KASSERT(mp->tm_bytes_used == 0); + /* mutex_destroy(&mp->tm_acc_lock); */ +} + +/* + * tmpfs_mem_info: return the number of available memory pages. + * + * => If 'total' is true, then return _total_ amount of pages. + * => If false, then return the amount of _free_ memory pages. + * + * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid + * excessive memory usage. 
+ */ +size_t +tmpfs_mem_info(int total) +{ + int size = 0; + + /* XXX: unlocked */ + size += uvmexp.swpages; + if (!total) { + size -= uvmexp.swpgonly; + } + + size += uvmexp.free; + /* size += uvmexp.filepages; */ + if (size > uvmexp.wired) { + size -= uvmexp.wired; + } else { + size = 0; + } + + KASSERT(size >= 0); + + return (size_t)size; +} + +uint64_t +tmpfs_bytes_max(struct tmpfs_mount *mp) +{ + size_t freepages = tmpfs_mem_info(0); + uint64_t avail_mem; + + if (freepages < TMPFS_PAGES_RESERVED) { + freepages = 0; + } else { + freepages -= TMPFS_PAGES_RESERVED; + } + avail_mem = round_page(mp->tm_bytes_used) + (freepages << PAGE_SHIFT); + return MIN(mp->tm_mem_limit, avail_mem); +} + +uint64_t +tmpfs_pages_avail(struct tmpfs_mount *mp) +{ + + return (tmpfs_bytes_max(mp) - mp->tm_bytes_used) >> PAGE_SHIFT; +} + +int +tmpfs_mem_incr(struct tmpfs_mount *mp, size_t sz) +{ + uint64_t lim; + + rw_enter_write(&mp->tm_acc_lock); + lim = tmpfs_bytes_max(mp); + if (mp->tm_bytes_used + sz >= lim) { + rw_exit_write(&mp->tm_acc_lock); + return 0; + } + mp->tm_bytes_used += sz; + rw_exit_write(&mp->tm_acc_lock); + return 1; +} + +void +tmpfs_mem_decr(struct tmpfs_mount *mp, size_t sz) +{ + + rw_enter_write(&mp->tm_acc_lock); + KASSERT(mp->tm_bytes_used >= sz); + mp->tm_bytes_used -= sz; + rw_exit_write(&mp->tm_acc_lock); +} + +struct tmpfs_dirent * +tmpfs_dirent_get(struct tmpfs_mount *mp) +{ + + if (!tmpfs_mem_incr(mp, sizeof(struct tmpfs_dirent))) { + return NULL; + } + return pool_get(&tmpfs_dirent_pool, PR_WAITOK); +} + +void +tmpfs_dirent_put(struct tmpfs_mount *mp, struct tmpfs_dirent *de) +{ + + tmpfs_mem_decr(mp, sizeof(struct tmpfs_dirent)); + pool_put(&tmpfs_dirent_pool, de); +} + +struct tmpfs_node * +tmpfs_node_get(struct tmpfs_mount *mp) +{ + + mp->tm_nodes_cnt++; + if (mp->tm_nodes_cnt > mp->tm_nodes_max) { + mp->tm_nodes_cnt--; + return NULL; + } + if (!tmpfs_mem_incr(mp, sizeof(struct tmpfs_node))) { + return NULL; + } + return pool_get(&tmpfs_node_pool, PR_WAITOK); +} + +void +tmpfs_node_put(struct tmpfs_mount *mp, struct tmpfs_node *tn) +{ + + mp->tm_nodes_cnt--; + tmpfs_mem_decr(mp, sizeof(struct tmpfs_node)); + pool_put(&tmpfs_node_pool, tn); +} + +/* + * Quantum size to round-up the tmpfs names in order to reduce re-allocations. + */ + +#define TMPFS_NAME_QUANTUM (32) +#define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ + +char * +tmpfs_strname_alloc(struct tmpfs_mount *mp, size_t len) +{ + const size_t sz = roundup2(len, TMPFS_NAME_QUANTUM); + + KASSERT(sz > 0 && sz <= 1024); + if (!tmpfs_mem_incr(mp, sz)) { + return NULL; + } + return malloc(sz, M_TEMP, M_WAITOK); /* XXX */ +} + +void +tmpfs_strname_free(struct tmpfs_mount *mp, char *str, size_t len) +{ + const size_t sz = roundup2(len, TMPFS_NAME_QUANTUM); + + KASSERT(sz > 0 && sz <= 1024); + tmpfs_mem_decr(mp, sz); + free(str, M_TEMP); +} + +int +tmpfs_strname_neqlen(struct componentname *fcnp, struct componentname *tcnp) +{ + const size_t fln = roundup2(fcnp->cn_namelen, TMPFS_NAME_QUANTUM); + const size_t tln = roundup2(tcnp->cn_namelen, TMPFS_NAME_QUANTUM); + + return (fln != tln) || memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fln); +} diff --git a/sys/tmpfs/tmpfs_specops.c b/sys/tmpfs/tmpfs_specops.c new file mode 100644 index 00000000000..939d4a54a3a --- /dev/null +++ b/sys/tmpfs/tmpfs_specops.c @@ -0,0 +1,123 @@ +/* $NetBSD: tmpfs_specops.c,v 1.10 2011/05/24 20:17:49 rmind Exp $ */ + +/* + * Copyright (c) 2005 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * tmpfs vnode interface for special devices. + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_specops.c,v 1.10 2011/05/24 20:17:49 rmind Exp $"); +#endif + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/specdev.h> +#include <tmpfs/tmpfs_vnops.h> + +#include <tmpfs/tmpfs.h> + +int tmpfs_spec_close (void *); +int tmpfs_spec_read (void *); +int tmpfs_spec_write (void *); + +/* + * vnode operations vector used for special devices stored in a tmpfs + * file system. 
+ */ + +struct vops tmpfs_specvops = { + .vop_lookup = vop_generic_lookup, + .vop_create = spec_badop, + .vop_mknod = spec_badop, + .vop_open = spec_badop, + .vop_close = spec_close, + .vop_access = tmpfs_access, + .vop_getattr = tmpfs_getattr, + .vop_setattr = tmpfs_setattr, + .vop_read = tmpfs_spec_read, + .vop_write = tmpfs_spec_write, + .vop_ioctl = spec_ioctl, + .vop_poll = spec_poll, + .vop_kqfilter = spec_kqfilter, + .vop_revoke = vop_generic_revoke, + .vop_fsync = spec_fsync, + .vop_remove = spec_badop, + .vop_link = spec_badop, + .vop_rename = spec_badop, + .vop_mkdir = spec_badop, + .vop_rmdir = spec_badop, + .vop_symlink = spec_badop, + .vop_readdir = spec_badop, + .vop_readlink = spec_badop, + .vop_abortop = spec_badop, + .vop_inactive = tmpfs_inactive, + .vop_reclaim = tmpfs_reclaim, + .vop_lock = tmpfs_lock, + .vop_unlock = tmpfs_unlock, + .vop_bmap = vop_generic_bmap, + .vop_strategy = spec_strategy, + .vop_print = tmpfs_print, + .vop_pathconf = spec_pathconf, + .vop_advlock = spec_advlock, + .vop_bwrite = vop_generic_bwrite, +}; + +int +tmpfs_spec_read(void *v) +{ + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_ACCESSED); + return (spec_read(ap)); +} + +int +tmpfs_spec_write(void *v) +{ + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_MODIFIED); + return (spec_write(ap)); +} diff --git a/sys/tmpfs/tmpfs_subr.c b/sys/tmpfs/tmpfs_subr.c new file mode 100644 index 00000000000..cf044459685 --- /dev/null +++ b/sys/tmpfs/tmpfs_subr.c @@ -0,0 +1,1262 @@ +/* $NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $ */ + +/* + * Copyright (c) 2005-2011 The NetBSD Foundation, Inc. + * Copyright (c) 2013 Pedro Martelletto + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program, and by Mindaugas Rasiukevicius. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Efficient memory file system: interfaces for inode and directory entry + * construction, destruction and manipulation. + * + * Reference counting + * + * The link count of inode (tmpfs_node_t::tn_links) is used as a + * reference counter. However, it has slightly different semantics. + * + * For directories - link count represents directory entries, which + * refer to the directories. In other words, it represents the count + * of sub-directories. It also takes into account the virtual '.' + * entry (which has no real entry in the list). For files - link count + * represents the hard links. Since only empty directories can be + * removed - link count aligns the reference counting requirements + * enough. Note: to check whether directory is not empty, the inode + * size (tmpfs_node_t::tn_size) can be used. + * + * The inode itself, as an object, gathers its first reference when + * directory entry is attached via tmpfs_dir_attach(9). For instance, + * after regular tmpfs_create(), a file would have a link count of 1, + * while directory after tmpfs_mkdir() would have 2 (due to '.'). + * + * Reclamation + * + * It should be noted that tmpfs inodes rely on a combination of vnode + * reference counting and link counting. That is, an inode can only be + * destroyed if its associated vnode is inactive. The destruction is + * done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted + * that tmpfs_node_t::tn_links being 0 is a destruction criterion. + * + * If an inode has references within the file system (tn_links > 0) and + * its inactive vnode gets reclaimed/recycled - then the association is + * broken in tmpfs_reclaim(). In such case, an inode will always pass + * tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode. + * + * Lock order + * + * tmpfs_node_t::tn_nlock -> + * struct vnode::v_vlock -> + * struct vnode::v_interlock + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $"); +#endif + +#include <sys/param.h> +#include <sys/dirent.h> +#include <sys/event.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/time.h> +#include <sys/proc.h> +#include <sys/stat.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <uvm/uvm.h> + +#include <tmpfs/tmpfs.h> +#include <tmpfs/tmpfs_vnops.h> + +/* + * tmpfs_alloc_node: allocate a new inode of a specified type and + * insert it into the list of specified mount point. + */ +int +tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid, + mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node) +{ + tmpfs_node_t *nnode; + struct uvm_object *uobj; + + nnode = tmpfs_node_get(tmp); + if (nnode == NULL) { + return ENOSPC; + } + + /* Initially, no references and no associations. */ + nnode->tn_links = 0; + nnode->tn_vnode = NULL; + nnode->tn_dirent_hint = NULL; + + /* + * XXX Where the pool is backed by a map larger than (4GB * + * sizeof(*nnode)), this may produce duplicate inode numbers + * for applications that do not understand 64-bit ino_t. + */ + nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); + nnode->tn_gen = TMPFS_NODE_GEN_MASK & random(); + + /* Generic initialization. 
*/ + nnode->tn_type = type; + nnode->tn_size = 0; + nnode->tn_flags = 0; + nnode->tn_lockf = NULL; + + nanotime(&nnode->tn_atime); + nnode->tn_birthtime = nnode->tn_atime; + nnode->tn_ctime = nnode->tn_atime; + nnode->tn_mtime = nnode->tn_atime; + + /* XXX pedro: we should check for UID_MAX and GID_MAX instead. */ + KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); + + nnode->tn_uid = uid; + nnode->tn_gid = gid; + nnode->tn_mode = mode; + + /* Type-specific initialization. */ + switch (nnode->tn_type) { + case VBLK: + case VCHR: + /* Character/block special device. */ + KASSERT(rdev != VNOVAL); + nnode->tn_spec.tn_dev.tn_rdev = rdev; + break; + case VDIR: + /* Directory. */ + TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir); + nnode->tn_spec.tn_dir.tn_parent = NULL; + nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; + nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; + + /* Extra link count for the virtual '.' entry. */ + nnode->tn_links++; + break; + case VFIFO: + case VSOCK: + break; + case VLNK: + /* Symbolic link. Target specifies the file name. */ + KASSERT(target && strlen(target) < MAXPATHLEN); + + nnode->tn_size = strlen(target); + if (nnode->tn_size == 0) { + nnode->tn_spec.tn_lnk.tn_link = NULL; + break; + } + nnode->tn_spec.tn_lnk.tn_link = + tmpfs_strname_alloc(tmp, nnode->tn_size); + if (nnode->tn_spec.tn_lnk.tn_link == NULL) { + tmpfs_node_put(tmp, nnode); + return ENOSPC; + } + memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); + break; + case VREG: + /* Regular file. Create an underlying UVM object. */ + uobj = uao_create(0, UAO_FLAG_CANFAIL); + if (uobj == NULL) { + tmpfs_node_put(tmp, nnode); + return ENOSPC; + } + nnode->tn_spec.tn_reg.tn_aobj = uobj; + nnode->tn_spec.tn_reg.tn_aobj_pages = 0; + nnode->tn_spec.tn_reg.tn_aobj_pgptr = (vaddr_t)NULL; + nnode->tn_spec.tn_reg.tn_aobj_pgnum = (voff_t)-1; + break; + default: + KASSERT(0); + } + + rw_init(&nnode->tn_nlock, "tvlk"); + + rw_enter_write(&tmp->tm_lock); + LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); + rw_exit_write(&tmp->tm_lock); + + *node = nnode; + return 0; +} + +/* + * tmpfs_free_node: remove the inode from a list in the mount point and + * destroy the inode structures. + */ +void +tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node) +{ + size_t objsz; + + rw_enter_write(&tmp->tm_lock); + LIST_REMOVE(node, tn_entries); + rw_exit_write(&tmp->tm_lock); + + switch (node->tn_type) { + case VLNK: + if (node->tn_size > 0) { + KASSERT(node->tn_size <= SIZE_MAX); + tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link, + node->tn_size); + } + break; + case VREG: + /* + * Calculate the size of inode data, decrease the used-memory + * counter, and destroy the underlying UVM object (if any). + */ + objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages; + if (objsz != 0) { + tmpfs_mem_decr(tmp, objsz); + } + if (node->tn_spec.tn_reg.tn_aobj != NULL) { + uao_detach(node->tn_spec.tn_reg.tn_aobj); + node->tn_spec.tn_reg.tn_aobj = NULL; + } + break; + case VDIR: + /* + * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); + * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL || + * node == tmp->tm_root); + */ + break; + default: + break; + } + + /* mutex_destroy(&node->tn_nlock); */ + tmpfs_node_put(tmp, node); +} + +/* + * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode. + * + * => Must be called with tmpfs_node_t::tn_nlock held. + * => Returns vnode (*vpp) locked. 
+ */ +int +tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, struct vnode **vpp) +{ + struct vnode *vp, *nvp; + /* kmutex_t *slock; */ + int error; +again: + /* If there is already a vnode, try to reclaim it. */ + if ((vp = node->tn_vnode) != NULL) { + /* atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); */ + node->tn_gen |= TMPFS_RECLAIMING_BIT; + rw_exit_write(&node->tn_nlock); + error = vget(vp, LK_EXCLUSIVE, curproc); + if (error == ENOENT) { + rw_enter_write(&node->tn_nlock); + goto again; + } + /* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */ + node->tn_gen &= ~TMPFS_RECLAIMING_BIT; + *vpp = vp; + return error; + } + if (TMPFS_NODE_RECLAIMING(node)) { + /* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */ + node->tn_gen &= ~TMPFS_RECLAIMING_BIT; + } + + /* + * Get a new vnode and associate it with our inode. Share the + * lock with underlying UVM object, if there is one (VREG case). + */ +#if 0 + if (node->tn_type == VREG) { + struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; + slock = uobj->vmobjlock; + } else { + slock = NULL; + } +#endif + error = getnewvnode(VT_TMPFS, mp, &tmpfs_vops, &vp); + if (error) { + rw_exit_write(&node->tn_nlock); + return error; + } + + lockinit(&node->tn_vlock, PINOD, "tnode", 0, 0); + vp->v_type = node->tn_type; + + /* Type-specific initialization. */ + switch (node->tn_type) { + case VBLK: + case VCHR: + vp->v_op = &tmpfs_specvops; + if ((nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp))) { + nvp->v_data = vp->v_data; + vp->v_data = NULL; + vp->v_op = &spec_vops; + vrele(vp); + vgone(vp); + vp = nvp; + node->tn_vnode = vp; + } + break; + case VDIR: + vp->v_flag |= node->tn_spec.tn_dir.tn_parent == node ? + VROOT : 0; + break; +#ifdef FIFO + case VFIFO: + vp->v_op = &tmpfs_fifovops; + break; +#endif + case VLNK: + case VREG: + case VSOCK: + break; + default: + KASSERT(0); + } + + uvm_vnp_setsize(vp, node->tn_size); + vp->v_data = node; + node->tn_vnode = vp; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); + rw_exit_write(&node->tn_nlock); + + KASSERT(VOP_ISLOCKED(vp)); + *vpp = vp; + return 0; +} + +/* + * tmpfs_alloc_file: allocate a new file of specified type and adds it + * into the parent directory. + * + * => Credentials of the caller are used. + */ +int +tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, + struct componentname *cnp, char *target) +{ + tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node; + tmpfs_dirent_t *de; + int error; + + KASSERT(VOP_ISLOCKED(dvp)); + *vpp = NULL; + + /* Check for the maximum number of links limit. */ + if (vap->va_type == VDIR) { + /* Check for maximum links limit. */ + if (dnode->tn_links == LINK_MAX) { + error = EMLINK; + goto out; + } + KASSERT(dnode->tn_links < LINK_MAX); + } + + /* Allocate a node that represents the new file. */ + error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid, + dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node); + if (error) + goto out; + + /* Allocate a directory entry that points to the new file. */ + error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de); + if (error) { + tmpfs_free_node(tmp, node); + goto out; + } + + /* Get a vnode for the new file. */ + rw_enter_write(&node->tn_nlock); + error = tmpfs_vnode_get(dvp->v_mount, node, vpp); + if (error) { + tmpfs_free_dirent(tmp, de); + tmpfs_free_node(tmp, node); + goto out; + } + +#if 0 /* ISWHITEOUT doesn't exist in OpenBSD */ + /* Remove whiteout before adding the new entry. 
*/ + if (cnp->cn_flags & ISWHITEOUT) { + wde = tmpfs_dir_lookup(dnode, cnp); + KASSERT(wde != NULL && wde->td_node == TMPFS_NODE_WHITEOUT); + tmpfs_dir_detach(dvp, wde); + tmpfs_free_dirent(tmp, wde); + } +#endif + + /* Associate inode and attach the entry into the directory. */ + tmpfs_dir_attach(dvp, de, node); + +#if 0 /* ISWHITEOUT doesn't exist in OpenBSD */ + /* Make node opaque if requested. */ + if (cnp->cn_flags & ISWHITEOUT) + node->tn_flags |= UF_OPAQUE; +#endif + +out: + if (error == 0 && (cnp->cn_flags & SAVESTART) == 0) + pool_put(&namei_pool, cnp->cn_pnbuf); + vput(dvp); + return error; +} + +/* + * tmpfs_alloc_dirent: allocates a new directory entry for the inode. + * The directory entry contains a path name component. + */ +int +tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len, + tmpfs_dirent_t **de) +{ + tmpfs_dirent_t *nde; + + nde = tmpfs_dirent_get(tmp); + if (nde == NULL) + return ENOSPC; + + nde->td_name = tmpfs_strname_alloc(tmp, len); + if (nde->td_name == NULL) { + tmpfs_dirent_put(tmp, nde); + return ENOSPC; + } + nde->td_namelen = len; + memcpy(nde->td_name, name, len); + + *de = nde; + return 0; +} + +/* + * tmpfs_free_dirent: free a directory entry. + */ +void +tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de) +{ + + /* KASSERT(de->td_node == NULL); */ + tmpfs_strname_free(tmp, de->td_name, de->td_namelen); + tmpfs_dirent_put(tmp, de); +} + +/* + * tmpfs_dir_attach: associate directory entry with a specified inode, + * and attach the entry into the directory, specified by vnode. + * + * => Increases link count on the associated node. + * => Increases link count on directory node, if our node is VDIR. + * It is caller's responsibility to check for the LINK_MAX limit. + * => Triggers kqueue events here. + */ +void +tmpfs_dir_attach(struct vnode *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node) +{ + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); + int events = NOTE_WRITE; + + KASSERT(VOP_ISLOCKED(dvp)); + + /* Associate directory entry and the inode. */ + de->td_node = node; + if (node != TMPFS_NODE_WHITEOUT) { + KASSERT(node->tn_links < LINK_MAX); + node->tn_links++; + + /* Save the hint (might overwrite). */ + node->tn_dirent_hint = de; + } + + /* Insert the entry to the directory (parent of inode). */ + TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); + dnode->tn_size += sizeof(tmpfs_dirent_t); + tmpfs_update(dnode, TMPFS_NODE_STATUSALL); + uvm_vnp_setsize(dvp, dnode->tn_size); + + if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) { + /* Set parent. */ + KASSERT(node->tn_spec.tn_dir.tn_parent == NULL); + node->tn_spec.tn_dir.tn_parent = dnode; + + /* Increase the link count of parent. */ + KASSERT(dnode->tn_links < LINK_MAX); + dnode->tn_links++; + events |= NOTE_LINK; + + TMPFS_VALIDATE_DIR(node); + } + VN_KNOTE(dvp, events); +} + +/* + * tmpfs_dir_detach: disassociate directory entry and its inode, + * and detach the entry from the directory, specified by vnode. + * + * => Decreases link count on the associated node. + * => Decreases the link count on directory node, if our node is VDIR. + * => Triggers kqueue events here. + */ +void +tmpfs_dir_detach(struct vnode *dvp, tmpfs_dirent_t *de) +{ + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); + tmpfs_node_t *node = de->td_node; + int events = NOTE_WRITE; + + KASSERT(VOP_ISLOCKED(dvp)); + + if (node != TMPFS_NODE_WHITEOUT) { + struct vnode *vp = node->tn_vnode; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Deassociate the inode and entry. 
*/ + de->td_node = NULL; + node->tn_dirent_hint = NULL; + + KASSERT(node->tn_links > 0); + node->tn_links--; + if (vp) { + VN_KNOTE(vp, node->tn_links ? + NOTE_LINK : NOTE_DELETE); + } + + /* If directory - decrease the link count of parent. */ + if (node->tn_type == VDIR) { + KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); + node->tn_spec.tn_dir.tn_parent = NULL; + + KASSERT(dnode->tn_links > 0); + dnode->tn_links--; + events |= NOTE_LINK; + } + } + + /* Remove the entry from the directory. */ + if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) { + dnode->tn_spec.tn_dir.tn_readdir_lastn = 0; + dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; + } + TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries); + + dnode->tn_size -= sizeof(tmpfs_dirent_t); + tmpfs_update(dnode, TMPFS_NODE_STATUSALL); + uvm_vnp_setsize(dvp, dnode->tn_size); + VN_KNOTE(dvp, events); +} + +/* + * tmpfs_dir_lookup: find a directory entry in the specified inode. + * + * Note that the . and .. components are not allowed as they do not + * physically exist within directories. + */ +tmpfs_dirent_t * +tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp) +{ + const char *name = cnp->cn_nameptr; + const uint16_t nlen = cnp->cn_namelen; + tmpfs_dirent_t *de; + + KASSERT(VOP_ISLOCKED(node->tn_vnode)); + KASSERT(nlen != 1 || !(name[0] == '.')); + KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.')); + TMPFS_VALIDATE_DIR(node); + + TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { + if (de->td_namelen != nlen) + continue; + if (memcmp(de->td_name, name, nlen) != 0) + continue; + break; + } + tmpfs_update(node, TMPFS_NODE_ACCESSED); + return de; +} + +/* + * tmpfs_dir_cached: get a cached directory entry if it is valid. Used to + * avoid unnecessary tmpds_dir_lookup(). + * + * => The vnode must be locked. + */ +tmpfs_dirent_t * +tmpfs_dir_cached(tmpfs_node_t *node) +{ + tmpfs_dirent_t *de = node->tn_dirent_hint; + + KASSERT(VOP_ISLOCKED(node->tn_vnode)); + + if (de == NULL) { + return NULL; + } + KASSERT(de->td_node == node); + + /* + * Directories always have a valid hint. For files, check if there + * are any hard links. If there are - hint might be invalid. + */ + return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de; +} + +/* + * tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a + * '.' entry for the given directory and returns it in the uio space. + */ +int +tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio) +{ + struct dirent *dentp; + int error; + + TMPFS_VALIDATE_DIR(node); + KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); + + /* dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); */ + dentp = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); + dentp->d_fileno = node->tn_id; + dentp->d_type = DT_DIR; + dentp->d_namlen = 1; + dentp->d_name[0] = '.'; + dentp->d_name[1] = '\0'; + dentp->d_reclen = DIRENT_SIZE(dentp); + + if (dentp->d_reclen > uio->uio_resid) + error = -1; + else { + error = uiomove(dentp, dentp->d_reclen, uio); + if (error == 0) + uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; + } + tmpfs_update(node, TMPFS_NODE_ACCESSED); + /* kmem_free(dentp, sizeof(struct dirent)); */ + free(dentp, M_TEMP); + return error; +} + +/* + * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir. Creates a + * '..' entry for the given directory and returns it in the uio space. 
+ */ +int +tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio) +{ + struct dirent *dentp; + int error; + + TMPFS_VALIDATE_DIR(node); + KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); + + /* dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); */ + dentp = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); + dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; + dentp->d_type = DT_DIR; + dentp->d_namlen = 2; + dentp->d_name[0] = '.'; + dentp->d_name[1] = '.'; + dentp->d_name[2] = '\0'; + dentp->d_reclen = DIRENT_SIZE(dentp); + + if (dentp->d_reclen > uio->uio_resid) + error = -1; + else { + error = uiomove(dentp, dentp->d_reclen, uio); + if (error == 0) { + tmpfs_dirent_t *de; + + de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); + if (de == NULL) + uio->uio_offset = TMPFS_DIRCOOKIE_EOF; + else + uio->uio_offset = tmpfs_dircookie(de); + } + } + tmpfs_update(node, TMPFS_NODE_ACCESSED); + /* kmem_free(dentp, sizeof(struct dirent)); */ + free(dentp, M_TEMP); + return error; +} + +/* + * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie. + */ +tmpfs_dirent_t * +tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie) +{ + tmpfs_dirent_t *de; + + KASSERT(VOP_ISLOCKED(node->tn_vnode)); + + if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn && + node->tn_spec.tn_dir.tn_readdir_lastp != NULL) { + return node->tn_spec.tn_dir.tn_readdir_lastp; + } + TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { + if (tmpfs_dircookie(de) == cookie) { + break; + } + } + return de; +} + +/* + * tmpfs_dir_getdents: relper function for tmpfs_readdir. + * + * => Returns as much directory entries as can fit in the uio space. + * => The read starts at uio->uio_offset. + */ +int +tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp) +{ + tmpfs_dirent_t *de; + struct dirent *dentp; + off_t startcookie; + int error; + + KASSERT(VOP_ISLOCKED(node->tn_vnode)); + TMPFS_VALIDATE_DIR(node); + + /* + * Locate the first directory entry we have to return. We have cached + * the last readdir in the node, so use those values if appropriate. + * Otherwise do a linear scan to find the requested entry. + */ + startcookie = uio->uio_offset; + KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); + KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); + if (startcookie == TMPFS_DIRCOOKIE_EOF) { + return 0; + } else { + de = tmpfs_dir_lookupbycookie(node, startcookie); + } + if (de == NULL) { + return EINVAL; + } + + /* + * Read as much entries as possible; i.e., until we reach the end + * of the directory or we exhaust uio space. + */ + /* dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP); */ + dentp = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); + do { + /* + * Create a dirent structure representing the current + * inode and fill it. 
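For illustration only (not part of the imported sources): tmpfs_dir_lookupbycookie() above consults the directory's cached (tn_readdir_lastn, tn_readdir_lastp) pair before falling back to a linear scan, so a sequential readdir does not rescan the whole list on every call. A reduced userland sketch of that cached-position trick, with invented names:

#include <stdint.h>
#include <stddef.h>

struct xent {
	struct xent	*next;
	uint64_t	 cookie;
};

struct xdir {
	struct xent	*head;
	uint64_t	 last_cookie;	/* 0 means "no cached position" */
	struct xent	*last_entry;
};

static struct xent *
xdir_lookupbycookie(struct xdir *d, uint64_t cookie)
{
	struct xent *e;

	/* Fast path: a sequential reader hits the cached position. */
	if (cookie == d->last_cookie && d->last_entry != NULL)
		return d->last_entry;

	/* Slow path: linear scan, as in the kernel code above. */
	for (e = d->head; e != NULL; e = e->next)
		if (e->cookie == cookie)
			break;
	return e;
}

int
main(void)
{
	struct xent e2 = { NULL, 2 }, e1 = { &e2, 1 };
	struct xdir d = { &e1, 0, NULL };

	return xdir_lookupbycookie(&d, 2) == &e2 ? 0 : 1;
}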
+ */ + if (de->td_node == TMPFS_NODE_WHITEOUT || 0) { + dentp->d_fileno = 1; + /* dentp->d_type = DT_WHT; */ + } else { + dentp->d_fileno = de->td_node->tn_id; + switch (de->td_node->tn_type) { + case VBLK: + dentp->d_type = DT_BLK; + break; + case VCHR: + dentp->d_type = DT_CHR; + break; + case VDIR: + dentp->d_type = DT_DIR; + break; + case VFIFO: + dentp->d_type = DT_FIFO; + break; + case VLNK: + dentp->d_type = DT_LNK; + break; + case VREG: + dentp->d_type = DT_REG; + break; + case VSOCK: + dentp->d_type = DT_SOCK; + break; + default: + KASSERT(0); + } + } + dentp->d_namlen = de->td_namelen; + KASSERT(de->td_namelen < sizeof(dentp->d_name)); + memcpy(dentp->d_name, de->td_name, de->td_namelen); + dentp->d_name[de->td_namelen] = '\0'; + dentp->d_reclen = DIRENT_SIZE(dentp); + + /* Stop reading if the directory entry we are treating is + * bigger than the amount of data that can be returned. */ + if (dentp->d_reclen > uio->uio_resid) { + error = -1; + break; + } + + /* + * Copy the new dirent structure into the output buffer and + * advance pointers. + */ + error = uiomove(dentp, dentp->d_reclen, uio); + + (*cntp)++; + de = TAILQ_NEXT(de, td_entries); + } while (error == 0 && uio->uio_resid > 0 && de != NULL); + + /* Update the offset and cache. */ + if (de == NULL) { + uio->uio_offset = TMPFS_DIRCOOKIE_EOF; + node->tn_spec.tn_dir.tn_readdir_lastn = 0; + node->tn_spec.tn_dir.tn_readdir_lastp = NULL; + } else { + node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset = + tmpfs_dircookie(de); + node->tn_spec.tn_dir.tn_readdir_lastp = de; + } + tmpfs_update(node, TMPFS_NODE_ACCESSED); + /* kmem_free(dentp, sizeof(struct dirent)); */ + free(dentp, M_TEMP); + return error; +} + +/* + * tmpfs_reg_resize: resize the underlying UVM object associated with the + * specified regular file. + */ + +int +tmpfs_reg_resize(struct vnode *vp, off_t newsize) +{ + tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj; + size_t newpages, oldpages, bytes; + off_t oldsize; + vaddr_t pgoff; + int error; + + KASSERT(vp->v_type == VREG); + KASSERT(newsize >= 0); + + oldsize = node->tn_size; + oldpages = round_page(oldsize) >> PAGE_SHIFT; + newpages = round_page(newsize) >> PAGE_SHIFT; + KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages); + + if (newpages > oldpages) { + /* Increase the used-memory counter if getting extra pages. */ + bytes = (newpages - oldpages) << PAGE_SHIFT; + if (tmpfs_mem_incr(tmp, bytes) == 0) + return ENOSPC; + if (uao_grow(uobj, newpages) != 0) { + tmpfs_mem_decr(tmp, bytes); + return ENOSPC; + } + } + + node->tn_spec.tn_reg.tn_aobj_pages = newpages; + node->tn_size = newsize; + uvm_vnp_setsize(vp, newsize); + uvm_vnp_uncache(vp); + + /* + * Free "backing store". + */ + if (newpages < oldpages) { + if (tmpfs_uio_cached(node)) + tmpfs_uio_uncache(node); + if (uao_shrink(uobj, newpages)) + panic("shrink failed"); + /* Decrease the used-memory counter. */ + tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT); + } + if (newsize > oldsize) { + if (tmpfs_uio_cached(node)) + tmpfs_uio_uncache(node); + pgoff = oldsize & PAGE_MASK; + if (pgoff != 0) { + /* + * Growing from an offset which is not at a page + * boundary; zero out unused bytes in current page. + */ + error = tmpfs_zeropg(node, trunc_page(oldsize), pgoff); + if (error) + panic("tmpfs_zeropg: error %d", error); + } + VN_KNOTE(vp, NOTE_EXTEND); + } + return 0; +} + +/* + * tmpfs_chflags: change flags of the given vnode. 
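For illustration only (not part of the imported sources): the arithmetic in tmpfs_reg_resize() above, namely how many pages back the file before and after the resize, how many bytes get charged to the mount's memory counter, and which tail of the last old page has to be zeroed when the file grows from an unaligned size. PAGE_SIZE is assumed to be 4096 here purely for the example:

#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)
#define ex_round_page(x)	(((x) + EX_PAGE_MASK) & ~EX_PAGE_MASK)

int
main(void)
{
	uint64_t oldsize = 5000, newsize = 20000;
	uint64_t oldpages = ex_round_page(oldsize) / EX_PAGE_SIZE;
	uint64_t newpages = ex_round_page(newsize) / EX_PAGE_SIZE;
	uint64_t pgoff = oldsize & EX_PAGE_MASK;

	printf("pages %llu -> %llu, charge %llu extra bytes\n",
	    (unsigned long long)oldpages, (unsigned long long)newpages,
	    (unsigned long long)((newpages - oldpages) * EX_PAGE_SIZE));
	if (newsize > oldsize && pgoff != 0)
		printf("zero the last %llu bytes of the old tail page, "
		    "from offset %llu\n",
		    (unsigned long long)(EX_PAGE_SIZE - pgoff),
		    (unsigned long long)oldsize);
	return 0;
}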
+ * + */ +int +tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Disallow this operation if the file system is mounted read-only. */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return EROFS; + + if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred))) + return error; + + if (cred->cr_uid == 0) { + if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) && + securelevel > 0) + return EPERM; + node->tn_flags = flags; + } else { + if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) || + (flags & UF_SETTABLE) != flags) + return EPERM; + node->tn_flags &= SF_SETTABLE; + node->tn_flags |= (flags & UF_SETTABLE); + } + + tmpfs_update(node, TMPFS_NODE_CHANGED); + VN_KNOTE(vp, NOTE_ATTRIB); + return 0; +} + +/* + * tmpfs_chmod: change access mode on the given vnode. + * + */ +int +tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Disallow this operation if the file system is mounted read-only. */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return EROFS; + + /* Immutable or append-only files cannot be modified, either. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) + return EPERM; + + if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred))) + return error; + if (cred->cr_uid != 0) { + if (vp->v_type != VDIR && (mode & S_ISTXT)) + return EFTYPE; + if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) + return EPERM; + } + + node->tn_mode = (mode & ALLPERMS); + tmpfs_update(node, TMPFS_NODE_CHANGED); + if ((vp->v_flag & VTEXT) && (node->tn_mode & S_ISTXT) == 0) + uvm_vnp_uncache(vp); + VN_KNOTE(vp, NOTE_ATTRIB); + return 0; +} + +/* + * tmpfs_chown: change ownership of the given vnode. + * + * => At least one of uid or gid must be different than VNOVAL. + * => Attribute is unchanged for VNOVAL case. + */ +int +tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct proc *p) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Assign default values if they are unknown. */ + KASSERT(uid != VNOVAL || gid != VNOVAL); + if (uid == VNOVAL) { + uid = node->tn_uid; + } + if (gid == VNOVAL) { + gid = node->tn_gid; + } + + /* Disallow this operation if the file system is mounted read-only. */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return EROFS; + + /* Immutable or append-only files cannot be modified, either. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) + return EPERM; + + if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid || + (gid != node->tn_gid && !groupmember(gid, cred))) && + (error = suser_ucred(cred))) + return error; + + node->tn_uid = uid; + node->tn_gid = gid; + tmpfs_update(node, TMPFS_NODE_CHANGED); + VN_KNOTE(vp, NOTE_ATTRIB); + return 0; +} + +/* + * tmpfs_chsize: change size of the given vnode. + */ +int +tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, struct proc *p) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + + KASSERT(VOP_ISLOCKED(vp)); + + /* Decide whether this is a valid operation based on the file type. 
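For illustration only (not part of the imported sources): the two extra conditions tmpfs_chmod() above imposes when the caller is not root, reduced to a pure function. The parameter names and the boolean stand-ins for the credential checks are invented; S_ISTXT is the BSD spelling of the sticky bit:

#include <sys/stat.h>
#include <errno.h>
#include <stdbool.h>

#ifndef S_ISTXT
#define S_ISTXT S_ISVTX		/* sticky bit */
#endif
#ifndef EFTYPE
#define EFTYPE EINVAL		/* EFTYPE is BSD-specific */
#endif

int
chmod_check_nonroot(bool is_dir, bool in_file_group, mode_t mode)
{
	/* Only directories may carry the sticky bit when set by non-root. */
	if (!is_dir && (mode & S_ISTXT))
		return EFTYPE;
	/* Setting the setgid bit requires membership in the file's group. */
	if (!in_file_group && (mode & S_ISGID))
		return EPERM;
	return 0;
}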
*/ + switch (vp->v_type) { + case VDIR: + return EISDIR; + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + return EROFS; + } + break; + case VBLK: + case VCHR: + case VFIFO: + /* + * Allow modifications of special files even if in the file + * system is mounted read-only (we are not modifying the + * files themselves, but the objects they represent). + */ + return 0; + default: + return EOPNOTSUPP; + } + + /* Immutable or append-only files cannot be modified, either. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) { + return EPERM; + } + + /* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */ + return tmpfs_truncate(vp, size); +} + +/* + * tmpfs_chtimes: change access and modification times for vnode. + */ +int +tmpfs_chtimes(struct vnode *vp, const struct timespec *atime, + const struct timespec *mtime, int vaflags, struct ucred *cred, + struct proc *p) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Disallow this operation if the file system is mounted read-only. */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return EROFS; + + /* Immutable or append-only files cannot be modified, either. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) + return EPERM; + + if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)) && + ((vaflags & VA_UTIMES_NULL) == 0 || + (error = VOP_ACCESS(vp, VWRITE, cred, p)))) + return error; + + if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) + node->tn_atime = *atime; + + if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) + node->tn_mtime = *mtime; + VN_KNOTE(vp, NOTE_ATTRIB); + return 0; +} + +/* + * tmpfs_update: update timestamps, et al. + */ +void +tmpfs_update(tmpfs_node_t *node, int flags) +{ + struct timespec nowtm; + + nanotime(&nowtm); + + if (flags & TMPFS_NODE_ACCESSED) { + node->tn_atime = nowtm; + } + if (flags & TMPFS_NODE_MODIFIED) { + node->tn_mtime = nowtm; + } + if (flags & TMPFS_NODE_CHANGED) { + node->tn_ctime = nowtm; + } +} + +int +tmpfs_truncate(struct vnode *vp, off_t length) +{ + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int error; + + if (length < 0) { + error = EINVAL; + goto out; + } + if (node->tn_size == length) { + error = 0; + goto out; + } + error = tmpfs_reg_resize(vp, length); + if (error == 0) { + tmpfs_update(node, TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED); + } +out: + return error; +} + +int +tmpfs_uio_cached(tmpfs_node_t *node) +{ + int pgnum_valid = (node->tn_pgnum != (voff_t)-1); + int pgptr_valid = (node->tn_pgptr != (vaddr_t)NULL); + KASSERT(pgnum_valid == pgptr_valid); + return pgnum_valid && pgptr_valid; +} + +vaddr_t +tmpfs_uio_lookup(tmpfs_node_t *node, voff_t pgnum) +{ + if (tmpfs_uio_cached(node) == 1 && node->tn_pgnum == pgnum) + return node->tn_pgptr; + + return (vaddr_t)NULL; +} + +void +tmpfs_uio_uncache(tmpfs_node_t *node) +{ + KASSERT(node->tn_pgnum != (voff_t)-1); + KASSERT(node->tn_pgptr != (vaddr_t)NULL); + uvm_unmap(kernel_map, node->tn_pgptr, node->tn_pgptr + PAGE_SIZE); + node->tn_pgnum = (voff_t)-1; + node->tn_pgptr = (vaddr_t)NULL; +} + +void +tmpfs_uio_cache(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgptr) +{ + KASSERT(node->tn_pgnum == (voff_t)-1); + KASSERT(node->tn_pgptr == (vaddr_t)NULL); + node->tn_pgnum = pgnum; + node->tn_pgptr = pgptr; +} + +/* + * Be gentle to kernel_map, don't allow more than 4MB in a single transaction. 
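For illustration only (not part of the imported sources): how the transfer window is sized in tmpfs_uiomove(), which follows below. The transfer offset is split into a page-aligned base and an in-page offset, and the mapped window is capped so a large read or write never maps the whole file into kernel_map at once. PAGE_SIZE is assumed to be 4096 and the helper macros are invented for the example:

#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)
#define EX_MAXBYTES	((1ULL << 22) - EX_PAGE_SIZE)	/* 4MB minus a page */
#define ex_round_page(x)	(((x) + EX_PAGE_MASK) & ~EX_PAGE_MASK)
#define ex_trunc_page(x)	((x) & ~EX_PAGE_MASK)

int
main(void)
{
	uint64_t off = 3 * EX_PAGE_SIZE + 100;	/* transfer offset */
	uint64_t len = 10ULL * 1024 * 1024;	/* bytes requested */
	uint64_t pgoff = off & EX_PAGE_MASK;
	uint64_t sz = len >= EX_MAXBYTES ? EX_MAXBYTES : len;
	uint64_t winsz = ex_round_page(pgoff + sz);

	printf("map a %llu byte window at object offset %llu, "
	    "copy starting %llu bytes into it\n",
	    (unsigned long long)winsz,
	    (unsigned long long)ex_trunc_page(off),
	    (unsigned long long)pgoff);
	return 0;
}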
+ */ +#define TMPFS_UIO_MAXBYTES ((1 << 22) - PAGE_SIZE) + +int +tmpfs_uiomove(tmpfs_node_t *node, struct uio *uio, vsize_t len) +{ + vaddr_t va, pgoff; + int error, adv; + voff_t pgnum; + vsize_t sz; + + pgnum = trunc_page(uio->uio_offset); + pgoff = uio->uio_offset & PAGE_MASK; + + if (pgoff + len < PAGE_SIZE) { + va = tmpfs_uio_lookup(node, pgnum); + if (va != (vaddr_t)NULL) + return uiomove((void *)va + pgoff, len, uio); + } + + if (len >= TMPFS_UIO_MAXBYTES) { + sz = TMPFS_UIO_MAXBYTES; + adv = UVM_ADV_NORMAL; + } else { + sz = len; + adv = UVM_ADV_SEQUENTIAL; + } + + if (tmpfs_uio_cached(node)) + tmpfs_uio_uncache(node); + + uao_reference(node->tn_uobj); + + error = uvm_map(kernel_map, &va, round_page(pgoff + sz), node->tn_uobj, + trunc_page(uio->uio_offset), 0, UVM_MAPFLAG(UVM_PROT_RW, + UVM_PROT_RW, UVM_INH_NONE, adv, 0)); + if (error) { + uao_detach(node->tn_uobj); /* Drop reference. */ + return error; + } + + error = uiomove((void *)va + pgoff, sz, uio); + if (error == 0 && pgoff + sz < PAGE_SIZE) + tmpfs_uio_cache(node, pgnum, va); + else + uvm_unmap(kernel_map, va, va + round_page(pgoff + sz)); + + return error; +} + +int +tmpfs_zeropg(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgoff) +{ + vaddr_t va; + int error; + + KASSERT(tmpfs_uio_cached(node) == 0); + + uao_reference(node->tn_uobj); + + error = uvm_map(kernel_map, &va, PAGE_SIZE, node->tn_uobj, pgnum, 0, + UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_NORMAL, + 0)); + if (error) { + uao_detach(node->tn_uobj); /* Drop reference. */ + return error; + } + + bzero((void *)va + pgoff, PAGE_SIZE - pgoff); + uvm_unmap(kernel_map, va, va + PAGE_SIZE); + + return 0; +} + diff --git a/sys/tmpfs/tmpfs_vfsops.c b/sys/tmpfs/tmpfs_vfsops.c new file mode 100644 index 00000000000..738164c36f8 --- /dev/null +++ b/sys/tmpfs/tmpfs_vfsops.c @@ -0,0 +1,367 @@ +/* $NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $ */ + +/* + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Efficient memory file system. 
+ * + * tmpfs is a file system that uses NetBSD's virtual memory sub-system + * (the well-known UVM) to store file data and metadata in an efficient + * way. This means that it does not follow the structure of an on-disk + * file system because it simply does not need to. Instead, it uses + * memory-specific data structures and algorithms to automatically + * allocate and release resources. + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.52 2011/09/27 01:10:43 christos Exp $"); +#endif + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <tmpfs/tmpfs.h> + +/* MODULE(MODULE_CLASS_VFS, tmpfs, NULL); */ + +struct pool tmpfs_dirent_pool; +struct pool tmpfs_node_pool; + +int tmpfs_mount(struct mount *, const char *, void *, struct nameidata *, + struct proc *); +int tmpfs_start(struct mount *, int, struct proc *); +int tmpfs_unmount(struct mount *, int, struct proc *); +int tmpfs_root(struct mount *, struct vnode **); +int tmpfs_vget(struct mount *, ino_t, struct vnode **); +int tmpfs_fhtovp(struct mount *, struct fid *, struct vnode **); +int tmpfs_vptofh(struct vnode *, struct fid *); +int tmpfs_statfs(struct mount *, struct statfs *, struct proc *); +int tmpfs_sync(struct mount *, int, struct ucred *, struct proc *); +int tmpfs_init(struct vfsconf *); + +int +tmpfs_init(struct vfsconf *vfsp) +{ + + pool_init(&tmpfs_dirent_pool, sizeof(tmpfs_dirent_t), 0, 0, 0, + "tmpfs_dirent", &pool_allocator_nointr); + pool_init(&tmpfs_node_pool, sizeof(tmpfs_node_t), 0, 0, 0, + "tmpfs_node", &pool_allocator_nointr); + + return 0; +} + +int +tmpfs_mount(struct mount *mp, const char *path, void *data, + struct nameidata *ndp, struct proc *p) +{ + struct tmpfs_args args; + tmpfs_mount_t *tmp; + tmpfs_node_t *root; + uint64_t memlimit; + size_t len; + uint64_t nodes; + int error; + +#if 0 + /* Handle retrieval of mount point arguments. */ + if (mp->mnt_flag & MNT_GETARGS) { + if (mp->mnt_data == NULL) + return EIO; + tmp = VFS_TO_TMPFS(mp); + + args->ta_version = TMPFS_ARGS_VERSION; + args->ta_nodes_max = tmp->tm_nodes_max; + args->ta_size_max = tmp->tm_mem_limit; + + root = tmp->tm_root; + args->ta_root_uid = root->tn_uid; + args->ta_root_gid = root->tn_gid; + args->ta_root_mode = root->tn_mode; + + *data_len = sizeof(*args); + return 0; + } +#endif + + if (mp->mnt_flag & MNT_UPDATE) { + /* TODO */ + return EOPNOTSUPP; + } + + /* Prohibit mounts if there is not enough memory. */ + if (tmpfs_mem_info(1) < TMPFS_PAGES_RESERVED) + return EINVAL; + + error = copyin(data, &args, sizeof(struct tmpfs_args)); + if (error) + return error; + + /* Get the memory usage limit for this file-system. */ + if (args.ta_size_max < PAGE_SIZE) { + memlimit = UINT64_MAX; + } else { + memlimit = args.ta_size_max; + } + KASSERT(memlimit > 0); + + if (args.ta_nodes_max <= 3) { + nodes = 3 + (memlimit / 1024); + } else { + nodes = args.ta_nodes_max; + } + nodes = MIN(nodes, INT_MAX); + KASSERT(nodes >= 3); + + /* Allocate the tmpfs mount structure and fill it. */ + tmp = malloc(sizeof(tmpfs_mount_t), M_MISCFSMNT, M_WAITOK); + if (tmp == NULL) + return ENOMEM; + + tmp->tm_nodes_max = (ino_t)nodes; + tmp->tm_nodes_cnt = 0; + LIST_INIT(&tmp->tm_nodes); + + rw_init(&tmp->tm_lock, "tmplk"); + tmpfs_mntmem_init(tmp, memlimit); + + /* Allocate the root node. 
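For illustration only (not part of the imported sources): the defaulting that tmpfs_mount() above applies to the two sizing arguments. A size limit below one page means "no limit", and when no node count is given the file system allows roughly one inode per kilobyte of the memory limit, floored at 3 and clamped to INT_MAX. PAGE_SIZE is assumed to be 4096 for the example:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define EX_PAGE_SIZE	4096

int
main(void)
{
	uint64_t ta_size_max = 64ULL * 1024 * 1024;	/* e.g. "-s 64M" */
	uint64_t ta_nodes_max = 0;			/* no node count given */
	uint64_t memlimit, nodes;

	memlimit = ta_size_max < EX_PAGE_SIZE ? UINT64_MAX : ta_size_max;
	nodes = ta_nodes_max <= 3 ? 3 + memlimit / 1024 : ta_nodes_max;
	if (nodes > INT_MAX)
		nodes = INT_MAX;

	printf("memory limit %llu bytes, at most %llu nodes\n",
	    (unsigned long long)memlimit, (unsigned long long)nodes);
	return 0;
}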
*/ + error = tmpfs_alloc_node(tmp, VDIR, args.ta_root_uid, + args.ta_root_gid, args.ta_root_mode & ALLPERMS, NULL, + VNOVAL, &root); + KASSERT(error == 0 && root != NULL); + + /* + * Parent of the root inode is itself. Also, root inode has no + * directory entry (i.e. is never attached), thus hold an extra + * reference (link) for it. + */ + root->tn_links++; + root->tn_spec.tn_dir.tn_parent = root; + tmp->tm_root = root; + + mp->mnt_data = tmp; + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN; +#if 0 + mp->mnt_fs_bshift = PAGE_SHIFT; + mp->mnt_dev_bshift = DEV_BSHIFT; + mp->mnt_iflag |= IMNT_MPSAFE; +#endif + vfs_getnewfsid(mp); + + copystr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &len); + bzero(mp->mnt_stat.f_mntonname + len, MNAMELEN - len); + len = strlcpy(mp->mnt_stat.f_mntfromname, "tmpfs", MNAMELEN - 1); + bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len); + + return error; +} + +int +tmpfs_start(struct mount *mp, int flags, struct proc *p) +{ + + return 0; +} + +int +tmpfs_unmount(struct mount *mp, int mntflags, struct proc *p) +{ + tmpfs_mount_t *tmp; + tmpfs_node_t *node; + int error, flags = 0; + + /* Handle forced unmounts. */ + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + /* Finalize all pending I/O. */ + error = vflush(mp, NULL, flags); + if (error != 0) + return error; + + tmp = VFS_TO_TMPFS(mp); + + /* Destroy any existing inodes. */ + while ((node = LIST_FIRST(&tmp->tm_nodes)) != NULL) { + if (node->tn_type == VDIR) { + tmpfs_dirent_t *de; + + /* Destroy any directory entries. */ + de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); + while (de != NULL) { + tmpfs_dirent_t *nde; + + nde = TAILQ_NEXT(de, td_entries); + tmpfs_free_dirent(tmp, de); + node->tn_size -= sizeof(tmpfs_dirent_t); + de = nde; + } + } + /* Removes inode from the list. */ + tmpfs_free_node(tmp, node); + } + + /* Throw away the tmpfs_mount structure. */ + tmpfs_mntmem_destroy(tmp); + /* mutex_destroy(&tmp->tm_lock); */ + free(tmp, M_MISCFSMNT); + mp->mnt_data = NULL; + + return 0; +} + +int +tmpfs_root(struct mount *mp, struct vnode **vpp) +{ + tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root; + + rw_enter_write(&node->tn_nlock); + return tmpfs_vnode_get(mp, node, vpp); +} + +int +tmpfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) +{ + + printf("tmpfs_vget called; need for it unknown yet\n"); + return EOPNOTSUPP; +} + +int +tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) +{ + tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp); + tmpfs_node_t *node; + tmpfs_fid_t tfh; + + if (fhp->fid_len != sizeof(tmpfs_fid_t)) { + return EINVAL; + } + memcpy(&tfh, fhp, sizeof(tmpfs_fid_t)); + + rw_enter_write(&tmp->tm_lock); + LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { + if (node->tn_id != tfh.tf_id) { + continue; + } + if (TMPFS_NODE_GEN(node) != tfh.tf_gen) { + continue; + } + rw_enter_write(&node->tn_nlock); + break; + } + rw_exit_write(&tmp->tm_lock); + + /* Will release the tn_nlock. */ + return node ? 
tmpfs_vnode_get(mp, node, vpp) : ESTALE; +} + +int +tmpfs_vptofh(struct vnode *vp, struct fid *fhp) +{ + tmpfs_fid_t tfh; + tmpfs_node_t *node; + + node = VP_TO_TMPFS_NODE(vp); + + memset(&tfh, 0, sizeof(tfh)); + tfh.tf_len = sizeof(tmpfs_fid_t); + tfh.tf_gen = TMPFS_NODE_GEN(node); + tfh.tf_id = node->tn_id; + memcpy(fhp, &tfh, sizeof(tfh)); + + return 0; +} + +int +tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p) +{ + tmpfs_mount_t *tmp; + fsfilcnt_t freenodes; + uint64_t avail; + + tmp = VFS_TO_TMPFS(mp); + + sbp->f_iosize = sbp->f_bsize = PAGE_SIZE; + + rw_enter_write(&tmp->tm_acc_lock); + avail = tmpfs_pages_avail(tmp); + sbp->f_blocks = (tmpfs_bytes_max(tmp) >> PAGE_SHIFT); + sbp->f_bfree = avail; + sbp->f_bavail = avail & INT64_MAX; /* f_bavail is int64_t */ + + freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt, + avail * PAGE_SIZE / sizeof(tmpfs_node_t)); + + sbp->f_files = tmp->tm_nodes_cnt + freenodes; + sbp->f_ffree = freenodes; + sbp->f_favail = freenodes & INT64_MAX; /* f_favail is int64_t */ + rw_exit_write(&tmp->tm_acc_lock); + + copy_statfs_info(sbp, mp); + + return 0; +} + +int +tmpfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +{ + + return 0; +} + +/* + * tmpfs vfs operations. + */ + +struct vfsops tmpfs_vfsops = { + tmpfs_mount, /* vfs_mount */ + tmpfs_start, /* vfs_start */ + tmpfs_unmount, /* vfs_unmount */ + tmpfs_root, /* vfs_root */ + (void *)eopnotsupp, /* vfs_quotactl */ + tmpfs_statfs, /* vfs_statfs */ + tmpfs_sync, /* vfs_sync */ + tmpfs_vget, /* vfs_vget */ + tmpfs_fhtovp, /* vfs_fhtovp */ + tmpfs_vptofh, /* vfs_vptofh */ + tmpfs_init, /* vfs_init */ + NULL, /* vfs_sysctl */ + (void *)eopnotsupp, +}; diff --git a/sys/tmpfs/tmpfs_vnops.c b/sys/tmpfs/tmpfs_vnops.c new file mode 100644 index 00000000000..2bf68a6e253 --- /dev/null +++ b/sys/tmpfs/tmpfs_vnops.c @@ -0,0 +1,2747 @@ +/* $NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $ */ + +/* + * Copyright (c) 2005, 2006, 2007, 2012 The NetBSD Foundation, Inc. + * Copyright (c) 2013 Pedro Martelletto + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program, and by Taylor R Campbell. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * tmpfs vnode interface. + */ + +#if 0 +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.100 2012/11/05 17:27:39 dholland Exp $"); +#endif + +#include <sys/param.h> +#include <sys/dirent.h> +#include <sys/fcntl.h> +#include <sys/event.h> +#include <sys/malloc.h> +#include <sys/namei.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/lockf.h> +#include <sys/poll.h> + +#include <uvm/uvm.h> + +#include <miscfs/fifofs/fifo.h> +#include <tmpfs/tmpfs_vnops.h> +#include <tmpfs/tmpfs.h> + +/* + * vnode operations vector used for files stored in a tmpfs file system. + */ +struct vops tmpfs_vops = { + .vop_lookup = tmpfs_lookup, + .vop_create = tmpfs_create, + .vop_mknod = tmpfs_mknod, + .vop_open = tmpfs_open, + .vop_close = tmpfs_close, + .vop_access = tmpfs_access, + .vop_getattr = tmpfs_getattr, + .vop_setattr = tmpfs_setattr, + .vop_read = tmpfs_read, + .vop_write = tmpfs_write, + .vop_ioctl = tmpfs_ioctl, + .vop_poll = tmpfs_poll, + .vop_kqfilter = vop_generic_kqfilter, + .vop_revoke = vop_generic_revoke, + .vop_fsync = tmpfs_fsync, + .vop_remove = tmpfs_remove, + .vop_link = tmpfs_link, + .vop_rename = tmpfs_rename, + .vop_mkdir = tmpfs_mkdir, + .vop_rmdir = tmpfs_rmdir, + .vop_symlink = tmpfs_symlink, + .vop_readdir = tmpfs_readdir, + .vop_readlink = tmpfs_readlink, + .vop_abortop = vop_generic_abortop, + .vop_inactive = tmpfs_inactive, + .vop_reclaim = tmpfs_reclaim, + .vop_lock = tmpfs_lock, + .vop_unlock = tmpfs_unlock, + .vop_bmap = vop_generic_bmap, + .vop_strategy = tmpfs_strategy, + .vop_print = tmpfs_print, + .vop_islocked = tmpfs_islocked, + .vop_pathconf = tmpfs_pathconf, + .vop_advlock = tmpfs_advlock, + .vop_bwrite = tmpfs_bwrite, +}; + +/* + * tmpfs_lookup: path name traversal routine. + * + * Arguments: dvp (directory being searched), vpp (result), + * cnp (component name - path). + * + * => Caller holds a reference and lock on dvp. + * => We return looked-up vnode (vpp) locked, with a reference held. + */ +int +tmpfs_lookup(void *v) +{ + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + const int lastcn = (cnp->cn_flags & ISLASTCN) != 0; + const int lockparent = (cnp->cn_flags & LOCKPARENT) != 0; + tmpfs_node_t *dnode, *tnode; + tmpfs_dirent_t *de; + int cachefound; + int error; + + KASSERT(VOP_ISLOCKED(dvp)); + + dnode = VP_TO_TMPFS_DIR(dvp); + cnp->cn_flags &= ~PDIRUNLOCK; + *vpp = NULL; + + /* Check accessibility of directory. */ + error = VOP_ACCESS(dvp, VEXEC, cred, curproc); + if (error) { + goto out; + } + + /* + * If requesting the last path component on a read-only file system + * with a write operation, deny it. 
+ */ + if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + error = EROFS; + goto out; + } + + /* + * Avoid doing a linear scan of the directory if the requested + * directory/name couple is already in the cache. + */ + cachefound = cache_lookup(dvp, vpp, cnp); + if (cachefound == ENOENT /* && *vpp == NULLVP */) + return ENOENT; /* Negative cache hit. */ + else if (cachefound != -1) + return 0; /* Found in cache. */ + + if (cnp->cn_flags & ISDOTDOT) { + tmpfs_node_t *pnode; + + /* + * Lookup of ".." case. + */ + if (lastcn && cnp->cn_nameiop == RENAME) { + error = EINVAL; + goto out; + } + KASSERT(dnode->tn_type == VDIR); + pnode = dnode->tn_spec.tn_dir.tn_parent; + if (pnode == NULL) { + error = ENOENT; + goto out; + } + + /* + * Lock the parent tn_nlock before releasing the vnode lock, + * and thus prevents parent from disappearing. + */ + rw_enter_write(&pnode->tn_nlock); + VOP_UNLOCK(dvp, 0, curproc); + + /* + * Get a vnode of the '..' entry and re-acquire the lock. + * Release the tn_nlock. + */ + error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curproc); + goto out; + + } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { + /* + * Lookup of "." case. + */ + if (lastcn && cnp->cn_nameiop == RENAME) { + error = EISDIR; + goto out; + } + vref(dvp); + *vpp = dvp; + error = 0; + goto done; + } + + /* + * Other lookup cases: perform directory scan. + */ + de = tmpfs_dir_lookup(dnode, cnp); + if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) { + /* + * The entry was not found in the directory. This is valid + * if we are creating or renaming an entry and are working + * on the last component of the path name. + */ + if (lastcn && (cnp->cn_nameiop == CREATE || + cnp->cn_nameiop == RENAME)) { + error = VOP_ACCESS(dvp, VWRITE, cred, curproc); + if (error) { + goto out; + } + /* + * We are creating an entry in the file system, so + * save its name for further use by tmpfs_create(). + */ + cnp->cn_flags |= SAVENAME; + error = EJUSTRETURN; + } else { + error = ENOENT; + } + if (de) { + KASSERT(de->td_node == TMPFS_NODE_WHITEOUT); + /* cnp->cn_flags |= ISWHITEOUT; */ + } + goto done; + } + + tnode = de->td_node; + + /* + * If it is not the last path component and found a non-directory + * or non-link entry (which may itself be pointing to a directory), + * raise an error. + */ + if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) { + error = ENOTDIR; + goto out; + } + + /* Check the permissions. */ + if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { + error = VOP_ACCESS(dvp, VWRITE, cred, curproc); + if (error) + goto out; + + /* + * If not root and directory is sticky, check for permission + * on directory or on file. This implements append-only + * directories. + */ + if ((dnode->tn_mode & S_ISTXT) != 0) { + if (cred->cr_uid != 0 && + cred->cr_uid != dnode->tn_uid && + cred->cr_uid != tnode->tn_uid) { + error = EPERM; + goto out; + } + } + + /* + * XXX pedro: We might need cn_nameptr later in tmpfs_remove() + * or tmpfs_rmdir() for a tmpfs_dir_lookup(). We should really + * get rid of SAVENAME at some point. + */ + if (cnp->cn_nameiop == DELETE) + cnp->cn_flags |= SAVENAME; + } + + /* Get a vnode for the matching entry. */ + rw_enter_write(&tnode->tn_nlock); + error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp); +done: + /* + * Cache the result, unless request was for creation (as it does + * not improve the performance). 
+ */ + if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { + cache_enter(dvp, *vpp, cnp); + } +out: + /* + * If (1) we succeded, (2) found a distinct vnode to return and (3) were + * either explicitely told to keep the parent locked or are in the + * middle of a lookup, unlock the parent vnode. + */ + if ((error == 0 || error == EJUSTRETURN) && /* (1) */ + *vpp != dvp && /* (2) */ + (!lockparent || !lastcn)) { /* (3) */ + VOP_UNLOCK(dvp, 0, curproc); + cnp->cn_flags |= PDIRUNLOCK; + } else + KASSERT(VOP_ISLOCKED(dvp)); + + KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error); + + return error; +} + +int +tmpfs_create(void *v) +{ + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + + KASSERT(VOP_ISLOCKED(dvp)); + KASSERT(cnp->cn_flags & HASBUF); + KASSERT(vap->va_type == VREG || vap->va_type == VSOCK); + return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); +} + +int +tmpfs_mknod(void *v) +{ + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp, **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + enum vtype vt = vap->va_type; + int error; + + if (vt != VBLK && vt != VCHR && vt != VFIFO) { + vput(dvp); + return EINVAL; + } + + /* tmpfs_alloc_file() will unlock 'dvp'. */ + error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); + if (error) + return error; + + /* + * As in ufs_mknod(), remove inode so that it will be reloaded by + * VFS_VGET and checked to see if it is an alias of an existing entry + * in the vnode cache. + */ + vput(*vpp); + (*vpp)->v_type = VNON; + vgone(*vpp); + *vpp = NULL; + + return 0; +} + +int +tmpfs_open(void *v) +{ + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + mode_t mode = ap->a_mode; + tmpfs_node_t *node; + + KASSERT(VOP_ISLOCKED(vp)); + + node = VP_TO_TMPFS_NODE(vp); + if (node->tn_links < 1) { + /* + * The file is still active, but all its names have been + * removed (e.g. by a "rmdir $(pwd)"). It cannot be opened + * any more, as it is about to be destroyed. + */ + return ENOENT; + } + + /* If the file is marked append-only, deny write requests. */ + if ((node->tn_flags & APPEND) != 0 && + (mode & (FWRITE | O_APPEND)) == FWRITE) { + return EPERM; + } + return 0; +} + +int +tmpfs_close(void *v) +{ + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + KASSERT(VOP_ISLOCKED(vp)); + + return 0; +} + +int +tmpfs_access(void *v) +{ + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + mode_t mode = ap->a_mode; + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + const int writing = (mode & VWRITE) != 0; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Possible? 
*/ + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { + return EROFS; + } + break; + case VBLK: + case VCHR: + case VSOCK: + case VFIFO: + break; + default: + return EINVAL; + } + if (writing && (node->tn_flags & IMMUTABLE) != 0) { + return EPERM; + } + + return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid, + mode, ap->a_cred)); +} + +int +tmpfs_getattr(void *v) +{ + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + + vattr_null(vap); + + vap->va_type = vp->v_type; + vap->va_mode = node->tn_mode; + vap->va_nlink = node->tn_links; + vap->va_uid = node->tn_uid; + vap->va_gid = node->tn_gid; + /* vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; */ + vap->va_fileid = node->tn_id; + vap->va_size = node->tn_size; + vap->va_blocksize = PAGE_SIZE; + vap->va_atime = node->tn_atime; + vap->va_mtime = node->tn_mtime; + vap->va_ctime = node->tn_ctime; + /* vap->va_birthtime = node->tn_birthtime; */ + vap->va_gen = TMPFS_NODE_GEN(node); + vap->va_flags = node->tn_flags; + vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? + node->tn_spec.tn_dev.tn_rdev : VNOVAL; + vap->va_bytes = round_page(node->tn_size); + vap->va_filerev = VNOVAL; + vap->va_vaflags = 0; + vap->va_spare = VNOVAL; /* XXX */ + + return 0; +} + +#define GOODTIME(tv) ((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL) +/* XXX Should this operation be atomic? I think it should, but code in + * XXX other places (e.g., ufs) doesn't seem to be... */ +int +tmpfs_setattr(void *v) +{ + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct proc *p = curproc; + int error = 0; + + KASSERT(VOP_ISLOCKED(vp)); + + /* Abort if any unsettable attribute is given. 
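For illustration only (not part of the imported sources): tmpfs_access() above leaves the actual rights decision to the kernel's vaccess(), passing the node's mode, uid and gid. This is a minimal userland rendition of the usual owner/group/other evaluation; it ignores the superuser and supplementary group lists, and all names are invented:

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>

int
simple_access(mode_t file_mode, uid_t file_uid, gid_t file_gid,
    uid_t cr_uid, gid_t cr_gid, mode_t want)	/* want: 0..7, rwx bits */
{
	mode_t granted;

	if (cr_uid == file_uid)
		granted = (file_mode >> 6) & 7;		/* owner class */
	else if (cr_gid == file_gid)
		granted = (file_mode >> 3) & 7;		/* group class */
	else
		granted = file_mode & 7;		/* other class */

	return (want & ~granted) ? EACCES : 0;
}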
*/ + if (vap->va_type != VNON || vap->va_nlink != VNOVAL || + vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL || + vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) || + vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL || + vap->va_bytes != VNOVAL) { + return EINVAL; + } + if (error == 0 && (vap->va_flags != VNOVAL)) + error = tmpfs_chflags(vp, vap->va_flags, cred, p); + + if (error == 0 && (vap->va_size != VNOVAL)) + error = tmpfs_chsize(vp, vap->va_size, cred, p); + + if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL)) + error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, p); + + if (error == 0 && (vap->va_mode != VNOVAL)) + error = tmpfs_chmod(vp, vap->va_mode, cred, p); + + if (error == 0 && (GOODTIME(&vap->va_atime) + || GOODTIME(&vap->va_mtime))) { + error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime, + vap->va_vaflags, cred, p); + if (error == 0) + return 0; + } + return error; +} + +int +tmpfs_read(void *v) +{ + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + /* const int ioflag = ap->a_ioflag; */ + tmpfs_node_t *node; + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + if (vp->v_type != VREG) { + return EISDIR; + } + if (uio->uio_offset < 0) { + return EINVAL; + } + + node = VP_TO_TMPFS_NODE(vp); + tmpfs_update(node, TMPFS_NODE_ACCESSED); + error = 0; + + while (error == 0 && uio->uio_resid > 0) { + vsize_t len; + + if (node->tn_size <= uio->uio_offset) { + break; + } + len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid); + if (len == 0) { + break; + } + error = tmpfs_uiomove(node, uio, len); + } + + return error; +} + +int +tmpfs_write(void *v) +{ + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + const int ioflag = ap->a_ioflag; + tmpfs_node_t *node; + off_t oldsize; + int extended; + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + node = VP_TO_TMPFS_NODE(vp); + oldsize = node->tn_size; + + if (uio->uio_offset < 0 || vp->v_type != VREG) { + error = EINVAL; + goto out; + } + if (uio->uio_resid == 0) { + error = 0; + goto out; + } + if (ioflag & IO_APPEND) { + uio->uio_offset = node->tn_size; + } + + extended = uio->uio_offset + uio->uio_resid > node->tn_size; + if (extended) { + error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid); + if (error) + goto out; + } + + error = 0; + while (error == 0 && uio->uio_resid > 0) { + vsize_t len; + + len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid); + if (len == 0) { + break; + } + error = tmpfs_uiomove(node, uio, len); + } + if (error) { + (void)tmpfs_reg_resize(vp, oldsize); + } + + tmpfs_update(node, TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | + (extended ? TMPFS_NODE_CHANGED : 0)); + if (extended) + VN_KNOTE(vp, NOTE_WRITE | NOTE_EXTEND); + else + VN_KNOTE(vp, NOTE_WRITE); +out: + if (error) { + KASSERT(oldsize == node->tn_size); + } else { + KASSERT(uio->uio_resid == 0); + } + return error; +} + +int +tmpfs_fsync(void *v) +{ + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_flags; + off_t a_offlo; + off_t a_offhi; + struct lwp *a_l; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + /* Nothing to do. Just update. */ + KASSERT(VOP_ISLOCKED(vp)); + return 0; +} + +/* + * tmpfs_remove: unlink a file. + * + * => Both directory (dvp) and file (vp) are locked. 
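For illustration only (not part of the imported sources): the ordering used by tmpfs_write() above, which grows the backing object with tmpfs_reg_resize() before copying and shrinks back to the old size if the copy fails, translated to a userland growable buffer. realloc() stands in for the resize and memcpy() for tmpfs_uiomove(); names are invented:

#include <stdlib.h>
#include <string.h>
#include <errno.h>

struct membuf {
	char	*data;
	size_t	 size;
};

int
membuf_write(struct membuf *mb, size_t off, const void *buf, size_t len)
{
	size_t oldsize = mb->size;

	if (off + len > oldsize) {
		/* Grow the backing store before copying anything. */
		char *p = realloc(mb->data, off + len);
		if (p == NULL)
			return ENOSPC;		/* nothing changed yet */
		memset(p + oldsize, 0, off + len - oldsize);
		mb->data = p;
		mb->size = off + len;
	}

	/*
	 * The kernel copy can fail part way, in which case tmpfs_write()
	 * resizes back to the old length; memcpy() cannot fail, so this
	 * toy version has no rollback branch.
	 */
	memcpy(mb->data + off, buf, len);
	return 0;
}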
+ * => We unlock and drop the reference on both. + */ +int +tmpfs_remove(void *v) +{ + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp, *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + tmpfs_node_t *node; + tmpfs_dirent_t *de; + int error; + + KASSERT(VOP_ISLOCKED(dvp)); + KASSERT(VOP_ISLOCKED(vp)); + KASSERT(cnp->cn_flags & HASBUF); + + if (vp->v_type == VDIR) { + error = EPERM; + goto out; + } + node = VP_TO_TMPFS_NODE(vp); + + /* Files marked as immutable or append-only cannot be deleted. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + + /* Lookup the directory entry (check the cached hint first). */ + de = tmpfs_dir_cached(node); + if (de == NULL) { + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); + de = tmpfs_dir_lookup(dnode, cnp); + } + + KASSERT(de && de->td_node == node); + + /* + * Remove the entry from the directory (drops the link count) and + * destroy it or replace it with a whiteout. + * Note: the inode referred by it will not be destroyed + * until the vnode is reclaimed/recycled. + */ + tmpfs_dir_detach(dvp, de); + if (0 /* ap->a_cnp->cn_flags & DOWHITEOUT */) + tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + else + tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de); + error = 0; +out: + pool_put(&namei_pool, cnp->cn_pnbuf); + /* Drop the references and unlock the vnodes. */ + vput(vp); + if (dvp == vp) { + vrele(dvp); + } else { + vput(dvp); + } + return error; +} + +/* + * tmpfs_link: create a hard link. + */ +int +tmpfs_link(void *v) +{ + struct vop_link_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + tmpfs_node_t *dnode, *node; + tmpfs_dirent_t *de; + int error; + + KASSERT(dvp != vp); + KASSERT(VOP_ISLOCKED(dvp)); + + if (vp->v_type == VDIR) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return EPERM; + } + + if (dvp->v_mount != vp->v_mount) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return EXDEV; + } + + dnode = VP_TO_TMPFS_DIR(dvp); + node = VP_TO_TMPFS_NODE(vp); + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); + + /* Check for maximum number of links limit. */ + if (node->tn_links == LINK_MAX) { + error = EMLINK; + goto out; + } + KASSERT(node->tn_links < LINK_MAX); + + /* We cannot create links of files marked immutable or append-only. */ + if (node->tn_flags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + + /* Allocate a new directory entry to represent the inode. */ + error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), + cnp->cn_nameptr, cnp->cn_namelen, &de); + if (error) { + goto out; + } + + /* + * Insert the entry into the directory. + * It will increase the inode link count. + */ + tmpfs_dir_attach(dvp, de, node); + + /* Update the timestamps and trigger the event. 
*/ + if (node->tn_vnode) { + VN_KNOTE(node->tn_vnode, NOTE_LINK); + } + tmpfs_update(node, TMPFS_NODE_CHANGED); + error = 0; +out: + VOP_UNLOCK(vp, 0, curproc); + vput(dvp); + return error; +} + +int +tmpfs_mkdir(void *v) +{ + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + + KASSERT(vap->va_type == VDIR); + return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); +} + +int +tmpfs_rmdir(void *v) +{ + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); + tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp); + tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp); + tmpfs_dirent_t *de; + int error = 0; + + KASSERT(VOP_ISLOCKED(dvp)); + KASSERT(VOP_ISLOCKED(vp)); + KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); + KASSERT(cnp->cn_flags & HASBUF); + + /* + * Directories with more than two non-whiteout + * entries ('.' and '..') cannot be removed. + */ + if (node->tn_size > 0) { + KASSERT(error == 0); + TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) { + if (de->td_node != TMPFS_NODE_WHITEOUT) { + error = ENOTEMPTY; + break; + } + } + if (error) + goto out; + } + + /* Lookup the directory entry (check the cached hint first). */ + de = tmpfs_dir_cached(node); + if (de == NULL) + de = tmpfs_dir_lookup(dnode, cnp); + + KASSERT(de && de->td_node == node); + + /* Check flags to see if we are allowed to remove the directory. */ + if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + + /* Decrement the link count for the virtual '.' entry. */ + node->tn_links--; + tmpfs_update(node, TMPFS_NODE_STATUSALL); + + /* Detach the directory entry from the directory. */ + tmpfs_dir_detach(dvp, de); + + /* Purge the cache for parent. */ + cache_purge(dvp); + + /* + * Destroy the directory entry or replace it with a whiteout. + * Note: the inode referred by it will not be destroyed + * until the vnode is reclaimed. + */ + if (0 /* ap->a_cnp->cn_flags & DOWHITEOUT */) + tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + else + tmpfs_free_dirent(tmp, de); + + /* Destroy the whiteout entries from the node. */ + while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) { + KASSERT(de->td_node == TMPFS_NODE_WHITEOUT); + tmpfs_dir_detach(vp, de); + tmpfs_free_dirent(tmp, de); + } + + KASSERT(node->tn_links == 0); +out: + pool_put(&namei_pool, cnp->cn_pnbuf); + /* Release the nodes. 
*/ + vput(dvp); + vput(vp); + return error; +} + +int +tmpfs_symlink(void *v) +{ + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + char *target = ap->a_target; + int error; + + KASSERT(vap->va_type == 0); + vap->va_type = VLNK; + + error = tmpfs_alloc_file(dvp, vpp, vap, cnp, target); + if (error == 0) + vput(*vpp); + + return 0; +} + +int +tmpfs_readdir(void *v) +{ + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + kauth_cred_t a_cred; + int *a_eofflag; + off_t **a_cookies; + int *ncookies; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + int *eofflag = ap->a_eofflag; + u_long **cookies = ap->a_cookies; + int *ncookies = ap->a_ncookies; + off_t startoff, cnt; + tmpfs_node_t *node; + int error; + + KASSERT(VOP_ISLOCKED(vp)); + + /* This operation only makes sense on directory nodes. */ + if (vp->v_type != VDIR) { + return ENOTDIR; + } + node = VP_TO_TMPFS_DIR(vp); + startoff = uio->uio_offset; + cnt = 0; + if (node->tn_links == 0) { + error = 0; + goto out; + } + + if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) { + error = tmpfs_dir_getdotdent(node, uio); + if (error != 0) { + if (error == -1) + error = 0; + goto out; + } + cnt++; + } + if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) { + error = tmpfs_dir_getdotdotdent(node, uio); + if (error != 0) { + if (error == -1) + error = 0; + goto out; + } + cnt++; + } + error = tmpfs_dir_getdents(node, uio, &cnt); + if (error == -1) { + error = 0; + } + KASSERT(error >= 0); +out: + if (eofflag != NULL) { + *eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF); + } + if (error || cookies == NULL || ncookies == NULL) { + return error; + } + + /* Update NFS-related variables, if any. 
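For illustration only (not part of the imported sources): the cookie array that the readdir code just below builds when NFS asks for cookies. Each slot holds the offset at which a later readdir call should resume after consuming the corresponding entry: "." is followed by the ".." cookie, ".." by the first real entry, and the last entry by an end-of-directory marker. The numeric values here are invented placeholders, not the kernel's TMPFS_DIRCOOKIE_* constants:

#include <stdio.h>

#define CK_DOT		0	/* invented placeholder values */
#define CK_DOTDOT	1
#define CK_EOF		2

int
main(void)
{
	/* Pretend the directory holds two entries with these cookies. */
	long entry_cookie[2] = { 100, 200 };
	long cookies[4];	/* ".", "..", and the two entries */
	int i, n = 0;

	cookies[n++] = CK_DOTDOT;	/* after "." resume at ".." */
	cookies[n++] = entry_cookie[0];	/* after ".." resume at entry 0 */
	cookies[n++] = entry_cookie[1];	/* after entry 0, entry 1 */
	cookies[n++] = CK_EOF;		/* after the last entry, EOF */

	for (i = 0; i < n; i++)
		printf("cookie[%d] = %ld\n", i, cookies[i]);
	return 0;
}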
*/ + off_t i, off = startoff; + tmpfs_dirent_t *de = NULL; + + *cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK); + *ncookies = cnt; + + for (i = 0; i < cnt; i++) { + KASSERT(off != TMPFS_DIRCOOKIE_EOF); + if (off != TMPFS_DIRCOOKIE_DOT) { + if (off == TMPFS_DIRCOOKIE_DOTDOT) { + de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir); + } else if (de != NULL) { + de = TAILQ_NEXT(de, td_entries); + } else { + de = tmpfs_dir_lookupbycookie(node, off); + KASSERT(de != NULL); + de = TAILQ_NEXT(de, td_entries); + } + if (de == NULL) { + off = TMPFS_DIRCOOKIE_EOF; + } else { + off = tmpfs_dircookie(de); + } + } else { + off = TMPFS_DIRCOOKIE_DOTDOT; + } + (*cookies)[i] = off; + } + KASSERT(uio->uio_offset == off); + return error; +} + +int +tmpfs_readlink(void *v) +{ + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + kauth_cred_t a_cred; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + tmpfs_node_t *node; + int error; + + KASSERT(VOP_ISLOCKED(vp)); + KASSERT(uio->uio_offset == 0); + KASSERT(vp->v_type == VLNK); + + node = VP_TO_TMPFS_NODE(vp); + error = uiomove(node->tn_spec.tn_lnk.tn_link, + MIN(node->tn_size, uio->uio_resid), uio); + tmpfs_update(node, TMPFS_NODE_ACCESSED); + + return error; +} + +int +tmpfs_inactive(void *v) +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + int *a_recycle; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + tmpfs_node_t *node; + + KASSERT(VOP_ISLOCKED(vp)); + + node = VP_TO_TMPFS_NODE(vp); + + if (vp->v_type == VREG && tmpfs_uio_cached(node)) + tmpfs_uio_uncache(node); + + VOP_UNLOCK(vp, 0, curproc); + + /* + * If we are done with the node, reclaim it so that it can be reused + * immediately. + */ + if (node->tn_links == 0) + vrecycle(vp, curproc); + + return 0; +} + +int +tmpfs_reclaim(void *v) +{ + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount); + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + int racing; + + /* Disassociate inode from vnode. */ + rw_enter_write(&node->tn_nlock); + node->tn_vnode = NULL; + vp->v_data = NULL; + /* Check if tmpfs_vnode_get() is racing with us. */ + racing = TMPFS_NODE_RECLAIMING(node); + rw_exit_write(&node->tn_nlock); + + /* + * If inode is not referenced, i.e. no links, then destroy it. + * Note: if racing - inode is about to get a new vnode, leave it. 
+ */ + if (node->tn_links == 0 && !racing) { + tmpfs_free_node(tmp, node); + } + return 0; +} + +int +tmpfs_pathconf(void *v) +{ + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + register_t *a_retval; + } */ *ap = v; + const int name = ap->a_name; + register_t *retval = ap->a_retval; + int error = 0; + + switch (name) { + case _PC_LINK_MAX: + *retval = LINK_MAX; + break; + case _PC_NAME_MAX: + *retval = TMPFS_MAXNAMLEN; + break; + case _PC_PATH_MAX: + *retval = PATH_MAX; + break; + case _PC_PIPE_BUF: + *retval = PIPE_BUF; + break; + case _PC_CHOWN_RESTRICTED: + *retval = 1; + break; + case _PC_NO_TRUNC: + *retval = 1; + break; + case _PC_SYNC_IO: + *retval = 1; + break; + case _PC_FILESIZEBITS: + *retval = sizeof(off_t) * CHAR_BIT; + break; + default: + error = EINVAL; + } + return error; +} + +int +tmpfs_advlock(void *v) +{ + struct vop_advlock_args /* { + struct vnode *a_vp; + void * a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + + return lf_advlock(&node->tn_lockf, node->tn_size, ap->a_id, ap->a_op, + ap->a_fl, ap->a_flags); +} + +#if 0 +int +tmpfs_getpages(void *v) +{ + struct vop_getpages_args /* { + struct vnode *a_vp; + voff_t a_offset; + struct vm_page **a_m; + int *a_count; + int a_centeridx; + vm_prot_t a_access_type; + int a_advice; + int a_flags; + } */ * const ap = v; + struct vnode *vp = ap->a_vp; + const voff_t offset = ap->a_offset; + struct vm_page **pgs = ap->a_m; + const int centeridx = ap->a_centeridx; + const vm_prot_t access_type = ap->a_access_type; + const int advice = ap->a_advice; + const int flags = ap->a_flags; + int error, npages = *ap->a_count; + tmpfs_node_t *node; + struct uvm_object *uobj; + + KASSERT(vp->v_type == VREG); + KASSERT(mutex_owned(vp->v_interlock)); + + node = VP_TO_TMPFS_NODE(vp); + uobj = node->tn_spec.tn_reg.tn_aobj; + + /* + * Currently, PGO_PASTEOF is not supported. + */ + if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) { + if ((flags & PGO_LOCKED) == 0) + mutex_exit(vp->v_interlock); + return EINVAL; + } + + if (vp->v_size < offset + (npages << PAGE_SHIFT)) { + npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT; + } + + if ((flags & PGO_LOCKED) != 0) + return EBUSY; + + if ((flags & PGO_NOTIMESTAMP) == 0) { + if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) + node->tn_status |= TMPFS_NODE_ACCESSED; + + if ((access_type & VM_PROT_WRITE) != 0) { + node->tn_status |= TMPFS_NODE_MODIFIED; + if (vp->v_mount->mnt_flag & MNT_RELATIME) + node->tn_status |= TMPFS_NODE_ACCESSED; + } + } + + /* + * Invoke the pager. + * + * Clean the array of pages before. XXX: PR/32166 + * Note that vnode lock is shared with underlying UVM object. 
+ */ + if (pgs) { + memset(pgs, 0, sizeof(struct vm_pages *) * npages); + } + KASSERT(vp->v_interlock == uobj->vmobjlock); + + error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx, + access_type, advice, flags | PGO_ALLPAGES); + +#if defined(DEBUG) + if (!error && pgs) { + for (int i = 0; i < npages; i++) { + KASSERT(pgs[i] != NULL); + } + } +#endif + return error; +} + +int +tmpfs_putpages(void *v) +{ + struct vop_putpages_args /* { + struct vnode *a_vp; + voff_t a_offlo; + voff_t a_offhi; + int a_flags; + } */ * const ap = v; + struct vnode *vp = ap->a_vp; + const voff_t offlo = ap->a_offlo; + const voff_t offhi = ap->a_offhi; + const int flags = ap->a_flags; + tmpfs_node_t *node; + struct uvm_object *uobj; + int error; + + KASSERT(mutex_owned(vp->v_interlock)); + + if (vp->v_type != VREG) { + mutex_exit(vp->v_interlock); + return 0; + } + + node = VP_TO_TMPFS_NODE(vp); + uobj = node->tn_spec.tn_reg.tn_aobj; + + KASSERT(vp->v_interlock == uobj->vmobjlock); + error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags); + + /* XXX mtime */ + + return error; +} + +int +tmpfs_whiteout(void *v) +{ + struct vop_whiteout_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + const int flags = ap->a_flags; + tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount); + tmpfs_dirent_t *de; + int error; + + switch (flags) { + case LOOKUP: + break; + case CREATE: + error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, + cnp->cn_namelen, &de); + if (error) + return error; + tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT); + break; + case DELETE: + cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */ + de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp); + if (de == NULL) + return ENOENT; + tmpfs_dir_detach(dvp, de); + tmpfs_free_dirent(tmp, de); + break; + } + return 0; +} +#endif + +int +tmpfs_print(void *v) +{ + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp); + + printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n" + "\tmode 0%o, owner %d, group %d, size %lld", + node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid, + node->tn_gid, node->tn_size); +#ifdef FIFO + if (vp->v_type == VFIFO) + fifo_printinfo(vp); +#endif + printf("\n"); + return 0; +} + +/* a null op */ +int +tmpfs_bwrite(void *v) +{ + return 0; +} + +int +tmpfs_poll(void *v) +{ + struct vop_poll_args *ap = v; + return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); +} + +int +tmpfs_strategy(void *v) +{ + return EOPNOTSUPP; +} + +int +tmpfs_ioctl(void *v) +{ + return ENOTTY; +} + +int +tmpfs_lock(void *v) +{ + struct vop_lock_args *ap = v; + tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp); + + return lockmgr(&tnp->tn_vlock, ap->a_flags, NULL); +} + +int +tmpfs_unlock(void *v) +{ + struct vop_unlock_args *ap = v; + tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp); + + return lockmgr(&tnp->tn_vlock, ap->a_flags | LK_RELEASE, NULL); +} + +int +tmpfs_islocked(void *v) +{ + struct vop_islocked_args *ap = v; + tmpfs_node_t *tnp = VP_TO_TMPFS_NODE(ap->a_vp); + + return lockstatus(&tnp->tn_vlock); +} + +/* + * tmpfs_rename: rename routine, the hairiest system call, with the + * insane API. + * + * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent) + * and tvp (to-leaf), if exists (NULL if not). + * + * => Caller holds a reference on fdvp and fvp, they are unlocked. 
+ * Note: fdvp and fvp can refer to the same object (i.e. when it is root). + * + * => Both tdvp and tvp are referenced and locked. It is our responsibility + * to release the references and unlock them (or destroy). + */ + +/* + * First, some forward declarations of subroutines. + */ + +int tmpfs_sane_rename(struct vnode *, struct componentname *, + struct vnode *, struct componentname *, struct ucred *, int); +int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *, + struct ucred *, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **); +int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *, + struct ucred *, + struct vnode *, struct tmpfs_node *, + struct componentname *, struct tmpfs_dirent **, struct vnode **, + struct componentname *, struct tmpfs_dirent **, struct vnode **); +int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *, + struct ucred *, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, + struct tmpfs_dirent **, struct vnode **); +void tmpfs_rename_exit(struct tmpfs_mount *, + struct vnode *, struct vnode *, struct vnode *, struct vnode *); +int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *); +int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *, + struct tmpfs_node **); +int tmpfs_rename_lock(struct mount *, struct ucred *, int, + struct vnode *, struct tmpfs_node *, struct componentname *, int, + struct tmpfs_dirent **, struct vnode **, + struct vnode *, struct tmpfs_node *, struct componentname *, int, + struct tmpfs_dirent **, struct vnode **); +void tmpfs_rename_attachdetach(struct tmpfs_mount *, + struct vnode *, struct tmpfs_dirent *, struct vnode *, + struct vnode *, struct tmpfs_dirent *, struct vnode *); +int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *, + struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, struct ucred *); +int tmpfs_rename_check_possible(struct tmpfs_node *, + struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *); +int tmpfs_rename_check_permitted(struct ucred *, + struct tmpfs_node *, struct tmpfs_node *, + struct tmpfs_node *, struct tmpfs_node *); +int tmpfs_remove_check_possible(struct tmpfs_node *, + struct tmpfs_node *); +int tmpfs_remove_check_permitted(struct ucred *, + struct tmpfs_node *, struct tmpfs_node *); +int tmpfs_check_sticky(struct ucred *, + struct tmpfs_node *, struct tmpfs_node *); +void tmpfs_rename_cache_purge(struct vnode *, struct vnode *, struct vnode *, + struct vnode *); + +int +tmpfs_rename(void *v) +{ + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap = v; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *fvp = ap->a_fvp; + struct componentname *fcnp = ap->a_fcnp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *tvp = ap->a_tvp; + struct componentname *tcnp = ap->a_tcnp; + struct ucred *cred; + int error; + + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(fcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + KASSERT(tdvp != NULL); + KASSERT(tcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 
+ KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); + KASSERT(fcnp->cn_flags & HASBUF); + KASSERT(tcnp->cn_flags & HASBUF); + + cred = fcnp->cn_cred; + KASSERT(tcnp->cn_cred == cred); + + /* + * Check for cross-device rename. + */ + if (fvp->v_mount != tdvp->v_mount || + (tvp != NULL && (fvp->v_mount != tvp->v_mount))) { + VOP_ABORTOP(tdvp, tcnp); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp != NULL) + vput(tvp); + VOP_ABORTOP(fdvp, fcnp); + vrele(fdvp); + vrele(fvp); + return EXDEV; + } + + /* + * Sanitize our world from the VFS insanity. Unlock the target + * directory and node, which are locked. Release the children, + * which are referenced. Check for rename("x", "y/."), which + * it is our responsibility to reject, not the caller's. (But + * the caller does reject rename("x/.", "y"). Go figure.) + */ + + VOP_UNLOCK(tdvp, 0, curproc); + if ((tvp != NULL) && (tvp != tdvp)) + VOP_UNLOCK(tvp, 0, curproc); + + vrele(fvp); + if (tvp != NULL) + vrele(tvp); + + if (tvp == tdvp) { + error = EINVAL; + goto out; + } + + error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, 0); + +out: /* + * All done, whether with success or failure. Release the + * directory nodes now, as the caller expects from the VFS + * protocol. + */ + vrele(fdvp); + vrele(tdvp); + + return error; +} + +/* + * tmpfs_sane_rename: rename routine, the hairiest system call, with + * the sane API. + * + * Arguments: + * + * . fdvp (from directory vnode), + * . fcnp (from component name), + * . tdvp (to directory vnode), and + * . tcnp (to component name). + * + * fdvp and tdvp must be referenced and unlocked. + */ +int +tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp, + struct vnode *tdvp, struct componentname *tcnp, struct ucred *cred, + int posixly_correct) +{ + struct mount *mount; + struct tmpfs_mount *tmpfs; + struct tmpfs_node *fdnode, *tdnode; + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + char *newname; + int error; + + KASSERT(fdvp != NULL); + KASSERT(fcnp != NULL); + KASSERT(tdvp != NULL); + KASSERT(tcnp != NULL); + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); + KASSERT(fdvp->v_mount == tdvp->v_mount); + KASSERT((fcnp->cn_flags & ISDOTDOT) == 0); + KASSERT((tcnp->cn_flags & ISDOTDOT) == 0); + KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.')); + KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.')); + KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') || + (fcnp->cn_nameptr[1] != '.')); + KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') || + (tcnp->cn_nameptr[1] != '.')); + + /* + * Pull out the tmpfs data structures. + */ + fdnode = VP_TO_TMPFS_NODE(fdvp); + tdnode = VP_TO_TMPFS_NODE(tdvp); + KASSERT(fdnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode->tn_vnode == fdvp); + KASSERT(tdnode->tn_vnode == tdvp); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + mount = fdvp->v_mount; + KASSERT(mount != NULL); + KASSERT(mount == tdvp->v_mount); + /* XXX How can we be sure this stays true? (Not that you're + * likely to mount a tmpfs read-only...) */ + KASSERT((mount->mnt_flag & MNT_RDONLY) == 0); + tmpfs = VFS_TO_TMPFS(mount); + KASSERT(tmpfs != NULL); + + /* + * Decide whether we need a new name, and allocate memory for + * it if so. 
Do this before locking anything or taking + * destructive actions so that we can back out safely and sleep + * safely. XXX Is sleeping an issue here? Can this just be + * moved into tmpfs_rename_attachdetach? + */ + if (tmpfs_strname_neqlen(fcnp, tcnp)) { + newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen); + if (newname == NULL) { + error = ENOSPC; + goto out_unlocked; + } + } else { + newname = NULL; + } + + /* + * Lock and look up everything. GCC is not very clever. + */ + fde = tde = NULL; + fvp = tvp = NULL; + error = tmpfs_rename_enter(mount, tmpfs, cred, + fdvp, fdnode, fcnp, &fde, &fvp, + tdvp, tdnode, tcnp, &tde, &tvp); + if (error) + goto out_unlocked; + + /* + * Check that everything is locked and looks right. + */ + KASSERT(fde != NULL); + KASSERT(fvp != NULL); + KASSERT(fde->td_node != NULL); + KASSERT(fde->td_node->tn_vnode == fvp); + KASSERT(fde->td_node->tn_type == fvp->v_type); + KASSERT((tde == NULL) == (tvp == NULL)); + KASSERT((tde == NULL) || (tde->td_node != NULL)); + KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp)); + KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type)); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + + /* + * If the source and destination are the same object, we need + * only at most delete the source entry. + */ + if (fvp == tvp) { + KASSERT(tvp != NULL); + if (fde->td_node->tn_type == VDIR) { + /* XXX How can this possibly happen? */ + error = EINVAL; + goto out_locked; + } + if (!posixly_correct && (fde != tde)) { + /* XXX Doesn't work because of locking. + * error = VOP_REMOVE(fdvp, fvp); + */ + error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp, + cred); + if (error) + goto out_locked; + } + goto success; + } + KASSERT(fde != tde); + KASSERT(fvp != tvp); + + /* + * If the target exists, refuse to rename a directory over a + * non-directory or vice versa, or to clobber a non-empty + * directory. + */ + if (tvp != NULL) { + KASSERT(tde != NULL); + KASSERT(tde->td_node != NULL); + if (fvp->v_type == VDIR && tvp->v_type == VDIR) + error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0); + else if (fvp->v_type == VDIR && tvp->v_type != VDIR) + error = ENOTDIR; + else if (fvp->v_type != VDIR && tvp->v_type == VDIR) + error = EISDIR; + else + error = 0; + if (error) + goto out_locked; + KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR)); + } + + /* + * Authorize the rename. + */ + error = tmpfs_rename_check_possible(fdnode, fde->td_node, + tdnode, (tde? tde->td_node : NULL)); + if (error) + goto out_locked; + error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node, + tdnode, (tde? tde->td_node : NULL)); + if (error) + goto out_locked; + + /* + * Everything is hunky-dory. Shuffle the directory entries. + */ + tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp); + + /* + * Update the directory entry's name necessary, and flag + * metadata updates. A memory allocation failure here is not + * OK because we've already committed some changes that we + * can't back out at this point, and we have things locked so + * we can't sleep, hence the early allocation above. 
+ */ + if (newname != NULL) { + KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN); + + tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen); + fde->td_namelen = (uint16_t)tcnp->cn_namelen; + (void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen); + /* Commit newname and don't free it on the way out. */ + fde->td_name = newname; + newname = NULL; + + tmpfs_update(fde->td_node, TMPFS_NODE_CHANGED); + tmpfs_update(tdnode, TMPFS_NODE_MODIFIED); + } + +success: + VN_KNOTE(fvp, NOTE_RENAME); + tmpfs_rename_cache_purge(fdvp, fvp, tdvp, tvp); + error = 0; + +out_locked: + tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp); + +out_unlocked: + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ + /* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */ + /* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ + + if (newname != NULL) + tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen); + + return error; +} + +/* + * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret + * and the associated vnode in fvp_ret; fail if not found. Look up + * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the + * associated vnode in tvp_ret; store null instead if not found. Fail + * if anything has been mounted on any of the nodes involved. + * + * fdvp and tdvp must be referenced. + * + * On entry, nothing is locked. + * + * On success, everything is locked, and *fvp_ret, and *tvp_ret if + * nonnull, are referenced. The only pairs of vnodes that may be + * identical are {fdvp, tdvp} and {fvp, tvp}. + * + * On failure, everything remains as was. + * + * Locking everything including the source and target nodes is + * necessary to make sure that, e.g., link count updates are OK. The + * locking order is, in general, ancestor-first, matching the order you + * need to use to look up a descendant anyway. 
+ */ +int +tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs, + struct ucred *cred, + struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + int error; + + KASSERT(mount != NULL); + KASSERT(tmpfs != NULL); + KASSERT(fdvp != NULL); + KASSERT(fdnode != NULL); + KASSERT(fcnp != NULL); + KASSERT(fde_ret != NULL); + KASSERT(fvp_ret != NULL); + KASSERT(tdvp != NULL); + KASSERT(tdnode != NULL); + KASSERT(tcnp != NULL); + KASSERT(tde_ret != NULL); + KASSERT(tvp_ret != NULL); + KASSERT(fdnode->tn_vnode == fdvp); + KASSERT(tdnode->tn_vnode == tdvp); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + if (fdvp == tdvp) { + KASSERT(fdnode == tdnode); + error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp, + fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret); + } else { + KASSERT(fdnode != tdnode); + error = tmpfs_rename_enter_separate(mount, tmpfs, cred, + fdvp, fdnode, fcnp, fde_ret, fvp_ret, + tdvp, tdnode, tcnp, tde_ret, tvp_ret); + } + + if (error) + return error; + + KASSERT(*fde_ret != NULL); + KASSERT(*fvp_ret != NULL); + KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL)); + KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL)); + KASSERT((*tde_ret == NULL) || + ((*tde_ret)->td_node->tn_vnode == *tvp_ret)); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT((*tvp_ret == NULL) || + (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE)); + KASSERT(*fvp_ret != fdvp); + KASSERT(*fvp_ret != tdvp); + KASSERT(*tvp_ret != fdvp); + KASSERT(*tvp_ret != tdvp); + return 0; +} + +/* + * Lock and look up with a common source/target directory. + */ +int +tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs, + struct ucred *cred, + struct vnode *dvp, struct tmpfs_node *dnode, + struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + int error; + + error = tmpfs_rename_lock_directory(dvp, dnode); + if (error) + goto fail0; + + /* Did we lose a race with mount? */ + if (dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail1; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(dvp, VEXEC, cred, curproc); + if (error) + goto fail1; + + /* + * The order in which we lock the source and target nodes is + * irrelevant because there can only be one rename on this + * directory in flight at a time, and we have it locked. + */ + + fde = tmpfs_dir_lookup(dnode, fcnp); + if (fde == NULL) { + error = ENOENT; + goto fail1; + } + + KASSERT(fde->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(fde->td_node != dnode); + /* We ruled out `..' earlier. */ + KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent); + rw_enter_write(&fde->td_node->tn_nlock); + error = tmpfs_vnode_get(mount, fde->td_node, &fvp); + if (error) + goto fail1; + KASSERT(fvp != NULL); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT(fvp != dvp); + KASSERT(fvp->v_mount == mount); + + /* Refuse to rename a mount point. 
*/ + if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail2; + } + + tde = tmpfs_dir_lookup(dnode, tcnp); + if (tde == NULL) { + tvp = NULL; + } else { + KASSERT(tde->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(tde->td_node != dnode); + /* We ruled out `..' earlier. */ + KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent); + if (tde->td_node != fde->td_node) { + rw_enter_write(&tde->td_node->tn_nlock); + error = tmpfs_vnode_get(mount, tde->td_node, &tvp); + if (error) + goto fail2; + KASSERT(tvp->v_mount == mount); + /* Refuse to rename over a mount point. */ + if ((tvp->v_type == VDIR) && + (tvp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail3; + } + } else { + tvp = fvp; + vref(tvp); + } + KASSERT(tvp != NULL); + KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); + } + KASSERT(tvp != dvp); + + *fde_ret = fde; + *fvp_ret = fvp; + *tde_ret = tde; + *tvp_ret = tvp; + return 0; + +fail3: if (tvp != NULL) { + if (tvp != fvp) + vput(tvp); + else + vrele(tvp); + } + +fail2: vput(fvp); +fail1: VOP_UNLOCK(dvp, 0, curproc); +fail0: return error; +} + +/* + * Lock and look up with separate source and target directories. + */ +int +tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs, + struct ucred *cred, + struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp, + struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret, + struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp, + struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret) +{ + struct tmpfs_node *intermediate_node; + struct tmpfs_dirent *fde, *tde; + struct vnode *fvp, *tvp; + int error; + + KASSERT(fdvp != tdvp); + KASSERT(fdnode != tdnode); + +#if 0 /* XXX */ + mutex_enter(&tmpfs->tm_rename_lock); +#endif + + error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node); + if (error) + goto fail; + + /* + * intermediate_node == NULL means fdnode is not an ancestor of + * tdnode. + */ + if (intermediate_node == NULL) + error = tmpfs_rename_lock(mount, cred, ENOTEMPTY, + tdvp, tdnode, tcnp, 1, &tde, &tvp, + fdvp, fdnode, fcnp, 0, &fde, &fvp); + else + error = tmpfs_rename_lock(mount, cred, EINVAL, + fdvp, fdnode, fcnp, 0, &fde, &fvp, + tdvp, tdnode, tcnp, 1, &tde, &tvp); + if (error) + goto fail; + + KASSERT(fde != NULL); + KASSERT(fde->td_node != NULL); + + /* + * Reject rename("foo/bar", "foo/bar/baz/quux/zot"). + */ + if (fde->td_node == intermediate_node) { + tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp); + return EINVAL; + } + + *fde_ret = fde; + *fvp_ret = fvp; + *tde_ret = tde; + *tvp_ret = tvp; + return 0; + +fail: +#if 0 /* XXX */ + mutex_exit(&tmpfs->tm_rename_lock); +#endif + return error; +} + +/* + * Unlock everything we locked for rename. + * + * fdvp and tdvp must be referenced. + * + * On entry, everything is locked, and fvp and tvp referenced. + * + * On exit, everything is unlocked, and fvp and tvp are released. 
+ */ +void +tmpfs_rename_exit(struct tmpfs_mount *tmpfs, + struct vnode *fdvp, struct vnode *fvp, + struct vnode *tdvp, struct vnode *tvp) +{ + + KASSERT(tmpfs != NULL); + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(fdvp != fvp); + KASSERT(fdvp != tvp); + KASSERT(tdvp != tvp); + KASSERT(tdvp != fvp); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + + if (tvp != NULL) { + if (tvp != fvp) + vput(tvp); + else + vrele(tvp); + } + VOP_UNLOCK(tdvp, 0, curproc); + vput(fvp); + if (fdvp != tdvp) + VOP_UNLOCK(fdvp, 0, curproc); + +#if 0 /* XXX */ + if (fdvp != tdvp) + mutex_exit(&tmpfs->tm_rename_lock); +#endif +} + +/* + * Lock a directory, but fail if it has been rmdir'd. + * + * vp must be referenced. + */ +int +tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node) +{ + + KASSERT(vp != NULL); + KASSERT(node != NULL); + KASSERT(node->tn_vnode == vp); + KASSERT(node->tn_type == VDIR); + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); + if (node->tn_spec.tn_dir.tn_parent == NULL) { + VOP_UNLOCK(vp, 0, curproc); + return ENOENT; + } + + return 0; +} + +/* + * Analyze the genealogy of the source and target nodes. + * + * On success, stores in *intermediate_node_ret either the child of + * fdnode of which tdnode is a descendant, or null if tdnode is not a + * descendant of fdnode at all. + * + * fdnode and tdnode must be unlocked and referenced. The file + * system's rename lock must also be held, to exclude concurrent + * changes to the file system's genealogy other than rmdir. + * + * XXX This causes an extra lock/unlock of tdnode in the case when + * we're just about to lock it again before locking anything else. + * However, changing that requires reorganizing the code to make it + * even more horrifically obscure. + */ +int +tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode, + struct tmpfs_node **intermediate_node_ret) +{ + struct tmpfs_node *node = tdnode, *parent; + int error; + + KASSERT(fdnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode != tdnode); + KASSERT(intermediate_node_ret != NULL); + + KASSERT(fdnode->tn_vnode != NULL); + KASSERT(tdnode->tn_vnode != NULL); + KASSERT(fdnode->tn_type == VDIR); + KASSERT(tdnode->tn_type == VDIR); + + /* + * We need to provisionally lock tdnode->tn_vnode to keep rmdir + * from deleting it -- or any ancestor -- at an inopportune + * moment. + */ + error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode); + if (error) + return error; + + for (;;) { + parent = node->tn_spec.tn_dir.tn_parent; + KASSERT(parent != NULL); + KASSERT(parent->tn_type == VDIR); + + /* Did we hit the root without finding fdnode? */ + if (parent == node) { + *intermediate_node_ret = NULL; + break; + } + + /* Did we find that fdnode is an ancestor? */ + if (parent == fdnode) { + *intermediate_node_ret = node; + break; + } + + /* Neither -- keep ascending the family tree. */ + node = parent; + } + + VOP_UNLOCK(tdnode->tn_vnode, 0, curproc); + return 0; +} + +/* + * Lock directories a and b, which must be distinct, and look up and + * lock nodes a and b. Do a first and then b. Directory b may not be + * an ancestor of directory a, although directory a may be an ancestor + * of directory b. Fail with overlap_error if node a is directory b. + * Neither componentname may be `.' or `..'. + * + * a_dvp and b_dvp must be referenced. 
+ * + * On entry, a_dvp and b_dvp are unlocked. + * + * On success, + * . a_dvp and b_dvp are locked, + * . *a_dirent_ret is filled with a directory entry whose node is + * locked and referenced, + * . *b_vp_ret is filled with the corresponding vnode, + * . *b_dirent_ret is filled either with null or with a directory entry + * whose node is locked and referenced, + * . *b_vp is filled either with null or with the corresponding vnode, + * and + * . the only pair of vnodes that may be identical is a_vp and b_vp. + * + * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret, + * *a_vp, *b_dirent_ret, and *b_vp are left alone. + */ +int +tmpfs_rename_lock(struct mount *mount, struct ucred *cred, int overlap_error, + struct vnode *a_dvp, struct tmpfs_node *a_dnode, + struct componentname *a_cnp, int a_missing_ok, + struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret, + struct vnode *b_dvp, struct tmpfs_node *b_dnode, + struct componentname *b_cnp, int b_missing_ok, + struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret) +{ + struct tmpfs_dirent *a_dirent, *b_dirent; + struct vnode *a_vp, *b_vp; + int error; + + KASSERT(a_dvp != NULL); + KASSERT(a_dnode != NULL); + KASSERT(a_cnp != NULL); + KASSERT(a_dirent_ret != NULL); + KASSERT(a_vp_ret != NULL); + KASSERT(b_dvp != NULL); + KASSERT(b_dnode != NULL); + KASSERT(b_cnp != NULL); + KASSERT(b_dirent_ret != NULL); + KASSERT(b_vp_ret != NULL); + KASSERT(a_dvp != b_dvp); + KASSERT(a_dnode != b_dnode); + KASSERT(a_dnode->tn_vnode == a_dvp); + KASSERT(b_dnode->tn_vnode == b_dvp); + KASSERT(a_dnode->tn_type == VDIR); + KASSERT(b_dnode->tn_type == VDIR); + KASSERT(a_missing_ok != b_missing_ok); + + error = tmpfs_rename_lock_directory(a_dvp, a_dnode); + if (error) + goto fail0; + + /* Did we lose a race with mount? */ + if (a_dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail1; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(a_dvp, VEXEC, cred, curproc); + if (error) + goto fail1; + + a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp); + if (a_dirent != NULL) { + KASSERT(a_dirent->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(a_dirent->td_node != a_dnode); + /* We ruled out `..' earlier. */ + KASSERT(a_dirent->td_node != + a_dnode->tn_spec.tn_dir.tn_parent); + if (a_dirent->td_node == b_dnode) { + error = overlap_error; + goto fail1; + } + rw_enter_write(&a_dirent->td_node->tn_nlock); + error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp); + if (error) + goto fail1; + KASSERT(a_vp->v_mount == mount); + /* Refuse to rename (over) a mount point. */ + if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail2; + } + } else if (!a_missing_ok) { + error = ENOENT; + goto fail1; + } else { + a_vp = NULL; + } + KASSERT(a_vp != a_dvp); + KASSERT(a_vp != b_dvp); + + error = tmpfs_rename_lock_directory(b_dvp, b_dnode); + if (error) + goto fail2; + + /* Did we lose a race with mount? */ + if (b_dvp->v_mountedhere != NULL) { + error = EBUSY; + goto fail3; + } + + /* Make sure the caller may read the directory. */ + error = VOP_ACCESS(b_dvp, VEXEC, cred, curproc); + if (error) + goto fail3; + + b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp); + if (b_dirent != NULL) { + KASSERT(b_dirent->td_node != NULL); + /* We ruled out `.' earlier. */ + KASSERT(b_dirent->td_node != b_dnode); + /* We ruled out `..' earlier. */ + KASSERT(b_dirent->td_node != + b_dnode->tn_spec.tn_dir.tn_parent); + /* b is not an ancestor of a. 
*/ + KASSERT(b_dirent->td_node != a_dnode); + /* But the source and target nodes might be the same. */ + if ((a_dirent == NULL) || + (a_dirent->td_node != b_dirent->td_node)) { + rw_enter_write(&b_dirent->td_node->tn_nlock); + error = tmpfs_vnode_get(mount, b_dirent->td_node, + &b_vp); + if (error) + goto fail3; + KASSERT(b_vp->v_mount == mount); + KASSERT(a_vp != b_vp); + /* Refuse to rename (over) a mount point. */ + if ((b_vp->v_type == VDIR) && + (b_vp->v_mountedhere != NULL)) { + error = EBUSY; + goto fail4; + } + } else { + b_vp = a_vp; + vref(b_vp); + } + } else if (!b_missing_ok) { + error = ENOENT; + goto fail3; + } else { + b_vp = NULL; + } + KASSERT(b_vp != a_dvp); + KASSERT(b_vp != b_dvp); + + KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE); + KASSERT(a_missing_ok || (a_dirent != NULL)); + KASSERT(a_missing_ok || (a_dirent->td_node != NULL)); + KASSERT(b_missing_ok || (b_dirent != NULL)); + KASSERT(b_missing_ok || (b_dirent->td_node != NULL)); + KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL)); + KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp)); + KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL)); + KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp)); + KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE)); + KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE)); + + *a_dirent_ret = a_dirent; + *b_dirent_ret = b_dirent; + *a_vp_ret = a_vp; + *b_vp_ret = b_vp; + return 0; + +fail4: if (b_vp != NULL) { + KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE); + if (b_vp != a_vp) + vput(b_vp); + else + vrele(a_vp); + } + +fail3: KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE); + VOP_UNLOCK(b_dvp, 0, curproc); + +fail2: if (a_vp != NULL) { + KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE); + vput(a_vp); + } + +fail1: KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE); + VOP_UNLOCK(a_dvp, 0, curproc); + +fail0: /* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */ + /* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */ + /* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */ + return error; +} + +/* + * Shuffle the directory entries to move fvp from the directory fdvp + * into the directory tdvp. fde is fvp's directory entry in fdvp. If + * we are overwriting a target node, it is tvp, and tde is its + * directory entry in tdvp. + * + * fdvp, fvp, tdvp, and tvp must all be locked and referenced. + */ +void +tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs, + struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp, + struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp) +{ + + KASSERT(tmpfs != NULL); + KASSERT(fdvp != NULL); + KASSERT(fde != NULL); + KASSERT(fvp != NULL); + KASSERT(tdvp != NULL); + KASSERT(fde->td_node != NULL); + KASSERT(fde->td_node->tn_vnode == fvp); + KASSERT((tde == NULL) == (tvp == NULL)); + KASSERT((tde == NULL) || (tde->td_node != NULL)); + KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp)); + KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + + /* + * If we are moving from one directory to another, detach the + * source entry and reattach it to the target directory. + */ + if (fdvp != tdvp) { + /* tmpfs_dir_detach clobbers fde->td_node, so save it. 
*/ + struct tmpfs_node *fnode = fde->td_node; + tmpfs_dir_detach(fdvp, fde); + tmpfs_dir_attach(tdvp, fde, fnode); + } else if (tvp == NULL) { + /* + * We are changing the directory. tmpfs_dir_attach and + * tmpfs_dir_detach note the events for us, but for + * this case we don't call them, so we must note the + * event explicitly. + */ + VN_KNOTE(fdvp, NOTE_WRITE); + } + + /* + * If we are replacing an existing target entry, delete it. + */ + if (tde != NULL) { + KASSERT(tvp != NULL); + KASSERT(tde->td_node != NULL); + KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR)); + if (tde->td_node->tn_type == VDIR) { + KASSERT(tde->td_node->tn_size == 0); + KASSERT(tde->td_node->tn_links == 2); + /* Decrement the extra link count for `.' so + * the vnode will be recycled when released. */ + tde->td_node->tn_links--; + } + tmpfs_dir_detach(tdvp, tde); + tmpfs_free_dirent(tmpfs, tde); + } +} + +/* + * Remove the entry de for the non-directory vp from the directory dvp. + * + * Everything must be locked and referenced. + */ +int +tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp, + struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp, + struct ucred *cred) +{ + int error; + + KASSERT(tmpfs != NULL); + KASSERT(dvp != NULL); + KASSERT(dnode != NULL); + KASSERT(de != NULL); + KASSERT(vp != NULL); + KASSERT(dnode->tn_vnode == dvp); + KASSERT(de->td_node != NULL); + KASSERT(de->td_node->tn_vnode == vp); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); + + error = tmpfs_remove_check_possible(dnode, de->td_node); + if (error) + return error; + + error = tmpfs_remove_check_permitted(cred, dnode, de->td_node); + if (error) + return error; + + /* + * If not root and directory is sticky, check for permission on + * directory or on file. This implements append-only directories. + */ + if ((dnode->tn_mode & S_ISTXT) != 0) + if (cred->cr_uid != 0 && cred->cr_uid != dnode->tn_uid && + cred->cr_uid != de->td_node->tn_uid) + return EPERM; + + tmpfs_dir_detach(dvp, de); + tmpfs_free_dirent(tmpfs, de); + + return 0; +} + +/* + * Check whether a rename is possible independent of credentials. + * + * Everything must be locked and referenced. + */ +int +tmpfs_rename_check_possible( + struct tmpfs_node *fdnode, struct tmpfs_node *fnode, + struct tmpfs_node *tdnode, struct tmpfs_node *tnode) +{ + + KASSERT(fdnode != NULL); + KASSERT(fnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode != fnode); + KASSERT(tdnode != tnode); + KASSERT(fnode != tnode); + KASSERT(fdnode->tn_vnode != NULL); + KASSERT(fnode->tn_vnode != NULL); + KASSERT(tdnode->tn_vnode != NULL); + KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL)); + KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT((tnode == NULL) || + (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE)); + + /* + * If fdnode is immutable, we can't write to it. If fdnode is + * append-only, the only change we can make is to add entries + * to it. If fnode is immutable, we can't change the links to + * it. If fnode is append-only...well, this is what UFS does. + */ + if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND)) + return EPERM; + + /* + * If tdnode is immutable, we can't write to it. If tdnode is + * append-only, we can add entries, but we can't change + * existing entries. + */ + if (tdnode->tn_flags & (IMMUTABLE | (tnode? 
APPEND : 0))) + return EPERM; + + /* + * If tnode is immutable, we can't replace links to it. If + * tnode is append-only...well, this is what UFS does. + */ + if (tnode != NULL) { + KASSERT(tnode != NULL); + if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0) + return EPERM; + } + + return 0; +} + +/* + * Check whether a rename is permitted given our credentials. + * + * Everything must be locked and referenced. + */ +int +tmpfs_rename_check_permitted(struct ucred *cred, + struct tmpfs_node *fdnode, struct tmpfs_node *fnode, + struct tmpfs_node *tdnode, struct tmpfs_node *tnode) +{ + int error; + + KASSERT(fdnode != NULL); + KASSERT(fnode != NULL); + KASSERT(tdnode != NULL); + KASSERT(fdnode != fnode); + KASSERT(tdnode != tnode); + KASSERT(fnode != tnode); + KASSERT(fdnode->tn_vnode != NULL); + KASSERT(fnode->tn_vnode != NULL); + KASSERT(tdnode->tn_vnode != NULL); + KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL)); + KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT((tnode == NULL) || + (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE)); + + /* + * We need to remove or change an entry in the source directory. + */ + error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred, curproc); + if (error) + return error; + + /* + * If we are changing directories, then we need to write to the + * target directory to add or change an entry. Also, if fnode + * is a directory, we need to write to it to change its `..' + * entry. + */ + if (fdnode != tdnode) { + error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred, curproc); + if (error) + return error; + if (fnode->tn_type == VDIR) { + error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred, + curproc); + if (error) + return error; + } + } + + error = tmpfs_check_sticky(cred, fdnode, fnode); + if (error) + return error; + + error = tmpfs_check_sticky(cred, tdnode, tnode); + if (error) + return error; + + return 0; +} + +/* + * Check whether removing node's entry in dnode is possible independent + * of credentials. + * + * Everything must be locked and referenced. + */ +int +tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node) +{ + + KASSERT(dnode != NULL); + KASSERT(dnode->tn_vnode != NULL); + KASSERT(node != NULL); + KASSERT(dnode != node); + KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE); + + /* + * We want to delete the entry. If dnode is immutable, we + * can't write to it to delete the entry. If dnode is + * append-only, the only change we can make is to add entries, + * so we can't delete entries. If node is immutable, we can't + * change the links to it, so we can't delete the entry. If + * node is append-only...well, this is what UFS does. + */ + if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND)) + return EPERM; + + return 0; +} + +/* + * Check whether removing node's entry in dnode is permitted given our + * credentials. + * + * Everything must be locked and referenced. 
+ */ +int +tmpfs_remove_check_permitted(struct ucred *cred, + struct tmpfs_node *dnode, struct tmpfs_node *node) +{ + int error; + + KASSERT(dnode != NULL); + KASSERT(dnode->tn_vnode != NULL); + KASSERT(node != NULL); + KASSERT(dnode != node); + KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE); + + /* + * Check whether we are permitted to write to the source + * directory in order to delete an entry from it. + */ + error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred, curproc); + if (error) + return error; + + error = tmpfs_check_sticky(cred, dnode, node); + if (error) + return error; + + return 0; +} + +/* + * Check whether we may change an entry in a sticky directory. If the + * directory is sticky, the user must own either the directory or, if + * it exists, the node, in order to change the entry. + * + * Everything must be locked and referenced. + */ +int +tmpfs_check_sticky(struct ucred *cred, + struct tmpfs_node *dnode, struct tmpfs_node *node) +{ + + KASSERT(dnode != NULL); + KASSERT(dnode->tn_vnode != NULL); + KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE); + KASSERT((node == NULL) || (node->tn_vnode != NULL)); + KASSERT((node == NULL) || + (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE)); + + if (node == NULL) + return 0; + + if (dnode->tn_mode & S_ISTXT) { + if (cred->cr_uid != 0 && + cred->cr_uid != dnode->tn_uid && + cred->cr_uid != node->tn_uid) + return EPERM; + } + + return 0; +} + +void +tmpfs_rename_cache_purge(struct vnode *fdvp, struct vnode *fvp, + struct vnode *tdvp, struct vnode *tvp) +{ + + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(tdvp != NULL); + KASSERT(fdvp != fvp); + KASSERT(fdvp != tvp); + KASSERT(tdvp != fvp); + KASSERT(tdvp != tvp); + KASSERT(fvp != tvp); + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); + + /* + * XXX What actually needs to be purged? + */ + + cache_purge(fdvp); + + if (fvp->v_type == VDIR) + cache_purge(fvp); + + if (tdvp != fdvp) + cache_purge(tdvp); + + if ((tvp != NULL) && (tvp->v_type == VDIR)) + cache_purge(tvp); +} diff --git a/sys/tmpfs/tmpfs_vnops.h b/sys/tmpfs/tmpfs_vnops.h new file mode 100644 index 00000000000..13f07e2cc73 --- /dev/null +++ b/sys/tmpfs/tmpfs_vnops.h @@ -0,0 +1,84 @@ +/* $NetBSD: tmpfs_vnops.h,v 1.13 2011/05/24 20:17:49 rmind Exp $ */ + +/* + * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Julio M. Merino Vidal, developed as part of Google's Summer of Code + * 2005 program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FS_TMPFS_TMPFS_VNOPS_H_
+#define _FS_TMPFS_TMPFS_VNOPS_H_
+
+#if !defined(_KERNEL)
+#error not supposed to be exposed to userland.
+#endif
+
+/*
+ * Declarations for tmpfs_vnops.c.
+ */
+
+extern struct vops tmpfs_vops, tmpfs_fifovops, tmpfs_specvops;
+
+int tmpfs_lookup (void *);
+int tmpfs_create (void *);
+int tmpfs_mknod (void *);
+int tmpfs_open (void *);
+int tmpfs_close (void *);
+int tmpfs_access (void *);
+int tmpfs_getattr (void *);
+int tmpfs_setattr (void *);
+int tmpfs_read (void *);
+int tmpfs_write (void *);
+#define tmpfs_fcntl genfs_fcntl
+int tmpfs_ioctl (void *);
+int tmpfs_poll (void *);
+#define tmpfs_mmap genfs_mmap
+int tmpfs_fsync (void *);
+#define tmpfs_seek genfs_seek
+int tmpfs_remove (void *);
+int tmpfs_link (void *);
+int tmpfs_rename (void *);
+int tmpfs_mkdir (void *);
+int tmpfs_rmdir (void *);
+int tmpfs_symlink (void *);
+int tmpfs_readdir (void *);
+int tmpfs_readlink (void *);
+int tmpfs_inactive (void *);
+int tmpfs_reclaim (void *);
+int tmpfs_lock (void *);
+int tmpfs_unlock (void *);
+int tmpfs_islocked (void *);
+int tmpfs_strategy (void *);
+int tmpfs_print (void *);
+int tmpfs_pathconf (void *);
+int tmpfs_advlock (void *);
+int tmpfs_bwrite (void *);
+int tmpfs_getpages (void *);
+int tmpfs_putpages (void *);
+int tmpfs_whiteout (void *);
+
+#endif /* _FS_TMPFS_TMPFS_VNOPS_H_ */