/* $OpenBSD: sysv_shm.c,v 1.50 2009/06/02 12:11:16 guenther Exp $ */ /* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */ /* * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * Sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F39502-99-1-0512. */ /* * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Adam Glass and Charles M. * Hannum. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <sys/param.h> #include <sys/kernel.h> #include <sys/shm.h> #include <sys/proc.h> #include <sys/uio.h> #include <sys/time.h> #include <sys/malloc.h> #include <sys/mman.h> #include <sys/pool.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/stat.h> #include <sys/mount.h> #include <sys/syscallargs.h> #include <uvm/uvm_extern.h> extern struct shminfo shminfo; struct shmid_ds **shmsegs; /* linear mapping of shmid -> shmseg */ struct pool shm_pool; unsigned short *shmseqs; /* array of shm sequence numbers */ struct shmid_ds *shm_find_segment_by_shmid(int); /* * Provides the following externally accessible functions: * * shminit(void); initialization * shmexit(struct vmspace *) cleanup * shmfork(struct vmspace *, struct vmspace *) fork handling * shmsys(arg1, arg2, arg3, arg4); shm{at,ctl,dt,get}(arg2, arg3, arg4) * * Structures: * shmsegs (an array of 'struct shmid_ds *') * per proc 'struct shmmap_head' with an array of 'struct shmmap_state' */ #define SHMSEG_REMOVED 0x0200 /* can't overlap ACCESSPERMS */ #define SHMSEG_RMLINGER 0x0400 int shm_last_free, shm_nused, shm_committed; struct shm_handle { struct uvm_object *shm_object; }; struct shmmap_state { vaddr_t va; int shmid; }; struct shmmap_head { int shmseg; struct shmmap_state state[1]; }; int shm_find_segment_by_key(key_t); void shm_deallocate_segment(struct shmid_ds *); int shm_delete_mapping(struct vmspace *, struct shmmap_state *); int shmget_existing(struct proc *, struct sys_shmget_args *, int, int, register_t *); int shmget_allocate_segment(struct proc *, struct sys_shmget_args *, int, register_t *); int shm_find_segment_by_key(key_t key) { struct shmid_ds *shmseg; int i; for (i = 0; i < shminfo.shmmni; i++) { shmseg = shmsegs[i]; if (shmseg != NULL && shmseg->shm_perm.key == key) return (i); } return (-1); } struct shmid_ds * shm_find_segment_by_shmid(int shmid) { int segnum; struct shmid_ds *shmseg; segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shminfo.shmmni || (shmseg = shmsegs[segnum]) == NULL || shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) return (NULL); if ((shmseg->shm_perm.mode & (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) return (NULL); return (shmseg); } void shm_deallocate_segment(struct shmid_ds *shmseg) { struct shm_handle *shm_handle; size_t size; shm_handle = shmseg->shm_internal; size = round_page(shmseg->shm_segsz); uao_detach(shm_handle->shm_object); pool_put(&shm_pool, shmseg); shm_committed -= atop(size); shm_nused--; } int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s) { struct shmid_ds *shmseg; int segnum; size_t size; segnum = IPCID_TO_IX(shmmap_s->shmid); if (segnum < 0 || segnum >= shminfo.shmmni || (shmseg = shmsegs[segnum]) == NULL) return (EINVAL); size = round_page(shmseg->shm_segsz); uvm_deallocate(&vm->vm_map, shmmap_s->va, size); shmmap_s->shmid = -1; shmseg->shm_dtime = time_second; if ((--shmseg->shm_nattch <= 0) && (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { shm_deallocate_segment(shmseg); shm_last_free = segnum; shmsegs[shm_last_free] = NULL; } return (0); } int sys_shmdt(struct proc *p, void *v, register_t *retval) { struct sys_shmdt_args /* { syscallarg(const void *) shmaddr; } */ *uap = v; struct shmmap_head *shmmap_h; struct shmmap_state *shmmap_s; int i; shmmap_h = (struct shmmap_head *)p->p_vmspace->vm_shm; if (shmmap_h == NULL) return (EINVAL); for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1 && shmmap_s->va == (vaddr_t)SCARG(uap, shmaddr)) break; if (i == shmmap_h->shmseg) return (EINVAL); return (shm_delete_mapping(p->p_vmspace, shmmap_s)); } int sys_shmat(struct proc *p, void *v, register_t *retval) { struct sys_shmat_args /* { syscallarg(int) shmid; syscallarg(const void *) shmaddr; syscallarg(int) shmflg; } */ *uap = v; int error, i, flags; struct ucred *cred = p->p_ucred; struct shmid_ds *shmseg; struct shmmap_head *shmmap_h; struct shmmap_state *shmmap_s; struct shm_handle *shm_handle; vaddr_t attach_va; vm_prot_t prot; vsize_t size; shmmap_h = (struct shmmap_head *)p->p_vmspace->vm_shm; if (shmmap_h == NULL) { size = sizeof(int) + shminfo.shmseg * sizeof(struct shmmap_state); shmmap_h = malloc(size, M_SHM, M_WAITOK); shmmap_h->shmseg = shminfo.shmseg; for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++, shmmap_s++) shmmap_s->shmid = -1; p->p_vmspace->vm_shm = (caddr_t)shmmap_h; } shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid)); if (shmseg == NULL) return (EINVAL); error = ipcperm(cred, &shmseg->shm_perm, (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error) return (error); for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++) { if (shmmap_s->shmid == -1) break; shmmap_s++; } if (i >= shmmap_h->shmseg) return (EMFILE); size = round_page(shmseg->shm_segsz); prot = VM_PROT_READ; if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0) prot |= VM_PROT_WRITE; flags = MAP_ANON | MAP_SHARED; if (SCARG(uap, shmaddr)) { flags |= MAP_FIXED; if (SCARG(uap, shmflg) & SHM_RND) attach_va = (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1); else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0) attach_va = (vaddr_t)SCARG(uap, shmaddr); else return (EINVAL); } else { /* This is just a hint to uvm_map() about where to put it. */ attach_va = uvm_map_hint(p, prot); } shm_handle = shmseg->shm_internal; uao_reference(shm_handle->shm_object); error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, shm_handle->shm_object, 0, 0, UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0)); if (error) { uao_detach(shm_handle->shm_object); return (error); } shmmap_s->va = attach_va; shmmap_s->shmid = SCARG(uap, shmid); shmseg->shm_lpid = p->p_p->ps_mainproc->p_pid; shmseg->shm_atime = time_second; shmseg->shm_nattch++; *retval = attach_va; return (0); } int sys_shmctl(struct proc *p, void *v, register_t *retval) { struct sys_shmctl_args /* { syscallarg(int) shmid; syscallarg(int) cmd; syscallarg(struct shmid_ds *) buf; } */ *uap = v; return (shmctl1(p, SCARG(uap, shmid), SCARG(uap, cmd), (caddr_t)SCARG(uap, buf), copyin, copyout)); } int shmctl1(struct proc *p, int shmid, int cmd, caddr_t buf, int (*ds_copyin)(const void *, void *, size_t), int (*ds_copyout)(const void *, void *, size_t)) { struct ucred *cred = p->p_ucred; struct shmid_ds inbuf, *shmseg; int error; shmseg = shm_find_segment_by_shmid(shmid); if (shmseg == NULL) return (EINVAL); switch (cmd) { case IPC_STAT: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0) return (error); error = ds_copyout(shmseg, buf, sizeof(inbuf)); if (error) return (error); break; case IPC_SET: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) return (error); error = ds_copyin(buf, &inbuf, sizeof(inbuf)); if (error) return (error); shmseg->shm_perm.uid = inbuf.shm_perm.uid; shmseg->shm_perm.gid = inbuf.shm_perm.gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & ~ACCESSPERMS) | (inbuf.shm_perm.mode & ACCESSPERMS); shmseg->shm_ctime = time_second; break; case IPC_RMID: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) return (error); shmseg->shm_perm.key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { shm_deallocate_segment(shmseg); shm_last_free = IPCID_TO_IX(shmid); shmsegs[shm_last_free] = NULL; } break; case SHM_LOCK: case SHM_UNLOCK: default: return (EINVAL); } return (0); } int shmget_existing(struct proc *p, struct sys_shmget_args /* { syscallarg(key_t) key; syscallarg(size_t) size; syscallarg(int) shmflg; } */ *uap, int mode, int segnum, register_t *retval) { struct shmid_ds *shmseg; struct ucred *cred = p->p_ucred; int error; shmseg = shmsegs[segnum]; /* We assume the segnum is valid */ if ((error = ipcperm(cred, &shmseg->shm_perm, mode)) != 0) return (error); if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz) return (EINVAL); if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return (EEXIST); *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return (0); } int shmget_allocate_segment(struct proc *p, struct sys_shmget_args /* { syscallarg(key_t) key; syscallarg(size_t) size; syscallarg(int) shmflg; } */ *uap, int mode, register_t *retval) { size_t size; key_t key; int segnum; struct ucred *cred = p->p_ucred; struct shmid_ds *shmseg; struct shm_handle *shm_handle; int error = 0; if (SCARG(uap, size) < shminfo.shmmin || SCARG(uap, size) > shminfo.shmmax) return (EINVAL); if (shm_nused >= shminfo.shmmni) /* any shmids left? */ return (ENOSPC); size = round_page(SCARG(uap, size)); if (shm_committed + atop(size) > shminfo.shmall) return (ENOMEM); shm_nused++; shm_committed += atop(size); /* * If a key has been specified and we had to wait for memory * to be freed up we need to verify that no one has allocated * the key we want in the meantime. Yes, this is ugly. */ key = SCARG(uap, key); shmseg = pool_get(&shm_pool, key == IPC_PRIVATE ? PR_WAITOK : 0); if (shmseg == NULL) { shmseg = pool_get(&shm_pool, PR_WAITOK); if (shm_find_segment_by_key(key) != -1) { pool_put(&shm_pool, shmseg); shm_nused--; shm_committed -= atop(size); return (EAGAIN); } } /* XXX - hash shmids instead */ if (shm_last_free < 0) { for (segnum = 0; segnum < shminfo.shmmni && shmsegs[segnum]; segnum++) ; if (segnum == shminfo.shmmni) panic("shmseg free count inconsistent"); } else { segnum = shm_last_free; if (++shm_last_free >= shminfo.shmmni || shmsegs[shm_last_free]) shm_last_free = -1; } shmsegs[segnum] = shmseg; shm_handle = (struct shm_handle *)((caddr_t)shmseg + sizeof(*shmseg)); shm_handle->shm_object = uao_create(size, 0); shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid; shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid; shmseg->shm_perm.mode = (mode & (ACCESSPERMS|SHMSEG_RMLINGER)); shmseg->shm_perm.seq = shmseqs[segnum] = (shmseqs[segnum] + 1) & 0x7fff; shmseg->shm_perm.key = key; shmseg->shm_segsz = SCARG(uap, size); shmseg->shm_cpid = p->p_p->ps_mainproc->p_pid; shmseg->shm_lpid = shmseg->shm_nattch = 0; shmseg->shm_atime = shmseg->shm_dtime = 0; shmseg->shm_ctime = time_second; shmseg->shm_internal = shm_handle; *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return (error); } int sys_shmget(struct proc *p, void *v, register_t *retval) { struct sys_shmget_args /* { syscallarg(key_t) key; syscallarg(size_t) size; syscallarg(int) shmflg; } */ *uap = v; int segnum, mode, error; mode = SCARG(uap, shmflg) & ACCESSPERMS; if (SCARG(uap, shmflg) & _SHM_RMLINGER) mode |= SHMSEG_RMLINGER; if (SCARG(uap, key) != IPC_PRIVATE) { again: segnum = shm_find_segment_by_key(SCARG(uap, key)); if (segnum >= 0) return (shmget_existing(p, uap, mode, segnum, retval)); if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) return (ENOENT); } error = shmget_allocate_segment(p, uap, mode, retval); if (error == EAGAIN) goto again; return (error); } void shmfork(struct vmspace *vm1, struct vmspace *vm2) { struct shmmap_head *shmmap_h; struct shmmap_state *shmmap_s; struct shmid_ds *shmseg; size_t size; int i; if (vm1->vm_shm == NULL) { vm2->vm_shm = NULL; return; } shmmap_h = (struct shmmap_head *)vm1->vm_shm; size = sizeof(int) + shmmap_h->shmseg * sizeof(struct shmmap_state); vm2->vm_shm = malloc(size, M_SHM, M_WAITOK); bcopy(vm1->vm_shm, vm2->vm_shm, size); for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++, shmmap_s++) { if (shmmap_s->shmid != -1 && (shmseg = shmsegs[IPCID_TO_IX(shmmap_s->shmid)]) != NULL) shmseg->shm_nattch++; } } void shmexit(struct vmspace *vm) { struct shmmap_head *shmmap_h; struct shmmap_state *shmmap_s; int i; shmmap_h = (struct shmmap_head *)vm->vm_shm; if (shmmap_h == NULL) return; for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1) shm_delete_mapping(vm, shmmap_s); free(vm->vm_shm, M_SHM); vm->vm_shm = NULL; } void shminit(void) { pool_init(&shm_pool, sizeof(struct shmid_ds) + sizeof(struct shm_handle), 0, 0, 0, "shmpl", &pool_allocator_nointr); shmsegs = malloc(shminfo.shmmni * sizeof(struct shmid_ds *), M_SHM, M_WAITOK|M_ZERO); shmseqs = malloc(shminfo.shmmni * sizeof(unsigned short), M_SHM, M_WAITOK|M_ZERO); shminfo.shmmax *= PAGE_SIZE; /* actually in pages */ shm_last_free = 0; shm_nused = 0; shm_committed = 0; } /* * Userland access to struct shminfo. */ int sysctl_sysvshm(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int error, val; struct shmid_ds **newsegs; unsigned short *newseqs; if (namelen != 2) { switch (name[0]) { case KERN_SHMINFO_SHMMAX: case KERN_SHMINFO_SHMMIN: case KERN_SHMINFO_SHMMNI: case KERN_SHMINFO_SHMSEG: case KERN_SHMINFO_SHMALL: break; default: return (ENOTDIR); /* overloaded */ } } switch (name[0]) { case KERN_SHMINFO_SHMMAX: if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &shminfo.shmmax)) || newp == NULL) return (error); /* If new shmmax > shmall, crank shmall */ if (atop(round_page(shminfo.shmmax)) > shminfo.shmall) shminfo.shmall = atop(round_page(shminfo.shmmax)); return (0); case KERN_SHMINFO_SHMMIN: val = shminfo.shmmin; if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) || val == shminfo.shmmin) return (error); if (val <= 0) return (EINVAL); /* shmmin must be >= 1 */ shminfo.shmmin = val; return (0); case KERN_SHMINFO_SHMMNI: val = shminfo.shmmni; if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) || val == shminfo.shmmni) return (error); if (val < shminfo.shmmni || val > 0xffff) return (EINVAL); /* Expand shmsegs and shmseqs arrays */ newsegs = malloc(val * sizeof(struct shmid_ds *), M_SHM, M_WAITOK|M_ZERO); bcopy(shmsegs, newsegs, shminfo.shmmni * sizeof(struct shmid_ds *)); free(shmsegs, M_SHM); shmsegs = newsegs; newseqs = malloc(val * sizeof(unsigned short), M_SHM, M_WAITOK|M_ZERO); bcopy(shmseqs, newseqs, shminfo.shmmni * sizeof(unsigned short)); free(shmseqs, M_SHM); shmseqs = newseqs; shminfo.shmmni = val; return (0); case KERN_SHMINFO_SHMSEG: val = shminfo.shmseg; if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) || val == shminfo.shmseg) return (error); if (val <= 0) return (EINVAL); /* shmseg must be >= 1 */ shminfo.shmseg = val; return (0); case KERN_SHMINFO_SHMALL: val = shminfo.shmall; if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) || val == shminfo.shmall) return (error); if (val < shminfo.shmall) return (EINVAL); /* can't decrease shmall */ shminfo.shmall = val; return (0); default: return (EOPNOTSUPP); } /* NOTREACHED */ }