summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtur Grabowski <art@cvs.openbsd.org>2003-04-14 04:53:52 +0000
committerArtur Grabowski <art@cvs.openbsd.org>2003-04-14 04:53:52 +0000
commit901c826a09497ead7016206abf7b50656baff91e (patch)
tree2721a7366483b12d83c3a02b75f402e9e3fd0860
parentbcf4f93c865e19f6d796c4f4095be600cd2b319e (diff)
There are two related changes.
The first one is an mquery(2) syscall. It's for asking the VM system about where to map things. It will be used by ld.so, read the man page for details. The second change is related and is a centralization of uvm_map hint that all callers of uvm_map calculated. This will allow us to adjust this hint on architectures that have segments for non-exec mappings. deraadt@ drahn@ ok.
-rw-r--r--lib/libc/shlib_version2
-rw-r--r--lib/libc/sys/Makefile.inc8
-rw-r--r--lib/libc/sys/mquery.2127
-rw-r--r--lib/libc/sys/mquery.c17
-rw-r--r--sys/kern/init_sysent.c4
-rw-r--r--sys/kern/kern_exec.c4
-rw-r--r--sys/kern/syscalls.c3
-rw-r--r--sys/kern/syscalls.master4
-rw-r--r--sys/kern/sysv_shm.c5
-rw-r--r--sys/sys/mman.h4
-rw-r--r--sys/sys/syscall.h7
-rw-r--r--sys/sys/syscallargs.h11
-rw-r--r--sys/uvm/uvm_map.c13
-rw-r--r--sys/uvm/uvm_map.h3
-rw-r--r--sys/uvm/uvm_mmap.c70
15 files changed, 256 insertions, 26 deletions
diff --git a/lib/libc/shlib_version b/lib/libc/shlib_version
index a048d137bb3..7b897a195c1 100644
--- a/lib/libc/shlib_version
+++ b/lib/libc/shlib_version
@@ -1,2 +1,2 @@
major=29
-minor=0 # note: remember to update minor in ../libpthread/shlib_version
+minor=1 # note: remember to update minor in ../libpthread/shlib_version
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index e6030715104..4501f4d6445 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile.inc,v 1.64 2003/01/31 21:47:46 millert Exp $
+# $OpenBSD: Makefile.inc,v 1.65 2003/04/14 04:53:50 art Exp $
# $NetBSD: Makefile.inc,v 1.35 1995/10/16 23:49:07 jtc Exp $
# @(#)Makefile.inc 8.1 (Berkeley) 6/17/93
@@ -21,7 +21,7 @@ DPSRCS+= Lint_Ovfork.c Lint_brk.c Lint_exect.c Lint_fork.c \
# glue to provide compatibility between GCC 1.X and 2.X and for compat
# with old syscall interfaces.
-SRCS+= ftruncate.c lseek.c mmap.c ptrace.c semctl.c truncate.c \
+SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
timer_create.c timer_delete.c timer_getoverrun.c timer_gettime.c \
timer_settime.c pread.c preadv.c pwrite.c pwritev.c
@@ -216,8 +216,8 @@ MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 chflags.2 \
getpid.2 getpriority.2 getrlimit.2 getrusage.2 getsid.2 getsockname.2 \
getsockopt.2 gettimeofday.2 getuid.2 intro.2 issetugid.2 ioctl.2 \
kill.2 kqueue.2 ktrace.2 link.2 \
- listen.2 lseek.2 mkdir.2 mkfifo.2 mknod.2 madvise.2 mincore.2 \
- minherit.2 mlock.2 mlockall.2 \
+ listen.2 lseek.2 mkdir.2 mkfifo.2 mknod.2 madvise.2 \
+ mquery.2 mincore.2 minherit.2 mlock.2 mlockall.2 \
mmap.2 mount.2 mprotect.2 msync.2 munmap.2 nanosleep.2 \
nfssvc.2 open.2 pathconf.2 \
pipe.2 profil.2 poll.2 ptrace.2 quotactl.2 read.2 readlink.2 reboot.2 \
diff --git a/lib/libc/sys/mquery.2 b/lib/libc/sys/mquery.2
new file mode 100644
index 00000000000..9e835bc4233
--- /dev/null
+++ b/lib/libc/sys/mquery.2
@@ -0,0 +1,127 @@
+.\" $OpenBSD: mquery.2,v 1.1 2003/04/14 04:53:50 art Exp $
+.\"
+.\" Copyright (c) 2003 Artur Grabowski <art@openbsd.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd April 2, 2003
+.Dt MQUERY 2
+.Os
+.Sh NAME
+.Nm mquery
+.Nd provide mapping hints to applications
+.Sh SYNOPSIS
+.Fd #include <sys/types.h>
+.Fd #include <sys/mman.h>
+.Ft int
+.Fn mquery "int flags" "void **addr" "size_t size" "int fd" "off_t off"
+.Sh DESCRIPTION
+The
+.Nm mquery
+system call checks the existing memory mappings of a process and returns
+hints to the caller about where to put a memory mapping.
+This hint can be later used when performing memory mappings with the
+.Fn mmap
+system call with
+.Dv MAP_FIXED
+in the flags.
+The
+.Fa addr
+argument should point to a memory location that stores a pointer
+which is used both as an input argument where the caller specifies the
+preferred address and as an output of the resulting hint.
+The
+.Fa size
+argument specifies the requested size of the memory area the caller
+is looking for.
+The
+.Fa fd
+and
+.Fa off
+arguments specify the file that will be mapped and the offset in it,
+this is the same as the corresponding arguments to
+.Fn mmap .
+.Pp
+The behavior of the function depends on the
+.Fa flags
+argument.
+If set to
+.Dv MAP_FIXED
+the pointer
+.Fa *addr
+is used as a fixed hint and
+.Fn mquery
+will return \-1 and set
+.Va errno
+to
+.Dv ENOMEM
+if there are not
+.Fa size
+bytes free after that address.
+Otherwise it will return 0 and
+.Fa *addr
+will not be changed.
+If no flags are set
+.Fn mquery
+will use
+.Fa *addr
+as a starting point in memory and will search forward to find
+a memory area with
+.Fa size
+bytes free and that will be suitable for creating a mapping for the
+file and offset specified in the
+.Fa fd
+and
+.Fa off
+arguments.
+When no such area can be found
+.Fn mquery
+will return \-1 and set
+.Va errno
+to
+.Dv ENOMEM .
+.Sh RETURN VALUES
+When a memory range satisfying the request is found
+.Fn mquery
+returns 0.
+Otherwise, \-1 is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+.Fn mquery
+will fail if:
+.Bl -tag -width ENOMEM
+.It Bq Er ENOMEM
+.Dv MAP_FIXED
+was specified and the requested memory area is unavailable.
+.It Bq Er ENOMEM
+There was not enough memory left after the hint specified.
+.It Bq Er EBADF
+.Fa fd
+is not a valid open file descriptor.
+.El
+.Sh SEE ALSO
+.Xr mmap 2
+.Sh HISTORY
+The
+.Fn mquery
+function first appeared in
+.Ox 3.4 .
diff --git a/lib/libc/sys/mquery.c b/lib/libc/sys/mquery.c
new file mode 100644
index 00000000000..2cb28e27a9b
--- /dev/null
+++ b/lib/libc/sys/mquery.c
@@ -0,0 +1,17 @@
+/* $OpenBSD: mquery.c,v 1.1 2003/04/14 04:53:50 art Exp $ */
+/*
+ * Written by Artur Grabowski <art@openbsd.org> Public Domain
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+/*
+ * This function provides 64-bit offset padding.
+ */
+int
+mquery(int flags, void **addr, size_t size, int fd, off_t off)
+{
+	int rv;
+
+	/*
+	 * Hand the arguments, unchanged and in order, to the mquery
+	 * system call through the indirect __syscall() entry point.
+	 * The syscall number is widened to quad_t before the call.
+	 */
+	rv = __syscall((quad_t)SYS_mquery, flags, addr, size, fd, off);
+	return (rv);
+}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 1ad052d3757..a95f5062910 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_sysent.c,v 1.59 2003/01/30 15:32:44 millert Exp $ */
+/* $OpenBSD: init_sysent.c,v 1.60 2003/04/14 04:53:50 art Exp $ */
/*
* System call switch table.
@@ -746,5 +746,7 @@ struct sysent sysent[] = {
sys_getresgid }, /* 283 = getresgid */
{ 3, s(struct sys_setresgid_args),
sys_setresgid }, /* 284 = setresgid */
+ { 5, s(struct sys_mquery_args),
+ sys_mquery }, /* 285 = mquery */
};
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 77faaf4960f..9504237bc22 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exec.c,v 1.76 2003/03/09 01:27:50 millert Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.77 2003/04/14 04:53:50 art Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
@@ -732,7 +732,7 @@ exec_sigcode_map(struct proc *p, struct emul *e)
}
/* Just a hint to uvm_mmap where to put it. */
- p->p_sigcode = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
+ p->p_sigcode = uvm_map_hint(p, VM_PROT_READ|VM_PROT_EXECUTE);
uao_reference(e->e_sigobject);
if (uvm_map(&p->p_vmspace->vm_map, &p->p_sigcode, round_page(sz),
e->e_sigobject, 0, 0, UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX,
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 86cedb94c6c..01a67ae9d07 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscalls.c,v 1.59 2003/01/30 03:32:44 millert Exp $ */
+/* $OpenBSD: syscalls.c,v 1.60 2003/04/14 04:53:50 art Exp $ */
/*
* System call names.
@@ -380,4 +380,5 @@ char *syscallnames[] = {
"setresuid", /* 282 = setresuid */
"getresgid", /* 283 = getresgid */
"setresgid", /* 284 = setresgid */
+ "mquery", /* 285 = mquery */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index b427bc216ee..e414ac0e100 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -1,4 +1,4 @@
-; $OpenBSD: syscalls.master,v 1.52 2003/01/30 03:29:49 millert Exp $
+; $OpenBSD: syscalls.master,v 1.53 2003/04/14 04:53:50 art Exp $
; $NetBSD: syscalls.master,v 1.32 1996/04/23 10:24:21 mycroft Exp $
; @(#)syscalls.master 8.2 (Berkeley) 1/13/94
@@ -570,3 +570,5 @@
gid_t *sgid); }
284 STD { int sys_setresgid(gid_t rgid, gid_t egid, \
gid_t sgid); }
+285 STD { int sys_mquery(int flags, void **addr, size_t size, \
+ int fd, off_t off); }
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index aaf85095b1a..56b473ccce1 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sysv_shm.c,v 1.33 2003/01/07 00:34:41 millert Exp $ */
+/* $OpenBSD: sysv_shm.c,v 1.34 2003/04/14 04:53:50 art Exp $ */
/* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */
/*
@@ -273,8 +273,7 @@ sys_shmat(struct proc *p, void *v, register_t *retval)
return (EINVAL);
} else {
/* This is just a hint to uvm_map() about where to put it. */
- attach_va = round_page((vaddr_t)p->p_vmspace->vm_taddr +
- MAXTSIZ + MAXDSIZ);
+ attach_va = uvm_map_hint(p, prot);
}
shm_handle = shmseg->shm_internal;
uao_reference(shm_handle->shm_object);
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index 2676f153f05..180c57726a8 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mman.h,v 1.11 2002/03/14 01:27:14 millert Exp $ */
+/* $OpenBSD: mman.h,v 1.12 2003/04/14 04:53:50 art Exp $ */
/* $NetBSD: mman.h,v 1.11 1995/03/26 20:24:23 jtc Exp $ */
/*-
@@ -106,7 +106,6 @@
#define MCL_CURRENT 0x01 /* lock all pages currently mapped */
#define MCL_FUTURE 0x02 /* lock all pages mapped in the future */
-
#ifndef _KERNEL
#include <sys/cdefs.h>
@@ -124,6 +123,7 @@ int munlockall(void);
int madvise(void *, size_t, int);
int mincore(void *, size_t, char *);
int minherit(void *, size_t, int);
+int mquery(int flags, void **addr, size_t size, int fd, off_t off);
__END_DECLS
#endif /* !_KERNEL */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index c1af409324c..5ca97f3505b 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscall.h,v 1.58 2003/01/30 03:32:44 millert Exp $ */
+/* $OpenBSD: syscall.h,v 1.59 2003/04/14 04:53:50 art Exp $ */
/*
* System call numbers.
@@ -668,4 +668,7 @@
/* syscall: "setresgid" ret: "int" args: "gid_t" "gid_t" "gid_t" */
#define SYS_setresgid 284
-#define SYS_MAXSYSCALL 285
+/* syscall: "mquery" ret: "int" args: "int" "void **" "size_t" "int" "off_t" */
+#define SYS_mquery 285
+
+#define SYS_MAXSYSCALL 286
diff --git a/sys/sys/syscallargs.h b/sys/sys/syscallargs.h
index 420bd13bbb3..e2d70e279d8 100644
--- a/sys/sys/syscallargs.h
+++ b/sys/sys/syscallargs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscallargs.h,v 1.60 2003/01/30 03:32:44 millert Exp $ */
+/* $OpenBSD: syscallargs.h,v 1.61 2003/04/14 04:53:50 art Exp $ */
/*
* System call argument lists.
@@ -1203,6 +1203,14 @@ struct sys_setresgid_args {
syscallarg(gid_t) sgid;
};
+/*
+ * Argument block handed to sys_mquery().
+ *
+ * flags: 0 or MAP_FIXED.
+ * addr:  user pointer to the address hint; sys_mquery() reads the hint
+ *        through it and writes the resulting address back.
+ * size:  size of the mapping being asked about.
+ * fd:    file that would be mapped; a negative value means no file.
+ * off:   offset within that file.
+ */
+struct sys_mquery_args {
+ syscallarg(int) flags;
+ syscallarg(void **) addr;
+ syscallarg(size_t) size;
+ syscallarg(int) fd;
+ syscallarg(off_t) off;
+};
+
/*
* System call prototypes.
*/
@@ -1492,3 +1500,4 @@ int sys_getresuid(struct proc *, void *, register_t *);
int sys_setresuid(struct proc *, void *, register_t *);
int sys_getresgid(struct proc *, void *, register_t *);
int sys_setresgid(struct proc *, void *, register_t *);
+int sys_mquery(struct proc *, void *, register_t *);
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index a7cba41ff66..30af50d1508 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.56 2002/12/09 02:35:21 art Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.57 2003/04/14 04:53:51 art Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -1072,6 +1072,16 @@ uvm_map_spacefits(vm_map_t map, vaddr_t *phint, vsize_t length,
}
/*
+ * uvm_map_hint: return the beginning of the best area suitable for
+ * creating a new mapping with "prot" protection.
+ */
+vaddr_t
+uvm_map_hint(struct proc *p, vm_prot_t prot)
+{
+	vaddr_t hint;
+
+	/*
+	 * "prot" is accepted but not consulted here: every caller gets
+	 * the same page-rounded address, vm_daddr + MAXDSIZ.
+	 */
+	hint = (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ;
+	return (round_page(hint));
+}
+
+/*
* uvm_map_findspace: find "length" sized space in "map".
*
* => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is
@@ -3588,7 +3598,6 @@ uvmspace_fork(vm1)
return(vm2);
}
-
#if defined(DDB)
/*
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index 8dbc4fda58b..97e23a3d830 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.h,v 1.28 2002/10/29 18:30:21 art Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.29 2003/04/14 04:53:51 art Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -335,6 +335,7 @@ int uvm_map_extract(vm_map_t, vaddr_t, vsize_t,
vm_map_t, vaddr_t *, int);
vm_map_entry_t uvm_map_findspace(vm_map_t, vaddr_t, vsize_t, vaddr_t *,
struct uvm_object *, voff_t, vsize_t, int);
+vaddr_t uvm_map_hint(struct proc *, vm_prot_t);
int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t);
int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int);
void uvm_map_init(void);
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 0e09b0096f2..0a68cc18275 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.39 2003/04/07 14:47:08 mpech Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.40 2003/04/14 04:53:51 art Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -117,6 +117,68 @@ sys_sstk(p, v, retval)
}
/*
+ * sys_mquery: provide mapping hints to applications that do fixed mappings
+ *
+ * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
+ * don't care about PMAP_PREFER or such)
+ * addr: in/out pointer; holds the placement hint on entry and receives
+ *       the address that was found on success.
+ * size: size of the mapping
+ * fd: fd of the file we want to map
+ * off: offset within the file
+ */
+
+/* ARGSUSED */
+int
+sys_mquery(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_mquery_args /* {
+		syscallarg(int) flags;
+		syscallarg(void **) addr;
+		syscallarg(size_t) size;
+		syscallarg(int) fd;
+		syscallarg(off_t) off;
+	} */ *uap = v;
+	struct file *fp;
+	struct uvm_object *uobj;
+	voff_t uoff;
+	int error;
+	vaddr_t vaddr;
+	int flags = 0;
+	/* Only used to choose a default hint when the caller passes 0. */
+	vm_prot_t prot = SCARG(uap, flags) & VM_PROT_ALL;
+	vm_map_t map = &p->p_vmspace->vm_map;
+	vm_map_entry_t entry;
+
+	if (SCARG(uap, flags) & MAP_FIXED)
+		flags |= UVM_FLAG_FIXED;
+
+	/* Fetch the caller's hint; *addr is both input and output. */
+	if ((error = copyin(SCARG(uap, addr), &vaddr, sizeof(void *))) != 0)
+		return (error);
+
+	/* A negative fd means there is no backing object to account for. */
+	if (SCARG(uap, fd) >= 0) {
+		if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
+			return (error);
+		uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
+		uoff = SCARG(uap, off);
+	} else {
+		fp = NULL;
+		uobj = NULL;
+		uoff = 0;
+	}
+
+	if (vaddr == 0)
+		vaddr = uvm_map_hint(p, prot);
+
+	/*
+	 * uvm_map_findspace() walks the map's entry list and requires the
+	 * caller to hold at least a read lock on the map.  The lock is
+	 * released again before copyout(), because copyout() can fault on
+	 * the user address and fault handling needs the map itself.
+	 * "entry" is only compared against NULL after the unlock.
+	 */
+	vm_map_lock_read(map);
+	entry = uvm_map_findspace(map, vaddr, SCARG(uap, size), &vaddr,
+	    uobj, uoff, 0, flags);
+	vm_map_unlock_read(map);
+
+	if (entry == NULL)
+		error = ENOMEM;
+	else
+		error = copyout(&vaddr, SCARG(uap, addr), sizeof(void *));
+
+	/* Drop the file reference taken by getvnode(), if any. */
+	if (fp != NULL)
+		FRELE(fp);
+	return (error);
+}
+
+/*
* sys_mincore: determine if pages are in core or not.
*/
@@ -350,10 +412,8 @@ sys_mmap(p, v, retval)
* we will refine our guess later (e.g. to account for VAC, etc)
*/
- if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr +
- MAXDSIZ))
- addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
- MAXDSIZ);
+ if (addr < uvm_map_hint(p, prot))
+ addr = uvm_map_hint(p, prot);
}
/*