summaryrefslogtreecommitdiff
path: root/sys/netinet/in_pcb.h
blob: 6e0b656b19f794f41b2115abcc6694aabf6102fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/*	$OpenBSD: in_pcb.h,v 1.157 2024/04/19 10:13:58 bluhm Exp $	*/
/*	$NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
 */

#ifndef _NETINET_IN_PCB_H_
#define _NETINET_IN_PCB_H_

#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/refcnt.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/ip_ipsp.h>

#include <crypto/siphash.h>

/*
 * Locks used to protect struct members in this file:
 *	I	immutable after creation
 *	N	net lock
 *	t	inpt_mtx		pcb table mutex
 *	y	inpt_notify		pcb table rwlock for notify
 *	p	inpcb_mtx		pcb mutex
 *	L	pf_inp_mtx		link pf to inp mutex
 *	s	so_lock			socket rwlock
 */

/*
 * The pcb table mutex guarantees that all inpcb are consistent and
 * that bind(2) and connect(2) create unique combinations of
 * laddr/faddr/lport/fport/rtableid.  This mutex is used to protect
 * both address consistency and inpcb lookup during protocol input.
 * All writes to inp_[lf]addr take the table mutex.  A per socket lock is
 * needed, so that socket layer input has a consistent view of these
 * values.
 *
 * In soconnect() and sosend() pcb mutex cannot be used.  They eventually
 * can call IP output which takes pf lock which is a sleeping lock.
 * Also connect(2) does a route lookup for source selection.  During
 * that lookup a route may be resolved, which creates a route, which sends
 * a route message, which needs the route lock, which is a rw-lock.
 *
 * On the other hand a mutex should be used in protocol input.  It
 * does not make sense to do a process switch per packet.  Better spin
 * until the packet can be processed.
 *
 * So there are three locks.  Table mutex is for writing inp_[lf]addr/port
 * and lookup, socket rw-lock to separate sockets in system calls, and
 * pcb mutex to protect socket receive buffer.  Changing inp_[lf]addr/port
 * takes both per socket rw-lock and global table mutex.  Protocol
 * input only reads inp_[lf]addr/port during lookup and is safe.  System
 * call only reads when holding socket rw-lock and is safe.  The socket
 * layer needs pcb mutex only in soreceive().
 *
 * Function pru_lock() grabs the pcb mutex and its existence indicates
 * that a protocol is MP safe.  Otherwise the exclusive net lock is
 * used.
 */

/* Forward declaration; this header only stores a pointer to it. */
struct pf_state_key;

/*
 * Storage for one endpoint address.  The IPv4 and the IPv6 form share
 * the same memory, so only one of them is meaningful at a time.
 */
union inpaddru {
	struct in_addr iau_addr;	/* IPv4 form */
	struct in6_addr iau_addr6;	/* IPv6 form */
};

/*
 * Common structure pcb for internet protocol implementation.
 * Here are stored pointers to local and foreign host table
 * entries, local and foreign socket numbers, and pointers
 * up (to a socket structure) and down (to a protocol-specific)
 * control block.
 *
 * The bracketed letter in a member comment names the lock protecting
 * that member; the letters are explained in the lock legend near the
 * top of this file.  Members without a letter carry no lock annotation
 * in this header.
 *
 * Several members are unions shared between IPv4 and IPv6.  The inp_*
 * macros defined inside the struct body are shorthands for members of
 * those unions.
 */
struct inpcb {
	LIST_ENTRY(inpcb) inp_hash;		/* [t] local and foreign hash */
	LIST_ENTRY(inpcb) inp_lhash;		/* [t] local port hash */
	TAILQ_ENTRY(inpcb) inp_queue;		/* [t] inet PCB queue */
	SIMPLEQ_ENTRY(inpcb) inp_notify;	/* [y] notify or udp append */
	struct	  inpcbtable *inp_table;	/* [I] inet queue/hash table */
	union	  inpaddru inp_faddru;		/* [t] Foreign address. */
	union	  inpaddru inp_laddru;		/* [t] Local address. */
	/* IPv4 and IPv6 views of the two address unions above. */
#define	inp_faddr	inp_faddru.iau_addr
#define	inp_faddr6	inp_faddru.iau_addr6
#define	inp_laddr	inp_laddru.iau_addr
#define	inp_laddr6	inp_laddru.iau_addr6
	u_int16_t inp_fport;		/* [t] foreign port */
	u_int16_t inp_lport;		/* [t] local port */
	struct	  socket *inp_socket;	/* [I] back pointer to socket */
	caddr_t	  inp_ppcb;		/* pointer to per-protocol pcb */
	struct    route inp_route;	/* cached route */
	struct    refcnt inp_refcnt;	/* refcount PCB, delay memory free */
	struct	  mutex inp_mtx;	/* protect PCB and socket members */
	int	  inp_flags;		/* generic IP/datagram flags */
	union {				/* Header prototype. */
		struct ip hu_ip;
		struct ip6_hdr hu_ipv6;
	} inp_hu;
#define	inp_ip		inp_hu.hu_ip
#define	inp_ipv6	inp_hu.hu_ipv6
	/* Anonymous union: both members are accessed directly by name. */
	union {
		struct	mbuf *inp_options;		/* IPv4 options */
		struct	ip6_pktopts *inp_outputopts6;	/* IPv6 options */
	};
	/*
	 * NOTE(review): no comment in the original; presumably the
	 * TTL/hop limit to use for this PCB - confirm against the users.
	 */
	int inp_hops;
	union {
		struct ip_moptions *mou_mo;
		struct ip6_moptions *mou_mo6;
	} inp_mou;
#define inp_moptions inp_mou.mou_mo	/* [N] IPv4 multicast options */
#define inp_moptions6 inp_mou.mou_mo6	/* [N] IPv6 multicast options */
	struct	ipsec_level   inp_seclevel;	/* [N] IPsec level of socket */
	u_char	inp_ip_minttl;		/* minimum TTL or drop */
#define inp_ip6_minhlim inp_ip_minttl	/* minimum Hop Limit or drop */
	/* The IPv6 flow info is kept in the IPv6 header prototype. */
#define	inp_flowinfo	inp_hu.hu_ipv6.ip6_flow

	/*
	 * NOTE(review): no comment in the original; presumably an IPv6
	 * checksum setting - confirm against the users.
	 */
	int	inp_cksum6;
	struct	icmp6_filter *inp_icmp6filt;	/* ICMPv6 filter */
	struct	pf_state_key *inp_pf_sk; /* [L] */
	/*
	 * Callback hook and the opaque argument stored with it.
	 * NOTE(review): who installs and who invokes the hook is not
	 * visible in this header.
	 */
	struct	mbuf *(*inp_upcall)(void *, struct mbuf *,
		    struct ip *, struct ip6_hdr *, void *, int);
	void	*inp_upcall_arg;
	u_int	inp_rtableid;		/* [t] */
	int	inp_pipex;		/* pipex indication */
	uint16_t inp_flowid;		/* [s] */
};

/* List head type for inpcb lists; used by the hash tables of inpcbtable. */
LIST_HEAD(inpcbhead, inpcb);

/*
 * Table of PCBs: a queue of inpcb plus two hash tables over them.
 * inpt_mtx and inpt_notify are the locks referred to as [t] and [y]
 * in the lock legend near the top of this file.
 */
struct inpcbtable {
	struct mutex inpt_mtx;			/* protect queue and hash */
	struct rwlock inpt_notify;		/* protect inp_notify list */
	TAILQ_HEAD(inpthead, inpcb) inpt_queue;	/* [t] inet PCB queue */
	struct	inpcbhead *inpt_hashtbl;	/* [t] local and foreign hash */
	struct	inpcbhead *inpt_lhashtbl;	/* [t] local port hash */
	SIPHASH_KEY inpt_key, inpt_lkey;	/* [I] secrets for hashes */
	u_long	inpt_mask, inpt_lmask;		/* [t] hash masks */
	int	inpt_count, inpt_size;		/* [t] queue count, hash size */
};

/* flags in inp_flags: */
#define	INP_RECVOPTS	0x001	/* receive incoming IP options */
#define	INP_RECVRETOPTS	0x002	/* receive IP options for reply */
#define	INP_RECVDSTADDR	0x004	/* receive IP dst address */

#define	INP_RXDSTOPTS	INP_RECVOPTS
#define	INP_RXHOPOPTS	INP_RECVRETOPTS
#define	INP_RXINFO	INP_RECVDSTADDR
#define	INP_RXSRCRT	0x010
#define	INP_HOPLIMIT	0x020

#define	INP_HDRINCL	0x008	/* user supplies entire IP header */
#define	INP_HIGHPORT	0x010	/* user wants "high" port binding */
#define	INP_LOWPORT	0x020	/* user wants "low" port binding */
#define	INP_RECVIF	0x080	/* receive incoming interface */
#define	INP_RECVTTL	0x040	/* receive incoming IP TTL */
#define	INP_RECVDSTPORT	0x200	/* receive IP dst addr before rdr */
#define	INP_RECVRTABLE	0x400	/* receive routing table */
#define	INP_IPSECFLOWINFO 0x800	/* receive IPsec flow info */

#define	INP_CONTROLOPTS	(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR| \
	    INP_RXSRCRT|INP_HOPLIMIT|INP_RECVIF|INP_RECVTTL|INP_RECVDSTPORT| \
	    INP_RECVRTABLE)

/*
 * These flags' values should be determined by either the transport
 * protocol at PRU_BIND, PRU_LISTEN, PRU_CONNECT, etc, or by in_pcb*().
 *
 * The value 0x100 is not used by any other INP_* or IN6P_* flag
 * defined in this header.
 */
#define INP_IPV6	0x100	/* socket, proto, domain, family is PF_INET6 */

/*
 * Flags in inp_flags for IPV6
 *
 * The first three names are aliases of INP_* flags; the remaining
 * ones use bits from 0x010000 upward, so they do not collide with the
 * INP_* values defined in this header.
 *
 * NOTE(review): IN6P_MTU is 0x80000000 (bit 31) while inp_flags is
 * declared as int; keep that in mind when comparing or shifting.
 */
#define IN6P_HIGHPORT		INP_HIGHPORT	/* user wants "high" port */
#define IN6P_LOWPORT		INP_LOWPORT	/* user wants "low" port */
#define IN6P_RECVDSTPORT	INP_RECVDSTPORT	/* receive IP dst addr before rdr */
#define IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
#define IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
#define IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
#define IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
#define IN6P_RTHDR		0x100000 /* receive routing header */
#define IN6P_TCLASS		0x400000 /* receive traffic class value */
#define IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */

#define IN6P_ANONPORT		0x4000000 /* port chosen for user */
#define IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU		0x80000000 /* receive path MTU */

#define IN6P_MINMTU		0x20000000 /* use minimum MTU */

/* IN6P_ANONPORT and IN6P_MINMTU are not part of this mask. */
#define IN6P_CONTROLOPTS	(IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
				 IN6P_DSTOPTS|IN6P_RTHDR|\
				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
				 IN6P_MTU|IN6P_RECVDSTPORT)

/*
 * Lookup flags; the values are distinct bits and can be or'ed together.
 * NOTE(review): which lookup functions accept them is not stated in
 * this header.
 */
#define	INPLOOKUP_WILDCARD	1
#define	INPLOOKUP_SETLOCAL	2
#define	INPLOOKUP_IPV6		4

/* Yield the so_pcb member of socket so, cast to struct inpcb *. */
#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)

/*
 * Macros for handling the bitmap of ports not to allocate dynamically.
 * The map is an array of u_int32_t words holding one bit per port;
 * m is the word array and p the port number.  DP_MAPWORD() selects the
 * word and DP_MAPMASK() the bit within it.  Note that p is evaluated
 * more than once.
 */
#define	DP_MAPBITS	(NBBY * sizeof(u_int32_t))
#define	DP_MAPSIZE	(howmany(65536, DP_MAPBITS))
#define	DP_MAPWORD(p)	((p) / DP_MAPBITS)
#define	DP_MAPMASK(p)	(1U << ((p) % DP_MAPBITS))
#define	DP_SET(m, p)	((m)[DP_MAPWORD(p)] |= DP_MAPMASK(p))
#define	DP_CLR(m, p)	((m)[DP_MAPWORD(p)] &= ~DP_MAPMASK(p))
#define	DP_ISSET(m, p)	((m)[DP_MAPWORD(p)] & DP_MAPMASK(p))

/*
 * Default port lists.  DEFBADDYNAMICPORTS_* hold the default values
 * for baddynamicports [see ip_init()]; DEFROOTONLYPORTS_* are
 * presumably the counterpart for rootonlyports.  Each list is an
 * initializer that ends with a 0 entry.
 */
#define	DEFBADDYNAMICPORTS_TCP	{ \
	587, 749, 750, 751, 853, 871, 2049, \
	6000, 6001, 6002, 6003, 6004, 6005, \
	6006, 6007, 6008, 6009, 6010, \
	0 \
}
#define	DEFBADDYNAMICPORTS_UDP	{ \
	623, 664, 749, 750, 751, 2049, \
	3784, 3785, 7784,	/* BFD/S-BFD ports */ \
	0 \
}

#define	DEFROOTONLYPORTS_TCP	{ 2049, 0 }
#define	DEFROOTONLYPORTS_UDP	{ 2049, 0 }

/*
 * Pair of port bitmaps, one for TCP and one for UDP, each DP_MAPSIZE
 * words long; access the bits with DP_SET(), DP_CLR() and DP_ISSET().
 * The kernel declares both baddynamicports and rootonlyports with
 * this type.
 */
struct baddynamicports {
	u_int32_t tcp[DP_MAPSIZE];
	u_int32_t udp[DP_MAPSIZE];
};

#ifdef _KERNEL

/*
 * NOTE(review): presumably the values for the trailing int argument of
 * the *_lock() functions declared below, telling whether the caller
 * already holds the lock or the callee has to grab it - confirm
 * against the function definitions.
 */
#define IN_PCBLOCK_HOLD	1
#define IN_PCBLOCK_GRAB	2

/* Kernel globals declared here and defined outside this header. */
extern struct inpcbtable rawcbtable, rawin6pcbtable;
/* Port bitmaps; see struct baddynamicports and the DP_*() macros. */
extern struct baddynamicports baddynamicports;
extern struct baddynamicports rootonlyports;
/* NOTE(review): meaning not documented here; see the definition. */
extern int in_pcbnotifymiss;

/*
 * IPv4 and protocol-independent PCB functions.  The prototypes carry
 * no parameter names, so the meaning of the arguments has to be taken
 * from the function definitions.
 */
void	 in_init(void);
void	 in_losing(struct inpcb *);
int	 in_pcballoc(struct socket *, struct inpcbtable *, int);
int	 in_pcbbind_locked(struct inpcb *, struct mbuf *, const void *,
	    struct proc *);
int	 in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
int	 in_pcbaddrisavail(const struct inpcb *, struct sockaddr_in *, int,
	    struct proc *);
int	 in_pcbconnect(struct inpcb *, struct mbuf *);
void	 in_pcbdetach(struct inpcb *);
/* NOTE(review): presumably take and release a reference on inp_refcnt. */
struct inpcb *
	 in_pcbref(struct inpcb *);
void	 in_pcbunref(struct inpcb *);
void	 in_pcbdisconnect(struct inpcb *);
/* Lookups in a PCB table; each returns a pointer to struct inpcb. */
struct inpcb *
	 in_pcblookup(struct inpcbtable *, struct in_addr,
			       u_int, struct in_addr, u_int, u_int);
struct inpcb *
	 in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int,
	    struct mbuf *, u_int);
/* IPv6 functions that are only declared when INET6 is defined. */
#ifdef INET6
uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *,
	    u_short, const struct in6_addr *, u_short);
struct inpcb *
	 in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
	    u_int, const struct in6_addr *, u_int, u_int);
struct inpcb *
	 in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
	    struct mbuf *, u_int);
/*
 * NOTE(review): the extra trailing int of the _lock variant is
 * presumably IN_PCBLOCK_HOLD or IN_PCBLOCK_GRAB - confirm.
 */
int	 in6_pcbaddrisavail_lock(const struct inpcb *, struct sockaddr_in6 *,
	    int, struct proc *, int);
int	 in6_pcbaddrisavail(const struct inpcb *, struct sockaddr_in6 *, int,
	    struct proc *);
int	 in6_pcbconnect(struct inpcb *, struct mbuf *);
void	 in6_setsockaddr(struct inpcb *, struct mbuf *);
void	 in6_setpeeraddr(struct inpcb *, struct mbuf *);
int	 in6_sockaddr(struct socket *, struct mbuf *);
int	 in6_peeraddr(struct socket *, struct mbuf *);
#endif /* INET6 */
/* Table setup, local lookup, notification and address helpers. */
void	 in_pcbinit(struct inpcbtable *, int);
/*
 * The address is passed as const void *.
 * NOTE(review): the trailing int is presumably IN_PCBLOCK_HOLD or
 * IN_PCBLOCK_GRAB - confirm.
 */
struct inpcb *
	 in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
	    u_int, int);
/* The last argument is a callback taking a PCB and an int. */
void	 in_pcbnotifyall(struct inpcbtable *, const struct sockaddr_in *,
	    u_int, int, void (*)(struct inpcb *, int));
void	 in_pcbrehash(struct inpcb *);
void	 in_rtchange(struct inpcb *, int);
void	 in_setpeeraddr(struct inpcb *, struct mbuf *);
void	 in_setsockaddr(struct inpcb *, struct mbuf *);
int	 in_sockaddr(struct socket *, struct mbuf *);
int	 in_peeraddr(struct socket *, struct mbuf *);
/* NOTE(review): presumably query the baddynamicports/rootonlyports maps. */
int	 in_baddynamic(u_int16_t, u_int16_t);
int	 in_rootonly(u_int16_t, u_int16_t);
int	 in_pcbselsrc(struct in_addr *, struct sockaddr_in *, struct inpcb *);
struct rtentry *
	in_pcbrtentry(struct inpcb *);

/*
 * INET6 stuff
 *
 * These prototypes are outside the #ifdef INET6 block above, so they
 * are declared whether or not INET6 is defined.  The in_pcbset_*() and
 * in_pcbunset_*() functions at the end of the list do not have the
 * in6_ prefix.
 */
struct rtentry *
	in6_pcbrtentry(struct inpcb *);
/* The last argument is a callback taking a PCB and an int. */
void	in6_pcbnotify(struct inpcbtable *, const struct sockaddr_in6 *,
	u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
	void (*)(struct inpcb *, int));
int	in6_selecthlim(const struct inpcb *);
int	in_pcbset_rtableid(struct inpcb *, u_int);
void	in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
void	in_pcbunset_faddr(struct inpcb *);
void	in_pcbunset_laddr(struct inpcb *);

#endif /* _KERNEL */
#endif /* _NETINET_IN_PCB_H_ */