/*	$OpenBSD: cleanerd.c,v 1.13 2003/11/08 19:17:27 jmc Exp $	*/

/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1992, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*static char sccsid[] = "@(#)cleanerd.c	8.5 (Berkeley) 6/10/95";*/
static char rcsid[] = "$OpenBSD: cleanerd.c,v 1.13 2003/11/08 19:17:27 jmc Exp $";
#endif /* not lint */

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/time.h>

#include <ufs/ufs/dinode.h>
#include <ufs/lfs/lfs.h>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>

#include "clean.h"
char *special = "cleanerd";
int do_small = 0;
int do_mmap = 0;
int stat_report = 0;
struct cleaner_stats {
	double	util_tot;
	double	util_sos;
	int	blocks_read;
	int	blocks_written;
	int	segs_cleaned;
	int	segs_empty;
	int	segs_error;
} cleaner_stats;

struct seglist { 
	int sl_id;	/* segment number */
	int sl_cost; 	/* cleaning cost */
	char sl_bytes;	/* bytes in segment */
};

struct tossstruct {
	struct lfs *lfs;
	int seg;
};

#define	CLEAN_BYTES	0x1

/* function prototypes for system calls; not sure where they should go */
int	 lfs_segwait(fsid_t *, struct timeval *);
int	 lfs_segclean(fsid_t *, u_long);
int	 lfs_bmapv(fsid_t *, BLOCK_INFO *, int);
int	 lfs_markv(fsid_t *, BLOCK_INFO *, int);

/* function prototypes */
int	 bi_tossold(const void *, const void *, const void *);
int	 choose_segments(FS_INFO *, struct seglist *, 
	     int (*)(FS_INFO *, SEGUSE *));
void	 clean_fs(FS_INFO *, int (*)(FS_INFO *, SEGUSE *), int, long);
int	 clean_loop(FS_INFO *, int, long);
int	 clean_segment(FS_INFO *, int);
int	 cost_benefit(FS_INFO *, SEGUSE *);
int	 cost_compare(const void *, const void *);
void	 sig_report(int);

/*
 * Cleaning Cost Functions:
 *
 * These return the cost of cleaning a segment.  The higher the cost value
 * the better it is to clean the segment, so empty segments have the highest
 * cost.  (It is probably better to think of this as a priority value
 * instead).
 *
 * This is the cost-benefit policy simulated and described in Rosenblum's
 * 1991 SOSP paper.
 */

int
cost_benefit(FS_INFO *fsp, SEGUSE *su)
{
	struct lfs *lfsp;
	struct timeval t;
	int age;
	int live;

	gettimeofday(&t, NULL);

	live = su->su_nbytes;	
	age = t.tv_sec < su->su_lastmod ? 0 : t.tv_sec - su->su_lastmod;
	
	lfsp = &fsp->fi_lfs;
	if (live == 0)
		return (t.tv_sec * lblkno(lfsp, seg_size(lfsp)));
	else {
		/* 
		 * from lfsSegUsage.c (Mendel's code).
		 * priority calculation is done using INTEGER arithmetic.
		 * sizes are in BLOCKS (that is why we use lblkno below).
		 * age is in seconds.
		 *
		 * priority = ((seg_size - live) * age) / (seg_size + live)
		 */
#ifdef VERBOSE
		if (live < 0 || live > seg_size(lfsp)) {
			err(0, "Bad segusage count: %d", live);
			live = 0;
		}
#endif
		return (lblkno(lfsp, seg_size(lfsp) - live) * age)
			/ lblkno(lfsp, seg_size(lfsp) + live);
	}
}

int
main(int argc, char *argv[])
{
	FS_INFO	*fsp;
	struct statfs *lstatfsp;	/* file system stats */
	struct timeval timeout;		/* sleep timeout */
	fsid_t fsid;
	long clean_opts;		/* cleaning options */
	int nodaemon, segs_per_clean;
	int opt, cmd_err;
	char *fs_name;			/* name of filesystem to clean */
	extern int optind;

	cmd_err = nodaemon = 0;
	clean_opts = 0;
	segs_per_clean = 1;
	while ((opt = getopt(argc, argv, "bdmn:r:s")) != -1) {
		switch (opt) {
			case 'b':	/*
					 * Use live bytes to determine
					 * how many segs to clean.
					 */
				clean_opts |= CLEAN_BYTES;
				break;
			case 'd':	/* Debug mode. */
				nodaemon = 1;
				break;
			case 'm':	/* Use mmap instead of read/write */
				do_mmap = 1;
				break;
			case 'n':	/* How many segs to clean at once */
				segs_per_clean = atoi(optarg);
				break;
			case 'r':	/* Report every stat_report segments */
				stat_report = atoi(optarg);
				break;
			case 's':	/* small writes */
				do_small = 1;
				break;
			default:
				++cmd_err;
		}
	}
	argc -= optind;
	argv += optind;
	if (cmd_err || (argc != 1))
		err(1, "usage: lfs_cleanerd [-smd] fs_name");

	fs_name = argv[0];

	signal(SIGINT, sig_report);
	signal(SIGUSR1, sig_report);
	signal(SIGUSR2, sig_report);
	if (fs_getmntinfo(&lstatfsp, fs_name, MOUNT_LFS) == 0) {
		/* didn't find the filesystem */
		err(1, "lfs_cleanerd: filesystem %s isn't an LFS!", fs_name);
	}

	if (!nodaemon)	/* should we become a daemon, chdir to / & close fd's */
		if (daemon(0, 0) == -1)
			err(1, "lfs_cleanerd: couldn't become a daemon!");

	timeout.tv_sec = 5*60; /* five minutes */
	timeout.tv_usec = 0;
	fsid.val[0] = 0;
	fsid.val[1] = 0;

	for (fsp = get_fs_info(lstatfsp, do_mmap); ;
	    reread_fs_info(fsp, do_mmap)) {
		/*
		 * clean the filesystem, and, if it needed cleaning
		 * (i.e. it returned nonzero) try it again
		 * to make sure that some nasty process hasn't just
		 * filled the disk system up.
		 */
		if (clean_loop(fsp, segs_per_clean, clean_opts))
			continue;

#ifdef VERBOSE
		(void)printf("Cleaner going to sleep.\n");
#endif
		if (lfs_segwait(&fsid, &timeout) < 0)
			err(0, "lfs_segwait: returned error");
#ifdef VERBOSE
		(void)printf("Cleaner waking up.\n");
#endif
	}
}

/* return the number of segments cleaned */
int
clean_loop(FS_INFO *fsp, int nsegs, long options)
{
	double loadavg[MAXLOADS];
	time_t	now;
	u_long max_free_segs;
	u_long db_per_seg;

	/*
	 * Compute the maximum possible number of free segments, given the
	 * number of free blocks.
	 */
	db_per_seg = fsbtodb(&fsp->fi_lfs, fsp->fi_lfs.lfs_ssize);
	max_free_segs = fsp->fi_lfs.lfs_bfree / db_per_seg;
	
	/* 
	 * We will clean if there are not enough free blocks or total clean
	 * space is less than BUSY_LIM % of possible clean space.
	 */
	now = time((time_t *)NULL);
#ifdef VERBOSE
	printf("db_er_seg = %d max_free_segs = %d, bfree = %d avail = %d ",
	    db_per_seg, max_free_segs, fsp->fi_lfs.lfs_bfree,
	    fsp->fi_lfs.lfs_avail);
	printf("clean = %d\n", fsp->fi_cip->clean);
#endif
	if ((fsp->fi_lfs.lfs_bfree - fsp->fi_lfs.lfs_avail > db_per_seg &&
	    fsp->fi_lfs.lfs_avail < db_per_seg) ||
	    (fsp->fi_cip->clean < max_free_segs &&
	    (fsp->fi_cip->clean <= MIN_SEGS(&fsp->fi_lfs) ||
	    fsp->fi_cip->clean < max_free_segs * BUSY_LIM))) {
		printf("Cleaner Running  at %s (%d of %lu segments available)\n",
		    ctime(&now), fsp->fi_cip->clean, max_free_segs);
		clean_fs(fsp, cost_benefit, nsegs, options);
		return (1);
	} else {
		/* 
		 * We will also clean if the system is reasonably idle and
		 * the total clean space is less than IDLE_LIM % of possible
		 * clean space.
		 */
		if (getloadavg(loadavg, MAXLOADS) == -1) {
			perror("getloadavg: failed\n");
			return (-1);
		}
		if (loadavg[ONE_MIN] == 0.0 && loadavg[FIVE_MIN] &&
		    fsp->fi_cip->clean < max_free_segs * IDLE_LIM) {
			clean_fs(fsp, cost_benefit, nsegs, options);
			printf("Cleaner Running  at %s (system idle)\n",
			    ctime(&now));
			return (1);
		}
	} 
	printf("Cleaner Not Running at %s\n", ctime(&now));
	return (0);
}


void
clean_fs(FS_INFO *fsp, int (*cost_func)(FS_INFO *, SEGUSE *),
    int nsegs, long options)
{
	struct seglist *segs, *sp;
	int to_clean, cleaned_bytes;
	int i;

	if ((segs =
	    malloc(fsp->fi_lfs.lfs_nseg * sizeof(struct seglist))) == NULL) {
		err(0, "malloc failed");
		return;
	}
	i = choose_segments(fsp, segs, cost_func);
#ifdef VERBOSE
	printf("clean_fs: found %d segments to clean in file system %s\n",
		i, fsp->fi_statfsp->f_mntonname);
	fflush(stdout);
#endif
	if (i) {
		/* Check which cleaning algorithm to use. */
		if (options & CLEAN_BYTES) {
			cleaned_bytes = 0;
			to_clean = nsegs <<
			    (fsp->fi_lfs.lfs_segshift + fsp->fi_lfs.lfs_bshift);
			for (sp = segs; i && cleaned_bytes < to_clean;
			    i--, ++sp) {
				if (clean_segment(fsp, sp->sl_id) < 0)
					perror("clean_segment failed");
				else if (lfs_segclean(&fsp->fi_statfsp->f_fsid,
				    sp->sl_id) < 0)
					perror("lfs_segclean failed");
				printf("Cleaned segment %d (%d bytes)\n",
				    sp->sl_id, sp->sl_bytes);
				cleaned_bytes += sp->sl_bytes;
			}
		} else
			for (i = MIN(i, nsegs), sp = segs; i-- ; ++sp) {
				if (clean_segment(fsp, sp->sl_id) < 0)
					perror("clean_segment failed");
				else if (lfs_segclean(&fsp->fi_statfsp->f_fsid,
				    sp->sl_id) < 0)
					perror("lfs_segclean failed");
				printf("Completed cleaning segment %d\n", sp->sl_id);
			}
	}
	free(segs);
}

/*
 * Segment with the highest priority get sorted to the beginning of the
 * list.  This sort assumes that empty segments always have a higher
 * cost/benefit than any utilized segment.
 */
int
cost_compare(const void *a, const void *b)
{
	return (((struct seglist *)b)->sl_cost -
	    ((struct seglist *)a)->sl_cost);
}


/*
 * Returns the number of segments to be cleaned with the elements of seglist
 * filled in.
 */
int
choose_segments(FS_INFO *fsp, struct seglist *seglist,
    int (*cost_func)(FS_INFO *, SEGUSE *))
{
	struct lfs *lfsp;
	struct seglist *sp;
	SEGUSE *sup;
	int i, nsegs;

	lfsp = &fsp->fi_lfs;

#ifdef VERBOSE
	(void)printf("Entering choose_segments\n");
#endif
	dump_super(lfsp);
	dump_cleaner_info(fsp->fi_cip);

	for (sp = seglist, i = 0; i < lfsp->lfs_nseg; ++i) {
		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, i);
		 PRINT_SEGUSE(sup, i);
		if (!(sup->su_flags & SEGUSE_DIRTY) ||
		    sup->su_flags & SEGUSE_ACTIVE)
			continue;
#ifdef VERBOSE
		(void)printf("\tchoosing segment %d\n", i);
#endif
		sp->sl_cost = (*cost_func)(fsp, sup);
		sp->sl_id = i;
		sp->sl_bytes = sup->su_nbytes;
		++sp;
	}
	nsegs = sp - seglist;
	qsort(seglist, nsegs, sizeof(struct seglist), cost_compare);
#ifdef VERBOSE
	(void)printf("Returning %d segments\n", nsegs);
#endif
	return (nsegs);
}


int
clean_segment(FS_INFO *fsp, int id)
{
	BLOCK_INFO *block_array, *bp;
	SEGUSE *sp;
	struct lfs *lfsp;
	struct tossstruct t;
	caddr_t seg_buf;
	double util;
	int num_blocks, maxblocks, clean_blocks;

	lfsp = &fsp->fi_lfs;
	sp = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, id);

#ifdef VERBOSE
	(void)printf("cleaning segment %d: contains %lu bytes\n", id,
	    sp->su_nbytes);
	fflush(stdout);
#endif
	/* XXX could add debugging to verify that segment is really empty */
	if (sp->su_nbytes == sp->su_nsums * LFS_SUMMARY_SIZE) {
		++cleaner_stats.segs_empty;
		return (0);
	}

	/* map the segment into a buffer */
	if (mmap_segment(fsp, id, &seg_buf, do_mmap) < 0) {
		err(0, "mmap_segment failed");
		++cleaner_stats.segs_error;
		return (-1);
	}
	/* get a list of blocks that are contained by the segment */
	if (lfs_segmapv(fsp, id, seg_buf, &block_array, &num_blocks) < 0) {
		err(0, "clean_segment: lfs_segmapv failed");
		++cleaner_stats.segs_error;
		return (-1);
	}
	cleaner_stats.blocks_read += fsp->fi_lfs.lfs_ssize;

#ifdef VERBOSE
	(void)printf("lfs_segmapv returned %d blocks\n", num_blocks);
	fflush(stdout);
#endif

	/* get the current disk address of blocks contained by the segment */
	if (lfs_bmapv(&fsp->fi_statfsp->f_fsid, block_array, num_blocks) < 0) {
		perror("clean_segment: lfs_bmapv failed\n");
		++cleaner_stats.segs_error;
		return -1;
	}

	/* Now toss any blocks not in the current segment */
	t.lfs = lfsp;
	t.seg = id;
	toss(block_array, &num_blocks, sizeof(BLOCK_INFO), bi_tossold, &t);

	/* Check if last element should be tossed */
	if (num_blocks && bi_tossold(&t, block_array + num_blocks - 1, NULL))
		--num_blocks;

#ifdef VERBOSE
	{
		BLOCK_INFO *_bip;
		u_long *lp;
		int i;

		(void)printf("after bmapv still have %d blocks\n", num_blocks);
		fflush(stdout);
		if (num_blocks)
			printf("BLOCK INFOS\n");
		for (_bip = block_array, i=0; i < num_blocks; ++_bip, ++i) {
			PRINT_BINFO(_bip);
			lp = (u_long *)_bip->bi_bp;
		}
	}

#endif
	++cleaner_stats.segs_cleaned;
	cleaner_stats.blocks_written += num_blocks;
	util = ((double)num_blocks / fsp->fi_lfs.lfs_ssize);
	cleaner_stats.util_tot += util;
	cleaner_stats.util_sos += util * util;

	if (do_small)
		maxblocks = MAXPHYS / fsp->fi_lfs.lfs_bsize - 1;
	else
		maxblocks = num_blocks;

	for (bp = block_array; num_blocks > 0; bp += clean_blocks) {
		clean_blocks = maxblocks < num_blocks ? maxblocks : num_blocks;
		if (lfs_markv(&fsp->fi_statfsp->f_fsid,
		    bp, clean_blocks) < 0) {
			err(0, "clean_segment: lfs_markv failed");
			++cleaner_stats.segs_error;
			return (-1);
		}
		num_blocks -= clean_blocks;
	}
		
	free(block_array);
	munmap_segment(fsp, seg_buf, do_mmap);
	if (stat_report && cleaner_stats.segs_cleaned % stat_report == 0)
		sig_report(SIGUSR1);
	return (0);
}


int
bi_tossold(const void *client, const void *a, const void *b)
{
	const struct tossstruct *t;

	t = (struct tossstruct *)client;

	return (((BLOCK_INFO *)a)->bi_daddr == LFS_UNUSED_DADDR ||
	    datosn(t->lfs, ((BLOCK_INFO *)a)->bi_daddr) != t->seg);
}

void
sig_report(int sig)
{
	double avg;
	int save_errno = errno;

	printf("lfs_cleanerd:\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n\t\t%s%d\n",
		"blocks_read    ", cleaner_stats.blocks_read,
		"blocks_written ", cleaner_stats.blocks_written,
		"segs_cleaned   ", cleaner_stats.segs_cleaned,
		"segs_empty     ", cleaner_stats.segs_empty,
		"seg_error      ", cleaner_stats.segs_error);
	printf("\t\t%s%5.2f\n\t\t%s%5.2f\n",
		"util_tot       ", cleaner_stats.util_tot,
		"util_sos       ", cleaner_stats.util_sos);
	printf("\t\tavg util: %4.2f std dev: %9.6f\n",
		avg = cleaner_stats.util_tot / cleaner_stats.segs_cleaned,
		cleaner_stats.util_sos / cleaner_stats.segs_cleaned - avg * avg);
		
		
	if (sig == SIGUSR2) {
		cleaner_stats.blocks_read = 0;
		cleaner_stats.blocks_written = 0;
		cleaner_stats.segs_cleaned = 0;
		cleaner_stats.segs_empty = 0;
		cleaner_stats.segs_error = 0;
		cleaner_stats.util_tot = 0.0;
		cleaner_stats.util_sos = 0.0;
	}
	if (sig == SIGINT)
		exit(0);
	errno = save_errno;
}