diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/arch/i386/i386/acpi_wakecode.S | 27 | ||||
-rw-r--r-- | sys/arch/i386/i386/hibernate_machdep.c | 492 | ||||
-rw-r--r-- | sys/arch/i386/include/hibernate.h | 8 | ||||
-rw-r--r-- | sys/arch/i386/include/hibernate_var.h | 19 | ||||
-rw-r--r-- | sys/kern/subr_hibernate.c | 978 | ||||
-rw-r--r-- | sys/sys/hibernate.h | 49 |
6 files changed, 1035 insertions, 538 deletions
diff --git a/sys/arch/i386/i386/acpi_wakecode.S b/sys/arch/i386/i386/acpi_wakecode.S index fd4261c5a0a..63e71928bd5 100644 --- a/sys/arch/i386/i386/acpi_wakecode.S +++ b/sys/arch/i386/i386/acpi_wakecode.S @@ -53,7 +53,10 @@ #define _ACPI_TRMP_LABEL(a) a = . - _C_LABEL(acpi_real_mode_resume) + ACPI_TRAMPOLINE #define _ACPI_TRMP_OFFSET(a) a = . - _C_LABEL(acpi_real_mode_resume) #define _ACPI_RM_SEGMENT (ACPI_TRAMPOLINE >> 4) + +#ifdef HIBERNATE #define HIBERNATE_STACK_OFFSET 0x0F00 +#endif /* * On wakeup, we'll start executing at acpi_real_mode_resume. @@ -320,12 +323,13 @@ _C_LABEL(acpi_protected_mode_resume): xorl %eax, %eax jmp *acpi_saved_ret +#ifdef HIBERNATE /* - * hibernate_resume_machine drops to real mode and + * hibernate_resume_machdep drops to real mode and * restarts the OS using the saved S3 resume vector */ .code32 -NENTRY(hibernate_resume_machine) +NENTRY(hibernate_resume_machdep) cli /* Jump to the identity mapped version of ourself */ mov $hibernate_resume_vector_2, %eax @@ -363,32 +367,37 @@ _ACPI_TRMP_LABEL(hibernate_resume_vector_3) .code32 /* Switch to hibernate resume pagetable */ -NENTRY(hibernate_activate_resume_pt) +NENTRY(hibernate_activate_resume_pt_machdep) /* Enable large pages */ movl %eax, %cr4 orl $(CR4_PSE), %eax movl %eax, %cr4 - movl $HIBERNATE_PT_PAGE, %eax + movl $HIBERNATE_PD_PAGE, %eax movl %eax, %cr3 jmp 1f 1: nop ret -NENTRY(hibernate_switch_stack) + /* + * Switch to the private resume-time hibernate stack + */ +NENTRY(hibernate_switch_stack_machdep) movl (%esp), %eax - movl %eax, HIBERNATE_STACK_PAGE + HIBERNATE_STACK_OFFSET - movl $(HIBERNATE_STACK_PAGE + HIBERNATE_STACK_OFFSET), %eax - movl %eax, %esp + movl %eax, HIBERNATE_STACK_PAGE + HIBERNATE_STACK_OFFSET + movl $(HIBERNATE_STACK_PAGE + HIBERNATE_STACK_OFFSET), %eax + movl %eax, %esp /* On our own stack from here onward */ ret NENTRY(hibernate_flush) wbinvd - invlpg HIBERNATE_TEMP_PAGE + movl $hibernate_inflate_page, %eax + invlpg (%eax) ret +#endif /* HIBERNATE */ .code16 .align 8 diff --git a/sys/arch/i386/i386/hibernate_machdep.c b/sys/arch/i386/i386/hibernate_machdep.c index f9b4ba73c5d..21af80dde9b 100644 --- a/sys/arch/i386/i386/hibernate_machdep.c +++ b/sys/arch/i386/i386/hibernate_machdep.c @@ -44,23 +44,15 @@ #include "acpi.h" #include "wd.h" -#ifndef SMALL_KERNEL /* Hibernate support */ -int hibernate_write_image(void); -int hibernate_read_image(void); -void hibernate_unpack_image(void); void hibernate_enter_resume_4k_pte(vaddr_t, paddr_t); void hibernate_enter_resume_4k_pde(vaddr_t); void hibernate_enter_resume_4m_pde(vaddr_t, paddr_t); -void hibernate_populate_resume_pt(paddr_t, paddr_t); -int get_hibernate_info_md(union hibernate_info *); +int hibernate_read_chunks(union hibernate_info *, paddr_t, paddr_t, size_t); -union hibernate_info *global_hiber_info; -paddr_t global_image_start; +extern vaddr_t hibernate_inflate_page; -extern void hibernate_resume_machine(void); -extern void hibernate_activate_resume_pt(void); -extern void hibernate_switch_stack(void); +extern void hibernate_resume_machdep(void); extern void hibernate_flush(void); extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t); extern caddr_t start, end; @@ -73,41 +65,6 @@ extern struct hibernate_state *hibernate_state; */ /* - * hibernate_zlib_reset - * - * Reset the zlib stream state and allocate a new hiballoc area for either - * inflate or deflate. This function is called once for each hibernate chunk - * Calling hiballoc_init multiple times is acceptable since the memory it is - * provided is unmanaged memory (stolen). - * - */ -int -hibernate_zlib_reset(int deflate) -{ - hibernate_state = (struct hibernate_state *)HIBERNATE_ZLIB_SCRATCH; - - bzero((caddr_t)HIBERNATE_ZLIB_START, HIBERNATE_ZLIB_SIZE); - bzero((caddr_t)HIBERNATE_ZLIB_SCRATCH, PAGE_SIZE); - - /* Set up stream structure */ - hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; - hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; - - /* Initialize the hiballoc arena for zlib allocs/frees */ - hiballoc_init(&hibernate_state->hiballoc_arena, - (caddr_t)HIBERNATE_ZLIB_START, HIBERNATE_ZLIB_SIZE); - - if (deflate) { - return deflateInit(&hibernate_state->hib_stream, - Z_DEFAULT_COMPRESSION); - } - else - return inflateInit(&hibernate_state->hib_stream); -} - -/* - * get_hibernate_io_function - * * Returns the hibernate write I/O function to use on this machine * */ @@ -126,8 +83,6 @@ get_hibernate_io_function() } /* - * get_hibernate_info_md - * * Gather MD-specific data and store into hiber_info */ int @@ -165,18 +120,17 @@ get_hibernate_info_md(union hibernate_info *hiber_info) /* * Enter a mapping for va->pa in the resume pagetable, using - * the specified size hint. + * the specified size. * - * hint : 0 if a 4KB mapping is desired + * size : 0 if a 4KB mapping is desired * 1 if a 4MB mapping is desired */ void -hibernate_enter_resume_mapping(vaddr_t va, paddr_t pa, int hint) +hibernate_enter_resume_mapping(vaddr_t va, paddr_t pa, int size) { - if (hint) + if (size) return hibernate_enter_resume_4m_pde(va, pa); else { - hibernate_enter_resume_4k_pde(va); return hibernate_enter_resume_4k_pte(va, pa); } } @@ -222,19 +176,32 @@ hibernate_enter_resume_4k_pde(vaddr_t va) /* * Create the resume-time page table. This table maps the image(pig) area, - * the kernel text area, and various utility pages located in low memory for - * use during resume, since we cannot overwrite the resuming kernel's - * page table during inflate and expect things to work properly. + * the kernel text area, and various utility pages for use during resume, + * since we cannot overwrite the resuming kernel's page table during inflate + * and expect things to work properly. */ void -hibernate_populate_resume_pt(paddr_t image_start, paddr_t image_end) +hibernate_populate_resume_pt(union hibernate_info *hib_info, + paddr_t image_start, paddr_t image_end) { int phys_page_number, i; - paddr_t pa; + paddr_t pa, piglet_start, piglet_end; vaddr_t kern_start_4m_va, kern_end_4m_va, page; + /* Identity map PD, PT, and stack pages */ + pmap_kenter_pa(HIBERNATE_PT_PAGE, HIBERNATE_PT_PAGE, VM_PROT_ALL); + pmap_kenter_pa(HIBERNATE_PD_PAGE, HIBERNATE_PD_PAGE, VM_PROT_ALL); + pmap_kenter_pa(HIBERNATE_STACK_PAGE, HIBERNATE_STACK_PAGE, VM_PROT_ALL); + pmap_activate(curproc); + + hibernate_inflate_page = HIBERNATE_INFLATE_PAGE; + bzero((caddr_t)HIBERNATE_PT_PAGE, PAGE_SIZE); bzero((caddr_t)HIBERNATE_PD_PAGE, PAGE_SIZE); + bzero((caddr_t)HIBERNATE_STACK_PAGE, PAGE_SIZE); + + /* PDE for low pages */ + hibernate_enter_resume_4k_pde(0); /* * Identity map first 640KB physical for tramps and special utility @@ -245,15 +212,6 @@ hibernate_populate_resume_pt(paddr_t image_start, paddr_t image_end) } /* - * Identity map chunk table - */ - for (i=0; i < HIBERNATE_CHUNK_TABLE_SIZE/PAGE_SIZE; i ++) { - hibernate_enter_resume_mapping( - (vaddr_t)HIBERNATE_CHUNK_TABLE_START + (i*PAGE_SIZE), - HIBERNATE_CHUNK_TABLE_START + (i*PAGE_SIZE), 0); - } - - /* * Map current kernel VA range using 4M pages */ kern_start_4m_va = (paddr_t)&start & ~(PAGE_MASK_4M); @@ -279,353 +237,76 @@ hibernate_populate_resume_pt(paddr_t image_start, paddr_t image_end) pa = (paddr_t)(phys_page_number * NBPD); hibernate_enter_resume_mapping(page, pa, 1); } -} - -/* - * hibernate_write_image - * - * Write a compressed version of this machine's memory to disk, at the - * precalculated swap offset: - * - * end of swap - signature block size - chunk table size - memory size - * - * The function begins by mapping various pages into the suspending - * kernel's pmap, then loops through each phys mem range, cutting each - * one into 4MB chunks. These chunks are then compressed individually - * and written out to disk, in phys mem order. Some chunks might compress - * more than others, and for this reason, each chunk's size is recorded - * in the chunk table, which is written to disk after the image has - * properly been compressed and written. - * - * When this function is called, the machine is nearly suspended - most - * devices are quiesced/suspended, interrupts are off, and cold has - * been set. This means that there can be no side effects once the - * write has started, and the write function itself can also have no - * side effects. - */ -int -hibernate_write_image() -{ - union hibernate_info hiber_info; - paddr_t range_base, range_end, inaddr, temp_inaddr; - vaddr_t zlib_range; - daddr_t blkctr; - int i; - size_t nblocks, out_remaining, used, offset; - struct hibernate_disk_chunk *chunks; - - /* Get current running machine's hibernate info */ - bzero(&hiber_info, sizeof(hiber_info)); - if (get_hibernate_info(&hiber_info)) - return (1); - - zlib_range = HIBERNATE_ZLIB_START; - - /* Map utility pages */ - pmap_kenter_pa(HIBERNATE_ALLOC_PAGE, HIBERNATE_ALLOC_PAGE, - VM_PROT_ALL); - pmap_kenter_pa(HIBERNATE_IO_PAGE, HIBERNATE_IO_PAGE, VM_PROT_ALL); - pmap_kenter_pa(HIBERNATE_TEMP_PAGE, HIBERNATE_TEMP_PAGE, - VM_PROT_ALL); - pmap_kenter_pa(HIBERNATE_ZLIB_SCRATCH, HIBERNATE_ZLIB_SCRATCH, - VM_PROT_ALL); - - /* Map the zlib allocation ranges */ - for(zlib_range = HIBERNATE_ZLIB_START; zlib_range < HIBERNATE_ZLIB_END; - zlib_range += PAGE_SIZE) { - pmap_kenter_pa((vaddr_t)(zlib_range+i), - (paddr_t)(zlib_range+i), - VM_PROT_ALL); - } - - /* Identity map the chunktable */ - for(i=0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) { - pmap_kenter_pa((vaddr_t)(HIBERNATE_CHUNK_TABLE_START+i), - (paddr_t)(HIBERNATE_CHUNK_TABLE_START+i), - VM_PROT_ALL); - } - - pmap_activate(curproc); - - blkctr = hiber_info.image_offset; - hiber_info.chunk_ctr = 0; - offset = 0; - chunks = (struct hibernate_disk_chunk *)HIBERNATE_CHUNK_TABLE_START; - - /* Calculate the chunk regions */ - for (i=0; i < hiber_info.nranges; i++) { - range_base = hiber_info.ranges[i].base; - range_end = hiber_info.ranges[i].end; - - inaddr = range_base; - - while (inaddr < range_end) { - chunks[hiber_info.chunk_ctr].base = inaddr; - if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) - chunks[hiber_info.chunk_ctr].end = inaddr + - HIBERNATE_CHUNK_SIZE; - else - chunks[hiber_info.chunk_ctr].end = range_end; - - inaddr += HIBERNATE_CHUNK_SIZE; - hiber_info.chunk_ctr ++; - } - } - - /* Compress and write the chunks in the chunktable */ - for (i=0; i < hiber_info.chunk_ctr; i++) { - range_base = chunks[i].base; - range_end = chunks[i].end; - - chunks[i].offset = blkctr; - - /* Reset zlib for deflate */ - if (hibernate_zlib_reset(1) != Z_OK) - return (1); - - inaddr = range_base; - - /* - * For each range, loop through its phys mem region - * and write out 4MB chunks (the last chunk might be - * smaller than 4MB). - */ - while (inaddr < range_end) { - out_remaining = PAGE_SIZE; - while (out_remaining > 0 && inaddr < range_end) { - pmap_kenter_pa(HIBERNATE_TEMP_PAGE2, - inaddr & PMAP_PA_MASK, VM_PROT_ALL); - pmap_activate(curproc); - bcopy((caddr_t)HIBERNATE_TEMP_PAGE2, - (caddr_t)HIBERNATE_TEMP_PAGE, PAGE_SIZE); - - /* Adjust for non page-sized regions */ - temp_inaddr = (inaddr & PAGE_MASK) + - HIBERNATE_TEMP_PAGE; - - /* Deflate from temp_inaddr to IO page */ - inaddr += hibernate_deflate(temp_inaddr, - &out_remaining); - } - - if (out_remaining == 0) { - /* Filled up the page */ - nblocks = PAGE_SIZE / hiber_info.secsize; - - if(hiber_info.io_func(hiber_info.device, blkctr, - (vaddr_t)HIBERNATE_IO_PAGE, PAGE_SIZE, - 1, (void *)HIBERNATE_ALLOC_PAGE)) - return (1); - - blkctr += nblocks; - } - - } - - if (inaddr != range_end) - return (1); - - /* - * End of range. Round up to next secsize bytes - * after finishing compress - */ - if (out_remaining == 0) - out_remaining = PAGE_SIZE; - - /* Finish compress */ - hibernate_state->hib_stream.avail_in = 0; - hibernate_state->hib_stream.avail_out = out_remaining; - hibernate_state->hib_stream.next_in = (caddr_t)inaddr; - hibernate_state->hib_stream.next_out = - (caddr_t)HIBERNATE_IO_PAGE + (PAGE_SIZE - out_remaining); - - if (deflate(&hibernate_state->hib_stream, Z_FINISH) != - Z_STREAM_END) - return (1); - - out_remaining = hibernate_state->hib_stream.avail_out; - - used = PAGE_SIZE - out_remaining; - nblocks = used / hiber_info.secsize; - - /* Round up to next block if needed */ - if (used % hiber_info.secsize != 0) - nblocks ++; - - /* Write final block(s) for this chunk */ - if( hiber_info.io_func(hiber_info.device, blkctr, - (vaddr_t)HIBERNATE_IO_PAGE, nblocks*hiber_info.secsize, - 1, (void *)HIBERNATE_ALLOC_PAGE)) - return (1); - - blkctr += nblocks; - - offset = blkctr; - chunks[i].compressed_size= - (offset-chunks[i].offset)*hiber_info.secsize; - } - - /* Image write complete, write the signature+chunk table and return */ - return hibernate_write_signature(&hiber_info); -} - -int -hibernate_read_image() -{ - union hibernate_info hiber_info; - int i, j; - paddr_t range_base, range_end, addr; - daddr_t blkctr; - - /* Get current running machine's hibernate info */ - if (get_hibernate_info(&hiber_info)) - return (1); - - pmap_kenter_pa(HIBERNATE_TEMP_PAGE, HIBERNATE_TEMP_PAGE, VM_PROT_ALL); - pmap_kenter_pa(HIBERNATE_ALLOC_PAGE, HIBERNATE_ALLOC_PAGE, VM_PROT_ALL); - - blkctr = hiber_info.image_offset; - - for (i=0; i < hiber_info.nranges; i++) { - range_base = hiber_info.ranges[i].base; - range_end = hiber_info.ranges[i].end; - - for (j=0; j < (range_end - range_base)/NBPG; - blkctr += (NBPG/512), j += NBPG) { - addr = range_base + j; - pmap_kenter_pa(HIBERNATE_TEMP_PAGE, addr, - VM_PROT_ALL); - hiber_info.io_func(hiber_info.device, blkctr, - (vaddr_t)HIBERNATE_IO_PAGE, NBPG, 1, - (void *)HIBERNATE_ALLOC_PAGE); - bcopy((caddr_t)HIBERNATE_IO_PAGE, - (caddr_t)HIBERNATE_TEMP_PAGE, - NBPG); - - } - } - - /* Read complete, clear the signature and return */ - return hibernate_clear_signature(); -} -int -hibernate_suspend() -{ /* - * On i386, the only thing to do on hibernate suspend is - * to zero all the unused pages and write the image. + * Map the piglet */ - - uvm_pmr_zero_everything(); - - return hibernate_write_image(); -} - -/* Unpack image from resumed image to real location */ -void -hibernate_unpack_image() -{ - union hibernate_info *hiber_info = global_hiber_info; - int i, j; - paddr_t base, end, pig_base; - - hibernate_activate_resume_pt(); - - for (i=0; i<hiber_info->nranges; i++) { - base = hiber_info->ranges[i].base; - end = hiber_info->ranges[i].end; - pig_base = base + global_image_start; - - for (j=base; j< (end - base)/NBPD; j++) { - hibernate_enter_resume_mapping(base, base, 0); - bcopy((caddr_t)pig_base, (caddr_t)base, NBPD); - } + phys_page_number = hib_info->piglet_pa / NBPD; + piglet_start = hib_info->piglet_va; + piglet_end = piglet_start + HIBERNATE_CHUNK_SIZE * 3; + piglet_start &= ~(PAGE_MASK_4M); + piglet_end &= ~(PAGE_MASK_4M); + for (page = piglet_start; page <= piglet_end ; + page += NBPD, phys_page_number++) { + pa = (paddr_t)(phys_page_number * NBPD); + hibernate_enter_resume_mapping(page, pa, 1); } } +/* + * MD-specific resume preparation (creating resume time pagetables, + * stacks, etc). + * + * On i386, we use the piglet whose address is contained in hib_info + * as per the following layout: + * + * offset from piglet base use + * ----------------------- -------------------- + * 0 i/o allocation area + * PAGE_SIZE i/o read area + * 2*PAGE_SIZE temp/scratch page + * 5*PAGE_SIZE resume stack + * 6*PAGE_SIZE hiballoc arena + * 7*PAGE_SIZE to 87*PAGE_SIZE zlib inflate area + * ... + * HIBERNATE_CHUNK_SIZE chunk table + * 2*HIBERNATE_CHUNK_SIZE bounce/copy area + */ void -hibernate_resume() +hibernate_prepare_resume_machdep(union hibernate_info *hib_info) { - union hibernate_info hiber_info, disk_hiber_info; - u_int8_t *io_page; - int s; - paddr_t image_start; - - /* Get current running machine's hibernate info */ - if (get_hibernate_info(&hiber_info)) - return; - - io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); - if (!io_page) - return; - - /* Read hibernate info from disk */ - s = splbio(); - hiber_info.io_func(hiber_info.device, hiber_info.sig_offset, - (vaddr_t)&disk_hiber_info, 512, 0, io_page); - - free(io_page, M_DEVBUF); - - if (memcmp(&hiber_info, &disk_hiber_info, - sizeof(union hibernate_info)) !=0) { - return; - } + paddr_t pa, piglet_end; + vaddr_t va; /* - * On-disk and in-memory hibernate signatures match, - * this means we should do a resume from hibernate. + * At this point, we are sure that the piglet's phys + * space is going to have been unused by the suspending + * kernel, but the vaddrs used by the suspending kernel + * may or may not be available to us here in the + * resuming kernel, so we allocate a new range of VAs + * for the piglet. Those VAs will be temporary and will + * cease to exist as soon as we switch to the resume + * PT, so we need to ensure that any VAs required during + * inflate are also entered into that map. */ - disable_intr(); - cold = 1; - - /* - * Add mappings for resume stack and PT page tables - * into the "resuming" kernel. We use these mappings - * during image read and copy - */ - pmap_activate(curproc); - pmap_kenter_pa((vaddr_t)HIBERNATE_STACK_PAGE, - (paddr_t)HIBERNATE_STACK_PAGE, - VM_PROT_ALL); - pmap_kenter_pa((vaddr_t)HIBERNATE_PT_PAGE, - (paddr_t)HIBERNATE_PT_PAGE, - VM_PROT_ALL); - - /* - * We can't access any of this function's local variables (via - * stack) after we switch stacks, so we stash hiber_info and - * the image start area into temporary global variables first. - */ - global_hiber_info = &hiber_info; - global_image_start = image_start; + hib_info->piglet_va = (vaddr_t)km_alloc(HIBERNATE_CHUNK_SIZE*3, + &kv_any, &kp_none, &kd_nowait); + if (!hib_info->piglet_va) + panic("Unable to allocate vaddr for hibernate resume piglet\n"); - /* Switch stacks */ - hibernate_switch_stack(); + piglet_end = hib_info->piglet_pa + HIBERNATE_CHUNK_SIZE*3; - /* Read the image from disk into the image (pig) area */ - if (hibernate_read_image()) - panic("Failed to restore the hibernate image"); + for (pa = hib_info->piglet_pa, + va = hib_info->piglet_va; + pa <= piglet_end; + pa += PAGE_SIZE, va += PAGE_SIZE) + pmap_kenter_pa(va, pa, VM_PROT_ALL); - /* - * Image is now in high memory (pig area), copy to "correct" - * location in memory. We'll eventually end up copying on top - * of ourself, but we are assured the kernel code here is - * the same between the hibernated and resuming kernel, - * and we are running on our own stack - */ - hibernate_unpack_image(); - - /* - * Resume the loaded kernel by jumping to the S3 resume vector - */ - hibernate_resume_machine(); + pmap_activate(curproc); } /* - * hibernate_inflate_skip - * * During inflate, certain pages that contain our bookkeeping information * (eg, the chunk table, scratch pages, etc) need to be skipped over and * not inflated into. @@ -633,23 +314,12 @@ hibernate_resume() * Returns 1 if the physical page at dest should be skipped, 0 otherwise */ int -hibernate_inflate_skip(paddr_t dest) +hibernate_inflate_skip(union hibernate_info *hib_info, paddr_t dest) { - /* Chunk Table */ - if (dest >= HIBERNATE_CHUNK_TABLE_START && - dest <= HIBERNATE_CHUNK_TABLE_END) - return (1); - - /* Contiguous utility pages */ - if (dest >= HIBERNATE_STACK_PAGE && - dest <= HIBERNATE_CHUNKS_PAGE) - return (1); - - /* libz hiballoc arena */ - if (dest >= HIBERNATE_ZLIB_SCRATCH && - dest <= HIBERNATE_ZLIB_END) + if (dest >= hib_info->piglet_pa && + dest <= (hib_info->piglet_pa + 3 * HIBERNATE_CHUNK_SIZE)) return (1); return (0); } -#endif /* !SMALL_KERNEL */ + diff --git a/sys/arch/i386/include/hibernate.h b/sys/arch/i386/include/hibernate.h index 5f3b7042eae..19c0b8dd63f 100644 --- a/sys/arch/i386/include/hibernate.h +++ b/sys/arch/i386/include/hibernate.h @@ -21,7 +21,9 @@ int get_hibernate_info_md(union hibernate_info *); void hibernate_flush(void); void hibernate_enter_resume_mapping(vaddr_t, paddr_t, int); -int hibernate_zlib_reset(int); -int hibernate_inflate_skip(paddr_t); +int hibernate_inflate_skip(union hibernate_info *, paddr_t); +void hibernate_prepare_resume_machdep(union hibernate_info *); int hibernate_suspend(void); -void hibernate_resume(void); +void hibernate_switch_stack_machdep(void); +void hibernate_resume_machdep(void); +void hibernate_activate_resume_pt_machdep(void); diff --git a/sys/arch/i386/include/hibernate_var.h b/sys/arch/i386/include/hibernate_var.h index 07a14b9836f..b34fd9395cb 100644 --- a/sys/arch/i386/include/hibernate_var.h +++ b/sys/arch/i386/include/hibernate_var.h @@ -20,15 +20,10 @@ #define PAGE_MASK_4M (NBPD - 1) #define PMAP_PA_MASK_4M ~((paddr_t)PAGE_MASK_4M) -#define HIBERNATE_STACK_PAGE (PAGE_SIZE * 5) -#define HIBERNATE_IO_PAGE (PAGE_SIZE * 6) -#define HIBERNATE_TEMP_PAGE (PAGE_SIZE * 10) -#define HIBERNATE_TEMP_PAGE2 (PAGE_SIZE * 11) -#define HIBERNATE_PD_PAGE (PAGE_SIZE * 12) -#define HIBERNATE_PT_PAGE (PAGE_SIZE * 13) -#define HIBERNATE_ALLOC_PAGE (PAGE_SIZE * 14) - -#define HIBERNATE_CHUNKS_PAGE (PAGE_SIZE * 15) +#define HIBERNATE_PD_PAGE (PAGE_SIZE * 5) +#define HIBERNATE_PT_PAGE (PAGE_SIZE * 6) +#define HIBERNATE_STACK_PAGE (PAGE_SIZE * 7) +#define HIBERNATE_INFLATE_PAGE (PAGE_SIZE * 8) /* Use 4MB hibernation chunks */ #define HIBERNATE_CHUNK_SIZE 0x400000 @@ -39,12 +34,6 @@ #define HIBERNATE_CHUNK_TABLE_SIZE (HIBERNATE_CHUNK_TABLE_END - \ HIBERNATE_CHUNK_TABLE_START) -/* 320KB (80 pages) for gzip allocator */ -#define HIBERNATE_ZLIB_SCRATCH (PAGE_SIZE * 20) -#define HIBERNATE_ZLIB_START (PAGE_SIZE * 21) -#define HIBERNATE_ZLIB_END (PAGE_SIZE * (21 + 80)) -#define HIBERNATE_ZLIB_SIZE (HIBERNATE_ZLIB_END - HIBERNATE_ZLIB_START) - #define HIBERNATE_STACK_OFFSET 0x0F00 #define atop_4m(x) ((x) >> PAGE_SHIFT_4M) diff --git a/sys/kern/subr_hibernate.c b/sys/kern/subr_hibernate.c index ea15e4fc82b..ea7fc67aced 100644 --- a/sys/kern/subr_hibernate.c +++ b/sys/kern/subr_hibernate.c @@ -1,7 +1,8 @@ -/* $OpenBSD: subr_hibernate.c,v 1.15 2011/07/18 16:50:56 ariane Exp $ */ +/* $OpenBSD: subr_hibernate.c,v 1.16 2011/09/21 02:51:23 mlarkin Exp $ */ /* * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> + * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -32,7 +33,18 @@ extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t); -struct hibernate_state *hibernate_state; +struct hibernate_zlib_state *hibernate_state; + +/* Temporary vaddr ranges used during hibernate */ +vaddr_t hibernate_temp_page; +vaddr_t hibernate_copy_page; +vaddr_t hibernate_stack_page; +vaddr_t hibernate_fchunk_area; +vaddr_t hibernate_chunktable_area; +vaddr_t hibernate_inflate_page; + +/* Hibernate info as read from disk during resume */ +union hibernate_info disk_hiber_info; /* * Hib alloc enforced alignment. @@ -401,13 +413,14 @@ found: * the allocation to fail. */ int -uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align) +uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) { - vaddr_t pg_addr, piglet_addr; + paddr_t pg_addr, piglet_addr; struct uvm_pmemrange *pmr; struct vm_page *pig_pg, *pg; struct pglist pageq; int pdaemon_woken; + vaddr_t piglet_va; KASSERT((align & (align - 1)) == 0); pdaemon_woken = 0; /* Didn't wake the pagedaemon. */ @@ -419,7 +432,7 @@ uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align) */ if (align < PAGE_SIZE) align = PAGE_SIZE; - sz = atop(round_page(sz)); + sz = round_page(sz); uvm_lock_fpageq(); @@ -436,24 +449,24 @@ retry: piglet_addr = (pg_addr + (align - 1)) & ~(align - 1); if (atop(pg_addr) + pig_pg->fpgsz >= - atop(piglet_addr) + sz) { + atop(piglet_addr) + atop(sz)) { goto found; } } + } - /* - * Try to coerse the pagedaemon into freeing memory - * for the piglet. - * - * pdaemon_woken is set to prevent the code from - * falling into an endless loop. - */ - if (!pdaemon_woken) { - pdaemon_woken = 1; - if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1, - ptoa(sz), UVM_PLA_FAILOK) == 0) - goto retry; - } + /* + * Try to coerse the pagedaemon into freeing memory + * for the piglet. + * + * pdaemon_woken is set to prevent the code from + * falling into an endless loop. + */ + if (!pdaemon_woken) { + pdaemon_woken = 1; + if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1, + sz, UVM_PLA_FAILOK) == 0) + goto retry; } /* Return failure. */ @@ -466,10 +479,10 @@ found: */ TAILQ_INIT(&pageq); uvm_pmr_extract_range(pmr, pig_pg, - atop(piglet_addr), atop(piglet_addr) + sz, &pageq); + atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq); - *addr = piglet_addr; - uvmexp.free -= sz; + *pa = piglet_addr; + uvmexp.free -= atop(sz); /* * Update pg flags. @@ -493,10 +506,69 @@ found: } uvm_unlock_fpageq(); + + + /* + * Now allocate a va. + * Use direct mappings for the pages. + */ + + piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok); + if (!piglet_va) { + uvm_pglistfree(&pageq); + return ENOMEM; + } + + /* + * Map piglet to va. + */ + TAILQ_FOREACH(pg, &pageq, pageq) { + pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW); + piglet_va += PAGE_SIZE; + } + pmap_update(pmap_kernel()); + return 0; } /* + * Free a piglet area. + */ +void +uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) +{ + paddr_t pa; + struct vm_page *pg; + + /* + * Fix parameters. + */ + sz = round_page(sz); + + /* + * Find the first page in piglet. + * Since piglets are contiguous, the first pg is all we need. + */ + if (!pmap_extract(pmap_kernel(), va, &pa)) + panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va); + pg = PHYS_TO_VM_PAGE(pa); + if (pg == NULL) + panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa); + + /* + * Unmap. + */ + pmap_kremove(va, sz); + pmap_update(pmap_kernel()); + + /* + * Free the physical and virtual memory. + */ + uvm_pmr_freepages(pg, atop(sz)); + km_free((void*)va, sz, &kv_any, &kp_none); +} + +/* * Physmem RLE compression support. * * Given a physical page address, it will return the number of pages @@ -530,15 +602,13 @@ uvm_page_rle(paddr_t addr) } /* - * get_hibernate_info - * * Fills out the hibernate_info union pointed to by hiber_info * with information about this machine (swap signature block * offsets, number of memory ranges, kernel in use, etc) * */ int -get_hibernate_info(union hibernate_info *hiber_info) +get_hibernate_info(union hibernate_info *hiber_info, int suspend) { int chunktable_size; struct disklabel dl; @@ -575,33 +645,37 @@ get_hibernate_info(union hibernate_info *hiber_info) chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize; - /* Calculate memory image location */ - hiber_info->image_offset = dl.d_partitions[1].p_offset + - dl.d_partitions[1].p_size - - (hiber_info->image_size / hiber_info->secsize) - - sizeof(union hibernate_info)/hiber_info->secsize - - chunktable_size; - /* Stash kernel version information */ bzero(&hiber_info->kernel_version, 128); bcopy(version, &hiber_info->kernel_version, min(strlen(version), sizeof(hiber_info->kernel_version)-1)); - /* Allocate piglet region */ - if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE, - HIBERNATE_CHUNK_SIZE)) { - printf("Hibernate failed to allocate the piglet\n"); - return (1); + if (suspend) { + /* Allocate piglet region */ + if (uvm_pmr_alloc_piglet(&hiber_info->piglet_va, + &hiber_info->piglet_pa, + HIBERNATE_CHUNK_SIZE*3, + HIBERNATE_CHUNK_SIZE)) { + printf("Hibernate failed to allocate the piglet\n"); + return (1); + } } - return get_hibernate_info_md(hiber_info); + if (get_hibernate_info_md(hiber_info)) + return (1); + + /* Calculate memory image location */ + hiber_info->image_offset = dl.d_partitions[1].p_offset + + dl.d_partitions[1].p_size - + (hiber_info->image_size / hiber_info->secsize) - + sizeof(union hibernate_info)/hiber_info->secsize - + chunktable_size; + + return (0); } /* - * hibernate_zlib_alloc - * * Allocate nitems*size bytes from the hiballoc area presently in use - * */ void *hibernate_zlib_alloc(void *unused, int nitems, int size) @@ -610,11 +684,8 @@ void } /* - * hibernate_zlib_free - * * Free the memory pointed to by addr in the hiballoc area presently in * use - * */ void hibernate_zlib_free(void *unused, void *addr) @@ -623,8 +694,6 @@ hibernate_zlib_free(void *unused, void *addr) } /* - * hibernate_inflate - * * Inflate size bytes from src into dest, skipping any pages in * [src..dest] that are special (see hibernate_inflate_skip) * @@ -636,16 +705,18 @@ hibernate_zlib_free(void *unused, void *addr) * This function executes while using the resume-time stack * and pmap, and therefore cannot use ddb/printf/etc. Doing so * will likely hang or reset the machine. - * */ void -hibernate_inflate(paddr_t dest, paddr_t src, size_t size) +hibernate_inflate(union hibernate_info *hiber_info, + paddr_t dest, paddr_t src, size_t size) { int i; hibernate_state->hib_stream.avail_in = size; hibernate_state->hib_stream.next_in = (char *)src; + hibernate_inflate_page = hiber_info->piglet_va + 2 * PAGE_SIZE; + do { /* Flush cache and TLB */ hibernate_flush(); @@ -654,17 +725,20 @@ hibernate_inflate(paddr_t dest, paddr_t src, size_t size) * Is this a special page? If yes, redirect the * inflate output to a scratch page (eg, discard it) */ - if (hibernate_inflate_skip(dest)) - hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE, - HIBERNATE_TEMP_PAGE, 0); + if (hibernate_inflate_skip(hiber_info, dest)) + hibernate_enter_resume_mapping( + hibernate_inflate_page, + hiber_info->piglet_pa + 2 * PAGE_SIZE, + 0); else - hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE, + hibernate_enter_resume_mapping( + hibernate_inflate_page, dest, 0); /* Set up the stream for inflate */ hibernate_state->hib_stream.avail_out = PAGE_SIZE; hibernate_state->hib_stream.next_out = - (char *)HIBERNATE_TEMP_PAGE; + (char *)hiber_info->piglet_va + 2 * PAGE_SIZE; /* Process next block of data */ i = inflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH); @@ -681,24 +755,24 @@ hibernate_inflate(paddr_t dest, paddr_t src, size_t size) } /* - * hibernate_deflate - * * deflate from src into the I/O page, up to 'remaining' bytes * * Returns number of input bytes consumed, and may reset * the 'remaining' parameter if not all the output space was consumed * (this information is needed to know how much to write to disk - * */ size_t -hibernate_deflate(paddr_t src, size_t *remaining) +hibernate_deflate(union hibernate_info *hiber_info, paddr_t src, + size_t *remaining) { + vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE; + /* Set up the stream for deflate */ hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK); hibernate_state->hib_stream.avail_out = *remaining; hibernate_state->hib_stream.next_in = (caddr_t)src; - hibernate_state->hib_stream.next_out = (caddr_t)HIBERNATE_IO_PAGE + + hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page + (PAGE_SIZE - *remaining); /* Process next block of data */ @@ -712,8 +786,6 @@ hibernate_deflate(paddr_t src, size_t *remaining) } /* - * hibernate_write_signature - * * Write the hibernation information specified in hiber_info * to the location in swap previously calculated (last block of * swap), called the "signature block". @@ -725,30 +797,63 @@ int hibernate_write_signature(union hibernate_info *hiber_info) { u_int8_t *io_page; - daddr_t chunkbase; - size_t i; + int result = 0; io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!io_page) return (1); /* Write hibernate info to disk */ - if( hiber_info->io_func(hiber_info->device, hiber_info->sig_offset, - (vaddr_t)hiber_info, hiber_info->secsize, 1, io_page)) - panic("error in hibernate write sig\n"); + if (hiber_info->io_func(hiber_info->device, hiber_info->sig_offset, + (vaddr_t)hiber_info, hiber_info->secsize, 1, io_page)) { + result = 1; + } + + free(io_page, M_DEVBUF); + return (result); +} + +/* + * Write the memory chunk table to the area in swap immediately + * preceding the signature block. The chunk table is stored + * in the piglet when this function is called. + */ +int +hibernate_write_chunktable(union hibernate_info *hiber_info) +{ + u_int8_t *io_page; + int i; + daddr_t chunkbase; + vaddr_t hibernate_chunk_table_start; + size_t hibernate_chunk_table_size; + struct hibernate_disk_chunk *chunks; + + io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (!io_page) + return (1); + + hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE; chunkbase = hiber_info->sig_offset - - (HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize); + (hibernate_chunk_table_size / hiber_info->secsize); + + hibernate_chunk_table_start = hiber_info->piglet_va + + HIBERNATE_CHUNK_SIZE; + + chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va + + HIBERNATE_CHUNK_SIZE); /* Write chunk table */ - for(i=0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += NBPG) { + for(i=0; i < hibernate_chunk_table_size; i += MAXPHYS) { if(hiber_info->io_func(hiber_info->device, chunkbase + (i/hiber_info->secsize), - (vaddr_t)(HIBERNATE_CHUNK_TABLE_START + i), - NBPG, + (vaddr_t)(hibernate_chunk_table_start + i), + MAXPHYS, 1, - io_page)) - panic("error in hibernate write chunks\n"); + io_page)) { + free(io_page, M_DEVBUF); + return (1); + } } free(io_page, M_DEVBUF); @@ -757,8 +862,6 @@ hibernate_write_signature(union hibernate_info *hiber_info) } /* - * hibernate_clear_signature - * * Write an empty hiber_info to the swap signature block, which is * guaranteed to not match any valid hiber_info. */ @@ -772,7 +875,7 @@ hibernate_clear_signature() /* Zero out a blank hiber_info */ bzero(&blank_hiber_info, sizeof(hiber_info)); - if (get_hibernate_info(&hiber_info)) + if (get_hibernate_info(&hiber_info, 0)) return (1); io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); @@ -780,6 +883,7 @@ hibernate_clear_signature() return (1); /* Write (zeroed) hibernate info to disk */ + /* XXX - use regular kernel write routine for this */ if(hiber_info.io_func(hiber_info.device, hiber_info.sig_offset, (vaddr_t)&blank_hiber_info, hiber_info.secsize, 1, io_page)) panic("error hibernate write 6\n"); @@ -790,8 +894,6 @@ hibernate_clear_signature() } /* - * hibernate_check_overlap - * * Check chunk range overlap when calculating whether or not to copy a * compressed chunk to the piglet area before decompressing. * @@ -820,8 +922,6 @@ hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e) } /* - * hibernate_compare_signature - * * Compare two hibernate_infos to determine if they are the same (eg, * we should be performing a hibernate resume on this machine. * Not all fields are checked - just enough to verify that the machine @@ -850,9 +950,7 @@ hibernate_compare_signature(union hibernate_info *mine, } /* - * hibernate_read_block - * - * Reads read_size blocks from the hibernate device specified in + * Reads read_size bytes from the hibernate device specified in * hib_info at offset blkctr. Output is placed into the vaddr specified * at dest. * @@ -912,3 +1010,721 @@ hibernate_read_block(union hibernate_info *hib_info, daddr_t blkctr, return (0); } +/* + * Reads the signature block from swap, checks against the current machine's + * information. If the information matches, perform a resume by reading the + * saved image into the pig area, and unpacking. + */ +void +hibernate_resume() +{ + union hibernate_info hiber_info; + u_int8_t *io_page; + int s; + + /* Scrub temporary vaddr ranges used during resume */ + hibernate_temp_page = (vaddr_t)NULL; + hibernate_fchunk_area = (vaddr_t)NULL; + hibernate_chunktable_area = (vaddr_t)NULL; + hibernate_stack_page = (vaddr_t)NULL; + + /* Get current running machine's hibernate info */ + bzero(&hiber_info, sizeof(hiber_info)); + if (get_hibernate_info(&hiber_info, 0)) + return; + + io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (!io_page) + return; + + /* Read hibernate info from disk */ + s = splbio(); + + /* XXX use regular kernel read routine here */ + if(hiber_info.io_func(hiber_info.device, hiber_info.sig_offset, + (vaddr_t)&disk_hiber_info, + hiber_info.secsize, 0, io_page)) + panic("error in hibernate read\n"); + + free(io_page, M_DEVBUF); + + /* + * If on-disk and in-memory hibernate signatures match, + * this means we should do a resume from hibernate. + */ + if (hibernate_compare_signature(&hiber_info, + &disk_hiber_info)) + return; + + /* + * Allocate several regions of vaddrs for use during read. + * These mappings go into the resuming kernel's page table, and are + * used only during image read. + */ + hibernate_temp_page = (vaddr_t)km_alloc(2*PAGE_SIZE, &kv_any, + &kp_none, &kd_nowait); + if (!hibernate_temp_page) + goto fail; + + hibernate_fchunk_area = (vaddr_t)km_alloc(3*PAGE_SIZE, &kv_any, + &kp_none, &kd_nowait); + if (!hibernate_fchunk_area) + goto fail; + + /* Allocate a temporary chunktable area */ + hibernate_chunktable_area = (vaddr_t)malloc(HIBERNATE_CHUNK_TABLE_SIZE, + M_DEVBUF, M_NOWAIT); + if (!hibernate_chunktable_area) + goto fail; + + /* Allocate one temporary page of VAs for the resume time stack */ + hibernate_stack_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, + &kp_none, &kd_nowait); + if (!hibernate_stack_page) + goto fail; + + /* Read the image from disk into the image (pig) area */ + if (hibernate_read_image(&disk_hiber_info)) + goto fail; + + /* Point of no return ... */ + + disable_intr(); + cold = 1; + + /* Switch stacks */ + hibernate_switch_stack_machdep(); + + /* + * Image is now in high memory (pig area), copy to correct location + * in memory. We'll eventually end up copying on top of ourself, but + * we are assured the kernel code here is the same between the + * hibernated and resuming kernel, and we are running on our own + * stack, so the overwrite is ok. + */ + hibernate_unpack_image(&disk_hiber_info); + + /* + * Resume the loaded kernel by jumping to the MD resume vector. + * We won't be returning from this call. + */ + hibernate_resume_machdep(); + +fail: + printf("Unable to resume hibernated image\n"); + + if (hibernate_temp_page) + km_free((void *)hibernate_temp_page, 2*PAGE_SIZE, &kv_any, + &kp_none); + + if (hibernate_fchunk_area) + km_free((void *)hibernate_fchunk_area, 3*PAGE_SIZE, &kv_any, + &kp_none); + + if (io_page) + free((void *)io_page, M_DEVBUF); + + if (hibernate_chunktable_area) + free((void *)hibernate_chunktable_area, M_DEVBUF); +} + +/* + * Unpack image from pig area to original location by looping through the + * list of output chunks in the order they should be restored (fchunks). + * This ordering is used to avoid having inflate overwrite a chunk in the + * middle of processing that chunk. This will, of course, happen during the + * final output chunk, where we copy the chunk to the piglet area first, + * before inflating. + */ +void +hibernate_unpack_image(union hibernate_info *hiber_info) +{ + int i; + paddr_t image_cur; + vaddr_t tempva; + struct hibernate_disk_chunk *chunks; + u_int8_t *fchunks; + + fchunks = (u_int8_t *)((char *)(hiber_info->piglet_va) + + (4 * PAGE_SIZE)); + + /* Copy temporary chunktable to piglet */ + tempva = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any, + &kp_none, &kd_nowait); + for (i=0; i<HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) + pmap_kenter_pa(tempva + i, hiber_info->piglet_pa + + HIBERNATE_CHUNK_SIZE + i, VM_PROT_ALL); + + bcopy((caddr_t)hibernate_chunktable_area, (caddr_t)tempva, + HIBERNATE_CHUNK_TABLE_SIZE); + + chunks = (struct hibernate_disk_chunk *)((char *)(hiber_info->piglet_va)) + + HIBERNATE_CHUNK_SIZE; + + hibernate_activate_resume_pt_machdep(); + + for (i=0; i<hiber_info->chunk_ctr; i++) { + /* Reset zlib for inflate */ + if (hibernate_zlib_reset(hiber_info, 0) != Z_OK) + panic("hibernate failed to reset zlib for inflate\n"); + + /* + * If there is a conflict, copy the chunk to the piglet area + * before unpacking it to its original location. + */ + if((chunks[fchunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) == 0) + hibernate_inflate(hiber_info, + chunks[fchunks[i]].base, image_cur, + chunks[fchunks[i]].compressed_size); + else { + bcopy((caddr_t)image_cur, + (caddr_t)hiber_info->piglet_va + + HIBERNATE_CHUNK_SIZE * 2, + chunks[fchunks[i]].compressed_size); + hibernate_inflate(hiber_info, + chunks[fchunks[i]].base, + hiber_info->piglet_va + + HIBERNATE_CHUNK_SIZE * 2, + chunks[fchunks[i]].compressed_size); + } + image_cur += chunks[fchunks[i]].compressed_size; + } +} + +/* + * Write a compressed version of this machine's memory to disk, at the + * precalculated swap offset: + * + * end of swap - signature block size - chunk table size - memory size + * + * The function begins by looping through each phys mem range, cutting each + * one into 4MB chunks. These chunks are then compressed individually + * and written out to disk, in phys mem order. Some chunks might compress + * more than others, and for this reason, each chunk's size is recorded + * in the chunk table, which is written to disk after the image has + * properly been compressed and written (in hibernate_write_chunktable). + * + * When this function is called, the machine is nearly suspended - most + * devices are quiesced/suspended, interrupts are off, and cold has + * been set. This means that there can be no side effects once the + * write has started, and the write function itself can also have no + * side effects. + * + * This function uses the piglet area during this process as follows: + * + * offset from piglet base use + * ----------------------- -------------------- + * 0 i/o allocation area + * PAGE_SIZE i/o write area + * 2*PAGE_SIZE temp/scratch page + * 3*PAGE_SIZE temp/scratch page + * 4*PAGE_SIZE hiballoc arena + * 5*PAGE_SIZE to 85*PAGE_SIZE zlib deflate area + * ... + * HIBERNATE_CHUNK_SIZE chunk table temporary area + * + * Some transient piglet content is saved as part of deflate, + * but it is irrelevant during resume as it will be repurposed + * at that time for other things. + */ +int +hibernate_write_chunks(union hibernate_info *hiber_info) +{ + paddr_t range_base, range_end, inaddr, temp_inaddr; + daddr_t blkctr; + int i; + size_t nblocks, out_remaining, used, offset; + struct hibernate_disk_chunk *chunks; + vaddr_t hibernate_alloc_page = hiber_info->piglet_va; + vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE; + + blkctr = hiber_info->image_offset; + hiber_info->chunk_ctr = 0; + offset = 0; + + /* + * Allocate VA for the temp and copy page. + */ + + hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, + &kp_none, &kd_nowait); + if (!hibernate_temp_page) + return (1); + + hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, + &kp_none, &kd_nowait); + if (!hibernate_copy_page) + return (1); + + pmap_kenter_pa(hibernate_copy_page, + (hiber_info->piglet_pa + 3*PAGE_SIZE), + VM_PROT_ALL); + + /* XXX - needed on i386. check other archs */ + pmap_activate(curproc); + + chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va + + HIBERNATE_CHUNK_SIZE); + + /* Calculate the chunk regions */ + for (i=0; i < hiber_info->nranges; i++) { + range_base = hiber_info->ranges[i].base; + range_end = hiber_info->ranges[i].end; + + inaddr = range_base; + + while (inaddr < range_end) { + chunks[hiber_info->chunk_ctr].base = inaddr; + if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) + chunks[hiber_info->chunk_ctr].end = inaddr + + HIBERNATE_CHUNK_SIZE; + else + chunks[hiber_info->chunk_ctr].end = range_end; + + inaddr += HIBERNATE_CHUNK_SIZE; + hiber_info->chunk_ctr ++; + } + } + + /* Compress and write the chunks in the chunktable */ + for (i=0; i < hiber_info->chunk_ctr; i++) { + range_base = chunks[i].base; + range_end = chunks[i].end; + + chunks[i].offset = blkctr; + + /* Reset zlib for deflate */ + if (hibernate_zlib_reset(hiber_info, 1) != Z_OK) + return (1); + + inaddr = range_base; + + /* + * For each range, loop through its phys mem region + * and write out the chunks (the last chunk might be + * smaller than the chunk size). + */ + while (inaddr < range_end) { + out_remaining = PAGE_SIZE; + while (out_remaining > 0 && inaddr < range_end) { + pmap_kenter_pa(hibernate_temp_page, + inaddr & PMAP_PA_MASK, VM_PROT_ALL); + pmap_activate(curproc); + + bcopy((caddr_t)hibernate_temp_page, + (caddr_t)hibernate_copy_page, PAGE_SIZE); + + /* Adjust for non page-sized regions */ + temp_inaddr = (inaddr & PAGE_MASK) + + hibernate_copy_page; + + /* Deflate from temp_inaddr to IO page */ + inaddr += hibernate_deflate(hiber_info, + temp_inaddr, + &out_remaining); + } + + if (out_remaining == 0) { + /* Filled up the page */ + nblocks = PAGE_SIZE / hiber_info->secsize; + + if(hiber_info->io_func(hiber_info->device, blkctr, + (vaddr_t)hibernate_io_page, PAGE_SIZE, + 1, (void *)hibernate_alloc_page)) + return (1); + + blkctr += nblocks; + } + + } + + if (inaddr != range_end) + return (1); + + /* + * End of range. Round up to next secsize bytes + * after finishing compress + */ + if (out_remaining == 0) + out_remaining = PAGE_SIZE; + + /* Finish compress */ + hibernate_state->hib_stream.avail_in = 0; + hibernate_state->hib_stream.avail_out = out_remaining; + hibernate_state->hib_stream.next_in = (caddr_t)inaddr; + hibernate_state->hib_stream.next_out = + (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining); + + if (deflate(&hibernate_state->hib_stream, Z_FINISH) != + Z_STREAM_END) + return (1); + + out_remaining = hibernate_state->hib_stream.avail_out; + + used = PAGE_SIZE - out_remaining; + nblocks = used / hiber_info->secsize; + + /* Round up to next block if needed */ + if (used % hiber_info->secsize != 0) + nblocks ++; + + /* Write final block(s) for this chunk */ + if( hiber_info->io_func(hiber_info->device, blkctr, + (vaddr_t)hibernate_io_page, nblocks*hiber_info->secsize, + 1, (void *)hibernate_alloc_page)) + return (1); + + blkctr += nblocks; + + offset = blkctr; + chunks[i].compressed_size= + (offset-chunks[i].offset)*hiber_info->secsize; + + } + + return (0); +} + +/* + * Reset the zlib stream state and allocate a new hiballoc area for either + * inflate or deflate. This function is called once for each hibernate chunk. + * Calling hiballoc_init multiple times is acceptable since the memory it is + * provided is unmanaged memory (stolen). We use the memory provided to us + * by the piglet allocated via the supplied hiber_info. + */ +int +hibernate_zlib_reset(union hibernate_info *hiber_info, int deflate) +{ + vaddr_t hibernate_zlib_start; + size_t hibernate_zlib_size; + + hibernate_state = (struct hibernate_zlib_state *)hiber_info->piglet_va + + (4 * PAGE_SIZE); + + hibernate_zlib_start = hiber_info->piglet_va + (5 * PAGE_SIZE); + hibernate_zlib_size = 80 * PAGE_SIZE; + + bzero((caddr_t)hibernate_zlib_start, hibernate_zlib_size); + bzero((caddr_t)hibernate_state, PAGE_SIZE); + + /* Set up stream structure */ + hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; + hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; + + /* Initialize the hiballoc arena for zlib allocs/frees */ + hiballoc_init(&hibernate_state->hiballoc_arena, + (caddr_t)hibernate_zlib_start, hibernate_zlib_size); + + if (deflate) { + return deflateInit(&hibernate_state->hib_stream, + Z_DEFAULT_COMPRESSION); + } + else + return inflateInit(&hibernate_state->hib_stream); +} + +/* + * Reads the hibernated memory image from disk, whose location and + * size are recorded in hiber_info. Begin by reading the persisted + * chunk table, which records the original chunk placement location + * and compressed size for each. Next, allocate a pig region of + * sufficient size to hold the compressed image. Next, read the + * chunks into the pig area (calling hibernate_read_chunks to do this), + * and finally, if all of the above succeeds, clear the hibernate signature. + * The function will then return to hibernate_resume, which will proceed + * to unpack the pig image to the correct place in memory. + */ +int +hibernate_read_image(union hibernate_info *hiber_info) +{ + int i; + paddr_t image_start, image_end, pig_start, pig_end; + daddr_t blkctr; + struct hibernate_disk_chunk *chunks; + size_t compressed_size, disk_size, chunktable_size, pig_sz; + + /* Calculate total chunk table size in disk blocks */ + chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize; + + blkctr = hiber_info->sig_offset - chunktable_size - + hiber_info->swap_offset; + + for(i=0; i < HIBERNATE_CHUNK_TABLE_SIZE; + i += MAXPHYS, blkctr += MAXPHYS/hiber_info->secsize) + hibernate_read_block(hiber_info, blkctr, MAXPHYS, + hibernate_chunktable_area + i); + + blkctr = hiber_info->image_offset; + compressed_size = 0; + chunks = (struct hibernate_disk_chunk *)hibernate_chunktable_area; + + for (i=0; i<hiber_info->chunk_ctr; i++) + compressed_size += chunks[i].compressed_size; + + disk_size = compressed_size; + + /* Allocate the pig area */ + pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE; + if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM) + return (1); + + pig_end = pig_start + pig_sz; + + /* Calculate image extents. Pig image must end on a chunk boundary. */ + image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1); + image_start = pig_start; + + image_start = image_end - disk_size; + + hibernate_read_chunks(hiber_info, image_start, image_end, disk_size); + + /* Prepare the resume time pmap/page table */ + hibernate_populate_resume_pt(hiber_info, image_start, image_end); + + /* Read complete, clear the signature and return */ + return hibernate_clear_signature(); +} + +/* + * Read the hibernated memory chunks from disk (chunk information at this + * point is stored in the piglet) into the pig area specified by + * [pig_start .. pig_end]. Order the chunks so that the final chunk is the + * only chunk with overlap possibilities. + * + * This function uses the piglet area during this process as follows: + * + * offset from piglet base use + * ----------------------- -------------------- + * 0 i/o allocation area + * PAGE_SIZE i/o write area + * 2*PAGE_SIZE temp/scratch page + * 3*PAGE_SIZE temp/scratch page + * 4*PAGE_SIZE to 6*PAGE_SIZE chunk ordering area + * 7*PAGE_SIZE hiballoc arena + * 8*PAGE_SIZE to 88*PAGE_SIZE zlib deflate area + * ... + * HIBERNATE_CHUNK_SIZE chunk table temporary area + */ +int +hibernate_read_chunks(union hibernate_info *hib_info, paddr_t pig_start, + paddr_t pig_end, size_t image_compr_size) +{ + paddr_t img_index, img_cur, r1s, r1e, r2s, r2e; + paddr_t copy_start, copy_end, piglet_cur; + paddr_t piglet_base = hib_info->piglet_pa; + paddr_t piglet_end = piglet_base + HIBERNATE_CHUNK_SIZE; + daddr_t blkctr; + size_t processed, compressed_size, read_size; + int i, j, overlap, found, nchunks, nochunks=0, nfchunks=0, npchunks=0; + struct hibernate_disk_chunk *chunks; + u_int8_t *ochunks, *pchunks, *fchunks; + + /* Map the chunk ordering region */ + pmap_kenter_pa(hibernate_fchunk_area, + piglet_base + (4*PAGE_SIZE), VM_PROT_ALL); + pmap_kenter_pa(hibernate_fchunk_area + PAGE_SIZE, + piglet_base + (5*PAGE_SIZE), VM_PROT_ALL); + pmap_kenter_pa(hibernate_fchunk_area + 2*PAGE_SIZE, + piglet_base + (6*PAGE_SIZE), + VM_PROT_ALL); + + /* Temporary output chunk ordering */ + ochunks = (u_int8_t *)hibernate_fchunk_area; + + /* Piglet chunk ordering */ + pchunks = (u_int8_t *)hibernate_fchunk_area + PAGE_SIZE; + + /* Final chunk ordering */ + fchunks = (u_int8_t *)hibernate_fchunk_area + 2*PAGE_SIZE; + + nchunks = hib_info->chunk_ctr; + chunks = (struct hibernate_disk_chunk *)hibernate_chunktable_area; + + /* Initially start all chunks as unplaced */ + for (i=0; i < nchunks; i++) + chunks[i].flags=0; + + /* + * Search the list for chunks that are outside the pig area. These + * can be placed first in the final output list. + */ + for (i=0; i < nchunks; i++) { + if(chunks[i].end <= pig_start || chunks[i].base >= pig_end) { + ochunks[nochunks] = (u_int8_t)i; + fchunks[nfchunks] = (u_int8_t)i; + nochunks++; + nfchunks++; + chunks[i].flags |= HIBERNATE_CHUNK_USED; + } + } + + /* + * Walk the ordering, place the chunks in ascending memory order. + * Conflicts might arise, these are handled next. + */ + do { + img_index = -1; + found=0; + j=-1; + for (i=0; i < nchunks; i++) + if (chunks[i].base < img_index && + chunks[i].flags == 0 ) { + j = i; + img_index = chunks[i].base; + } + + if (j != -1) { + found = 1; + ochunks[nochunks] = (short)j; + nochunks++; + chunks[j].flags |= HIBERNATE_CHUNK_PLACED; + } + } while (found); + + img_index=pig_start; + + /* + * Identify chunk output conflicts (chunks whose pig load area + * corresponds to their original memory placement location) + */ + for(i=0; i< nochunks ; i++) { + overlap=0; + r1s = img_index; + r1e = img_index + chunks[ochunks[i]].compressed_size; + r2s = chunks[ochunks[i]].base; + r2e = chunks[ochunks[i]].end; + + overlap = hibernate_check_overlap(r1s, r1e, r2s, r2e); + if (overlap) + chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_CONFLICT; + + img_index += chunks[ochunks[i]].compressed_size; + } + + /* + * Prepare the final output chunk list. Calculate an output + * inflate strategy for overlapping chunks if needed. + */ + img_index=pig_start; + for (i=0; i < nochunks ; i++) { + /* + * If a conflict is detected, consume enough compressed + * output chunks to fill the piglet + */ + if (chunks[ochunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) { + copy_start = piglet_base; + copy_end = piglet_end; + piglet_cur = piglet_base; + npchunks = 0; + j=i; + while (copy_start < copy_end && j < nochunks) { + piglet_cur += chunks[ochunks[j]].compressed_size; + pchunks[npchunks] = ochunks[j]; + npchunks++; + copy_start += chunks[ochunks[j]].compressed_size; + img_index += chunks[ochunks[j]].compressed_size; + i++; + j++; + } + + piglet_cur = piglet_base; + for (j=0; j < npchunks; j++) { + piglet_cur += chunks[pchunks[j]].compressed_size; + fchunks[nfchunks] = pchunks[j]; + chunks[pchunks[j]].flags |= HIBERNATE_CHUNK_USED; + nfchunks++; + } + } else { + /* + * No conflict, chunk can be added without copying + */ + if ((chunks[ochunks[i]].flags & + HIBERNATE_CHUNK_USED) == 0) { + fchunks[nfchunks] = ochunks[i]; + chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_USED; + nfchunks++; + } + + img_index += chunks[ochunks[i]].compressed_size; + } + } + + img_index = pig_start; + for(i=0 ; i< nfchunks; i++) { + piglet_cur = piglet_base; + img_index += chunks[fchunks[i]].compressed_size; + } + + img_cur = pig_start; + + for(i=0; i<nfchunks; i++) { + blkctr = chunks[fchunks[i]].offset - hib_info->swap_offset; + processed = 0; + compressed_size = chunks[fchunks[i]].compressed_size; + + while (processed < compressed_size) { + pmap_kenter_pa(hibernate_temp_page, img_cur, + VM_PROT_ALL); + pmap_kenter_pa(hibernate_temp_page + PAGE_SIZE, + img_cur+PAGE_SIZE, VM_PROT_ALL); + + /* XXX - needed on i386. check other archs */ + pmap_activate(curproc); + if (compressed_size - processed >= PAGE_SIZE) + read_size = PAGE_SIZE; + else + read_size = compressed_size - processed; + + hibernate_read_block(hib_info, blkctr, read_size, + hibernate_temp_page + (img_cur & PAGE_MASK)); + + blkctr += (read_size / hib_info->secsize); + + hibernate_flush(); + pmap_kremove(hibernate_temp_page, PAGE_SIZE); + pmap_kremove(hibernate_temp_page + PAGE_SIZE, + PAGE_SIZE); + processed += read_size; + img_cur += read_size; + } + } + + return (0); +} + +/* + * Hibernating a machine comprises the following operations: + * 1. Calculating this machine's hibernate_info information + * 2. Allocating a piglet and saving the piglet's physaddr + * 3. Calculating the memory chunks + * 4. Writing the compressed chunks to disk + * 5. Writing the chunk table + * 6. Writing the signature block (hibernate_info) + * + * On most architectures, the function calling hibernate_suspend would + * then power off the machine using some MD-specific implementation. + */ +int +hibernate_suspend() +{ + union hibernate_info hib_info; + + /* + * Calculate memory ranges, swap offsets, etc. + * This also allocates a piglet whose physaddr is stored in + * hib_info->piglet_pa and vaddr stored in hib_info->piglet_va + */ + if (get_hibernate_info(&hib_info, 1)) + return (1); + + /* XXX - Won't need to zero everything with RLE */ + uvm_pmr_zero_everything(); + + if (hibernate_write_chunks(&hib_info)) + return (1); + + if (hibernate_write_chunktable(&hib_info)) + return (1); + + return hibernate_write_signature(&hib_info); +} diff --git a/sys/sys/hibernate.h b/sys/sys/hibernate.h index 95c0fda28fe..9a17446416f 100644 --- a/sys/sys/hibernate.h +++ b/sys/sys/hibernate.h @@ -1,4 +1,4 @@ -/* $OpenBSD: hibernate.h,v 1.12 2011/07/11 03:30:32 mlarkin Exp $ */ +/* $OpenBSD: hibernate.h,v 1.13 2011/09/21 02:51:23 mlarkin Exp $ */ /* * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> @@ -42,11 +42,11 @@ struct hiballoc_arena }; /* - * struct hibernate_state + * struct hibernate_zlib_state * * Describes a zlib compression stream and its associated hiballoc area */ -struct hibernate_state { +struct hibernate_zlib_state { z_stream hib_stream; struct hiballoc_arena hiballoc_arena; }; @@ -70,7 +70,7 @@ struct hibernate_memory_range { struct hibernate_disk_chunk { paddr_t base; /* Base of chunk */ paddr_t end; /* End of chunk */ - size_t offset; /* Abs. disk block locating chunk */ + daddr_t offset; /* Abs. disk block locating chunk */ size_t compressed_size; /* Compressed size on disk */ short flags; /* Flags */ }; @@ -84,17 +84,18 @@ struct hibernate_disk_chunk { */ union hibernate_info { struct { - size_t nranges; - size_t image_size; - size_t chunk_ctr; - u_int32_t secsize; - dev_t device; - daddr_t swap_offset; - daddr_t sig_offset; - daddr_t image_offset; - paddr_t piglet_base; - struct hibernate_memory_range ranges[VM_PHYSSEG_MAX]; - char kernel_version[128]; + size_t nranges; + struct hibernate_memory_range ranges[VM_PHYSSEG_MAX]; + size_t image_size; + size_t chunk_ctr; + u_int32_t secsize; + dev_t device; + daddr_t swap_offset; + daddr_t sig_offset; + daddr_t image_offset; + paddr_t piglet_pa; + vaddr_t piglet_va; + char kernel_version[128]; int (*io_func)(dev_t, daddr_t, vaddr_t, size_t, int, void *); }; @@ -108,22 +109,32 @@ int hiballoc_init(struct hiballoc_arena*, void*, size_t len); void uvm_pmr_zero_everything(void); void uvm_pmr_dirty_everything(void); int uvm_pmr_alloc_pig(paddr_t*, psize_t); -int uvm_pmr_alloc_piglet(paddr_t*, psize_t, paddr_t); +int uvm_pmr_alloc_piglet(vaddr_t*, paddr_t*, vsize_t, paddr_t); +void uvm_pmr_free_piglet(vaddr_t, vsize_t); psize_t uvm_page_rle(paddr_t); void *get_hibernate_io_function(void); -int get_hibernate_info(union hibernate_info *); +int get_hibernate_info(union hibernate_info *, int); +int hibernate_zlib_reset(union hibernate_info *, int); void *hibernate_zlib_alloc(void *, int, int); void hibernate_zlib_free(void *, void *); -void hibernate_inflate(paddr_t, paddr_t, size_t); -size_t hibernate_deflate(paddr_t, size_t *); +void hibernate_inflate(union hibernate_info *, paddr_t, paddr_t, size_t); +size_t hibernate_deflate(union hibernate_info *, paddr_t, size_t *); int hibernate_read_block(union hibernate_info *, daddr_t, size_t, vaddr_t); int hibernate_write_signature(union hibernate_info *); +int hibernate_write_chunktable(union hibernate_info *); +int hibernate_write_chunks(union hibernate_info *); int hibernate_clear_signature(void); int hibernate_compare_signature(union hibernate_info *, union hibernate_info *); +void hibernate_resume(void); +int hibernate_suspend(void); +int hibernate_read_image(union hibernate_info *); +int hibernate_read_chunks(union hibernate_info *, paddr_t, paddr_t, size_t); +void hibernate_unpack_image(union hibernate_info *); +void hibernate_populate_resume_pt(union hibernate_info *, paddr_t, paddr_t); int hibernate_check_overlap(paddr_t, paddr_t, paddr_t, paddr_t); |