/* $OpenBSD: subr_hibernate.c,v 1.142 2024/08/18 08:01:03 mpi Exp $ */ /* * Copyright (c) 2011 Ariane van der Steldt * Copyright (c) 2011 Mike Larkin * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Make sure the signature can fit in one block */ CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE); /* * Hibernate piglet layout information * * The piglet is a scratch area of memory allocated by the suspending kernel. * Its phys and virt addrs are recorded in the signature block. The piglet is * used to guarantee an unused area of memory that can be used by the resuming * kernel for various things. The piglet is excluded during unpack operations. * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). 
*
 * Offset from piglet_base	Purpose
 * ----------------------------------------------------------------------------
 * 0				Private page for suspend I/O write functions
 * 1*PAGE_SIZE			I/O page used during hibernate suspend
 * 2*PAGE_SIZE			I/O page used during hibernate suspend
 * 3*PAGE_SIZE			copy page used during hibernate suspend
 * 4*PAGE_SIZE			final chunk ordering list (24 pages)
 * 28*PAGE_SIZE			RLE utility page
 * 29*PAGE_SIZE			preserved entropy (1 page)
 * 30*PAGE_SIZE			start of hiballoc area
 * 110*PAGE_SIZE		end of hiballoc area (80 pages),
 *				start of retguard preservation region
 * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
 * ...				unused
 * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
 * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
 * 4*HIBERNATE_CHUNK_SIZE	end of piglet
 */

/* Temporary vaddr ranges used during hibernate */
vaddr_t hibernate_temp_page;
vaddr_t hibernate_copy_page;
vaddr_t hibernate_rle_page;

/* Hibernate info as read from disk during resume */
union hibernate_info disk_hib;

/*
 * Block device switch entry of the hibernate device.  Set by
 * hibernate_resume() before the device is opened and used by
 * hibernate_block_io() to reach the device's strategy routine.
 */
struct bdevsw *bdsw;

/*
 * Global copy of the pig start address. This needs to be a global as we
 * switch stacks after computing it - it can't be stored on the stack.
 */
paddr_t global_pig_start;

/*
 * Global copies of the piglet start addresses (PA/VA). We store these
 * as globals to avoid having to carry them around as parameters, as the
 * piglet is allocated early and freed late - its lifecycle extends beyond
 * that of the hibernate info union which is calculated on suspend/resume.
 */
vaddr_t global_piglet_va;
paddr_t global_piglet_pa;

/*
 * Debug output.  With HIB_DEBUG defined, DPRINTF() prints whenever
 * hib_debug is nonzero and DNPRINTF(n, ...) prints when hib_debug > n;
 * otherwise both macros expand to nothing.
 */
/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
#define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
#define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
#else
#define DPRINTF(x...)
#define DNPRINTF(n,x...)
#endif

/* Round _x up to the next multiple of _y (integer arithmetic). */
#define	ROUNDUP(_x, _y) ((((_x)+(_y)-1)/(_y))*(_y))

#ifndef NO_PROPOLICE
extern long __guard_local;
#endif /* !
NO_PROPOLICE */ /* Retguard phys address (need to skip this region during unpack) */ paddr_t retguard_start_phys, retguard_end_phys; extern char __retguard_start, __retguard_end; void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); int hibernate_calc_rle(paddr_t, paddr_t); int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, size_t *); #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE) /* * Hib alloc enforced alignment. */ #define HIB_ALIGN 8 /* bytes alignment */ /* * sizeof builtin operation, but with alignment constraint. */ #define HIB_SIZEOF(_type) roundup(sizeof(_type), HIB_ALIGN) struct hiballoc_entry { size_t hibe_use; size_t hibe_space; RBT_ENTRY(hiballoc_entry) hibe_entry; }; /* * Sort hibernate memory ranges by ascending PA */ void hibernate_sort_ranges(union hibernate_info *hib_info) { int i, j; struct hibernate_memory_range *ranges; paddr_t base, end; ranges = hib_info->ranges; for (i = 1; i < hib_info->nranges; i++) { j = i; while (j > 0 && ranges[j - 1].base > ranges[j].base) { base = ranges[j].base; end = ranges[j].end; ranges[j].base = ranges[j - 1].base; ranges[j].end = ranges[j - 1].end; ranges[j - 1].base = base; ranges[j - 1].end = end; j--; } } } /* * Compare hiballoc entries based on the address they manage. * * Since the address is fixed, relative to struct hiballoc_entry, * we just compare the hiballoc_entry pointers. */ static __inline int hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) { vaddr_t vl = (vaddr_t)l; vaddr_t vr = (vaddr_t)r; return vl < vr ? -1 : (vl > vr); } RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp) /* * Given a hiballoc entry, return the address it manages. */ static __inline void * hib_entry_to_addr(struct hiballoc_entry *entry) { caddr_t addr; addr = (caddr_t)entry; addr += HIB_SIZEOF(struct hiballoc_entry); return addr; } /* * Given an address, find the hiballoc that corresponds. 
*/ static __inline struct hiballoc_entry* hib_addr_to_entry(void *addr_param) { caddr_t addr; addr = (caddr_t)addr_param; addr -= HIB_SIZEOF(struct hiballoc_entry); return (struct hiballoc_entry*)addr; } RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); /* * Allocate memory from the arena. * * Returns NULL if no memory is available. */ void * hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) { struct hiballoc_entry *entry, *new_entry; size_t find_sz; /* * Enforce alignment of HIB_ALIGN bytes. * * Note that, because the entry is put in front of the allocation, * 0-byte allocations are guaranteed a unique address. */ alloc_sz = roundup(alloc_sz, HIB_ALIGN); /* * Find an entry with hibe_space >= find_sz. * * If the root node is not large enough, we switch to tree traversal. * Because all entries are made at the bottom of the free space, * traversal from the end has a slightly better chance of yielding * a sufficiently large space. */ find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry); entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs); if (entry != NULL && entry->hibe_space < find_sz) { RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) { if (entry->hibe_space >= find_sz) break; } } /* * Insufficient or too fragmented memory. */ if (entry == NULL) return NULL; /* * Create new entry in allocated space. */ new_entry = (struct hiballoc_entry*)( (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); new_entry->hibe_space = entry->hibe_space - find_sz; new_entry->hibe_use = alloc_sz; /* * Insert entry. */ if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL) panic("hib_alloc: insert failure"); entry->hibe_space = 0; /* Return address managed by entry. */ return hib_entry_to_addr(new_entry); } void hib_getentropy(char **bufp, size_t *bufplen) { if (!bufp || !bufplen) return; *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE)); *bufplen = PAGE_SIZE; } /* * Free a pointer previously allocated from this arena. 
*
 * If addr is NULL, this will be silently accepted.
 * Panics if addr does not belong to an entry of this arena.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *victim, *before;
	caddr_t before_end;

	if (addr == NULL)
		return;

	/* Recover the header and make sure the tree really knows it. */
	victim = hib_addr_to_entry(addr);
	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, victim) != victim)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * If the predecessor's header + used + free bytes end exactly at
	 * this entry, fold the whole entry into the predecessor's free
	 * space.
	 */
	before = RBT_PREV(hiballoc_addr, victim);
	if (before != NULL) {
		before_end = (caddr_t)before +
		    HIB_SIZEOF(struct hiballoc_entry) +
		    before->hibe_use + before->hibe_space;
		if ((void *)before_end == victim) {
			RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, victim);
			before->hibe_space +=
			    HIB_SIZEOF(struct hiballoc_entry) +
			    victim->hibe_use + victim->hibe_space;
			return;
		}
	}

	/* No adjacent predecessor: keep the header, mark its bytes free. */
	victim->hibe_space += victim->hibe_use;
	victim->hibe_use = 0;
}

/*
 * Set up a hiballoc arena over the p_len bytes starting at p_ptr.
 *
 * The start is rounded up and the length rounded down to HIB_ALIGN.
 * Returns 0 on success, or ENOMEM when the aligned region cannot hold
 * more than a single bookkeeping header.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *first;
	caddr_t base;
	size_t slack, usable;

	RBT_INIT(hiballoc_addr, &arena->hib_addrs);

	/* Align the start upwards and trim the tail to whole units. */
	base = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	slack = (size_t)base - (size_t)p_ptr;
	usable = (p_len - slack) & ~((size_t)HIB_ALIGN - 1);

	/* Need room for the header plus at least something to hand out. */
	if (usable <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/* One entry with nothing in use describes the whole region. */
	first = (struct hiballoc_entry*)base;
	first->hibe_use = 0;
	first->hibe_space = usable - HIB_SIZEOF(struct hiballoc_entry);
	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, first);

	return 0;
}

/*
 * Zero all free memory.
*/
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	/* The whole walk runs with the free page queue locked. */
	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/*
		 * Zero single pages.
		 *
		 * Each page is taken off the DIRTY list, zeroed, flagged
		 * PG_ZERO and re-inserted, so the loop ends once the DIRTY
		 * list is empty.
		 */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/*
		 * Zero multi page ranges.
		 *
		 * The size tree links a range through its second page, so
		 * step back one page to reach the first page, whose fpgsz
		 * gives the range length.
		 */
		while ((pg = RBT_ROOT(uvm_pmr_size,
		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 *
 * Mirrors uvm_pmr_zero_everything(): every range on the ZERO lists is
 * removed, has PG_ZERO cleared on each page, and is re-inserted.  The
 * global zero-page counter is reset to 0 afterwards.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RBT_ROOT(uvm_pmr_size,
		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++)
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	/* Nothing is zeroed any more. */
	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate an area that can hold sz bytes and doesn't overlap with
 * the piglet at piglet_pa.
*/ int uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) { struct uvm_constraint_range pig_constraint; struct kmem_pa_mode kp_pig = { .kp_constraint = &pig_constraint, .kp_maxseg = 1 }; vaddr_t va; sz = round_page(sz); pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE; pig_constraint.ucr_high = -1; va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); if (va == 0) { pig_constraint.ucr_low = 0; pig_constraint.ucr_high = piglet_pa - 1; va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); if (va == 0) return ENOMEM; } pmap_extract(pmap_kernel(), va, pa); return 0; } /* * Allocate a piglet area. * * This needs to be in DMA-safe memory. * Piglets are aligned. * * sz and align in bytes. */ int uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) { struct kmem_pa_mode kp_piglet = { .kp_constraint = &dma_constraint, .kp_align = align, .kp_maxseg = 1 }; /* Ensure align is a power of 2 */ KASSERT((align & (align - 1)) == 0); /* * Fixup arguments: align must be at least PAGE_SIZE, * sz will be converted to pagecount, since that is what * pmemrange uses internally. */ if (align < PAGE_SIZE) kp_piglet.kp_align = PAGE_SIZE; sz = round_page(sz); *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); if (*va == 0) return ENOMEM; pmap_extract(pmap_kernel(), *va, pa); return 0; } /* * Free a piglet area. */ void uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) { /* * Fix parameters. */ sz = round_page(sz); /* * Free the physical and virtual memory. */ km_free((void *)va, sz, &kv_any, &kp_dma_contig); } /* * Physmem RLE compression support. * * Given a physical page address, return the number of pages starting at the * address that are free. Clamps to the number of pages in * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. 
*/
int
uvm_page_rle(paddr_t addr)
{
	struct vm_page		*pg, *pg_end;
	struct vm_physseg	*vmp;
	int			 pseg_idx, off_idx;

	/* Addresses outside every physical segment count as "not free". */
	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 *
	 * The scan also stops at vmp->lastpg and once
	 * HIBERNATE_CHUNK_SIZE/PAGE_SIZE pages have been counted.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
		;
	return pg_end - pg;
}

/*
 * Fills out the hibernate_info union pointed to by hib
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc)
 *
 * suspend selects the I/O page setup: nonzero uses the start of the
 * previously allocated piglet and issues HIB_INIT to the I/O function;
 * zero (resume) allocates a private page with malloc().
 *
 * Returns 0 on success, 1 on any failure.
 */
int
get_hibernate_info(union hibernate_info *hib, int suspend)
{
	struct disklabel dl;
	char err_string[128], *dl_ret;
	int part;
	SHA2_CTX ctx;
	void *fn;

#ifndef NO_PROPOLICE
	/* Save propolice guard */
	hib->guard = __guard_local;
#endif /* ! NO_PROPOLICE */

	/* Determine I/O function to use */
	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
	if (hib->io_func == NULL)
		return (1);

	/* Calculate hibernate device */
	hib->dev = swdevt[0].sw_dev;

	/* Read disklabel (used to calculate signature and image offsets) */
	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));

	if (dl_ret) {
		printf("Hibernate error reading disklabel: %s\n", dl_ret);
		return (1);
	}

	/*
	 * Make sure we have a swap partition.
	 *
	 * Also rejects devices whose sector size exceeds the size of
	 * union hibernate_info, and empty partitions.
	 */
	part = DISKPART(hib->dev);
	if (dl.d_npartitions <= part ||
	    dl.d_secsize > sizeof(union hibernate_info) ||
	    dl.d_partitions[part].p_fstype != FS_SWAP ||
	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
		return (1);

	/* Magic number */
	hib->magic = HIBERNATE_MAGIC;

	/*
	 * Calculate signature block location: the last sector of the
	 * partition, converted to DEV_BSIZE blocks by DL_SECTOBLK.
	 */
	hib->sec_size = dl.d_secsize;
	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);

	/*
	 * Kernel fingerprint: SHA256 over the version string and the
	 * addresses of printf, malloc, km_alloc and strlen.
	 * hibernate_compare_signature() compares this hash to decide
	 * whether the resuming kernel is the one that wrote the image.
	 */
	SHA256Init(&ctx);
	SHA256Update(&ctx, version, strlen(version));
	fn = printf;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = malloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = km_alloc;
	SHA256Update(&ctx, &fn, sizeof(fn));
	fn = strlen;
	SHA256Update(&ctx, &fn, sizeof(fn));
	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);

	if (suspend) {
		/* Grab the previously-allocated piglet addresses */
		hib->piglet_va = global_piglet_va;
		hib->piglet_pa = global_piglet_pa;
		hib->io_page = (void *)hib->piglet_va;

		/*
		 * Initialization of the hibernate IO function for drivers
		 * that need to do prep work (such as allocating memory or
		 * setting up data structures that cannot safely be done
		 * during suspend without causing side effects). There is
		 * a matching HIB_DONE call performed after the write is
		 * completed.
		 */
		if (hib->io_func(hib->dev,
		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
		    (vaddr_t)NULL,
		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
		    HIB_INIT, hib->io_page))
			goto fail;

	} else {
		/*
		 * Resuming kernels use a regular private page for the driver
		 * No need to free this I/O page as it will vanish as part of
		 * the resume.
		 */
		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
		if (!hib->io_page)
			goto fail;
	}

	/* Let the machine-dependent code fill in its part. */
	if (get_hibernate_info_md(hib))
		goto fail;

	return (0);

fail:
	return (1);
}

/*
 * Allocate nitems*size bytes from the hiballoc area presently in use
 *
 * The first argument is ignored; the arena is the one embedded in the
 * zlib state located at HIBERNATE_HIBALLOC_PAGE.
 */
void *
hibernate_zlib_alloc(void *unused, int nitems, int size)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
}

/*
 * Free the memory pointed to by addr in the hiballoc area presently in
 * use
 */
void
hibernate_zlib_free(void *unused, void *addr)
{
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hib_free(&hibernate_state->hiballoc_arena, addr);
}

/*
 * Inflate next page of data from the image stream.
 * The rle parameter is modified on exit to contain the number of pages to
 * skip in the output stream (or 0 if this page was inflated into).
 *
 * The stream is read in two steps: first sizeof(int) bytes for the RLE
 * code, then - only when that code is 0 - one full page, which is
 * inflated to HIBERNATE_INFLATE_PAGE.
 *
 * Returns 0 if the stream contains additional data, or 1 if the stream is
 * finished.
 */
int
hibernate_inflate_page(int *rle)
{
	struct hibernate_zlib_state *hibernate_state;
	int i;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for RLE code inflate */
	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
	hibernate_state->hib_stream.avail_out = sizeof(*rle);

	/* Inflate RLE code */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle inflate stream error");
	}

	/* The whole RLE code must have been produced. */
	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("rle short inflate error");
	}

	/*
	 * NOTE(review): 1024 presumably mirrors MAX_RLE for 4MB chunks and
	 * 4KB pages - confirm for other configurations.
	 */
	if (*rle < 0 || *rle > 1024) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("invalid rle count");
	}

	if (i == Z_STREAM_END)
		return (1);

	/* Nonzero RLE code: pages are skipped, no page data follows. */
	if (*rle != 0)
		return (0);

	/* Set up the stream for page inflate */
	hibernate_state->hib_stream.next_out =
	    (unsigned char *)HIBERNATE_INFLATE_PAGE;
	hibernate_state->hib_stream.avail_out = PAGE_SIZE;

	/* Process next block of data */
	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
	if (i != Z_OK && i != Z_STREAM_END) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("inflate error");
	}

	/* We should always have extracted a full page ... */
	if (hibernate_state->hib_stream.avail_out != 0) {
		/*
		 * XXX - this will likely reboot/hang most machines
		 *       since the console output buffer will be unmapped,
		 *       but there's not much else we can do here.
		 */
		panic("incomplete page");
	}

	return (i == Z_STREAM_END);
}

/*
 * Inflate size bytes from src into dest, skipping any pages in
 * [src..dest] that are special (see hibernate_inflate_skip)
 *
 * This function executes while using the resume-time stack
 * and pmap, and therefore cannot use ddb/printf/etc. Doing so
 * will likely hang or reset the machine since the console output buffer
 * will be unmapped.
 */
void
hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
    paddr_t src, size_t size)
{
	int end_stream = 0, rle, skip;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = size;

	do {
		/*
		 * Is this a special page? If yes, redirect the
		 * inflate output to a scratch page (eg, discard it)
		 */
		skip = hibernate_inflate_skip(hib, dest);
		if (skip == HIB_SKIP) {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    HIBERNATE_INFLATE_PAGE, 0);
		} else if (skip == HIB_MOVE) {
			/*
			 * Special case : retguard region. This gets moved
			 * temporarily into the piglet region and copied into
			 * place immediately before resume
			 */
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE,
			    hib->piglet_pa + (110 * PAGE_SIZE) +
			    hib->retguard_ofs, 0);
			hib->retguard_ofs += PAGE_SIZE;
			/*
			 * NOTE(review): the bound is tested after the
			 * increment, so the panic fires as soon as the
			 * 256th page has been mapped - confirm that
			 * limiting the region to 255 pages is intended.
			 */
			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
				/*
				 * XXX - this will likely reboot/hang most
				 *       machines since the console output
				 *       buffer will be unmapped, but there's
				 *       not much else we can do here.
				 */
				panic("retguard move error, out of space");
			}
		} else {
			hibernate_enter_resume_mapping(
			    HIBERNATE_INFLATE_PAGE, dest, 0);
		}

		hibernate_flush();
		end_stream = hibernate_inflate_page(&rle);

		/* Advance by one inflated page, or by the skipped run. */
		if (rle == 0)
			dest += PAGE_SIZE;
		else
			dest += (rle * PAGE_SIZE);
	} while (!end_stream);
}

/*
 * deflate from src into the I/O page, up to 'remaining' bytes
 *
 * Input is limited to the bytes between src and the end of its page.
 * Output is appended to the I/O page at piglet_va + PAGE_SIZE, starting
 * at offset PAGE_SIZE - *remaining.
 *
 * Returns number of input bytes consumed, and may reset
 * the 'remaining' parameter if not all the output space was consumed
 * (this information is needed to know how much to write to disk)
 */
size_t
hibernate_deflate(union hibernate_info *hib, paddr_t src,
    size_t *remaining)
{
	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
	struct hibernate_zlib_state *hibernate_state;

	hibernate_state =
	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;

	/* Set up the stream for deflate */
	hibernate_state->hib_stream.next_in = (unsigned char *)src;
	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
	hibernate_state->hib_stream.next_out =
		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
	hibernate_state->hib_stream.avail_out = *remaining;

	/* Process next block of data */
	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
		panic("hibernate zlib deflate error");

	/* Update pointers and return number of bytes consumed */
	*remaining = hibernate_state->hib_stream.avail_out;
	return (PAGE_SIZE - (src & PAGE_MASK)) -
	    hibernate_state->hib_stream.avail_in;
}

/*
 * Write the hibernation information specified in hiber_info
 * to the location in swap previously calculated (last block of
 * swap), called the "signature block".
 *
 * The global disk_hib is used as the staging buffer: sec_size bytes are
 * zeroed, then the first DEV_BSIZE bytes of hib are copied in.
 * Returns the I/O function's result.
 */
int
hibernate_write_signature(union hibernate_info *hib)
{
	memset(&disk_hib, 0, hib->sec_size);
	memcpy(&disk_hib, hib, DEV_BSIZE);

	/* Write hibernate info to disk */
	return (hib->io_func(hib->dev, hib->sig_offset,
	    (vaddr_t)&disk_hib, hib->sec_size, HIB_W,
	    hib->io_page));
}

/*
 * Write the memory chunk table to the area in swap immediately
 * preceding the signature block. The chunk table is stored
 * in the piglet when this function is called.  Returns errno.
 *
 * The table (HIBERNATE_CHUNK_TABLE_SIZE bytes at piglet_va +
 * HIBERNATE_CHUNK_SIZE) is written in MAXPHYS-sized pieces starting at
 * block hib->chunktable_offset.
 */
int
hibernate_write_chunktable(union hibernate_info *hib)
{
	vaddr_t hibernate_chunk_table_start;
	size_t hibernate_chunk_table_size;
	int i, err;

	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;

	hibernate_chunk_table_start = hib->piglet_va +
	    HIBERNATE_CHUNK_SIZE;

	/* Write chunk table */
	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
		if ((err = hib->io_func(hib->dev,
		    hib->chunktable_offset + (i/DEV_BSIZE),
		    (vaddr_t)(hibernate_chunk_table_start + i),
		    MAXPHYS, HIB_W, hib->io_page))) {
			DPRINTF("chunktable write error: %d\n", err);
			return (err);
		}
	}

	return (0);
}

/*
 * Write an empty hiber_info to the swap signature block, which is
 * guaranteed to not match any valid hib.
*/ int hibernate_clear_signature(union hibernate_info *hib) { uint8_t buf[DEV_BSIZE]; /* Zero out a blank hiber_info */ memcpy(&buf, &disk_hib, sizeof(buf)); memset(&disk_hib, 0, hib->sec_size); /* Write (zeroed) hibernate info to disk */ DPRINTF("clearing hibernate signature block location: %lld\n", hib->sig_offset); if (hibernate_block_io(hib, hib->sig_offset, hib->sec_size, (vaddr_t)&disk_hib, 1)) printf("Warning: could not clear hibernate signature\n"); memcpy(&disk_hib, buf, sizeof(buf)); return (0); } /* * Compare two hibernate_infos to determine if they are the same (eg, * we should be performing a hibernate resume on this machine. * Not all fields are checked - just enough to verify that the machine * has the same memory configuration and kernel as the one that * wrote the signature previously. */ int hibernate_compare_signature(union hibernate_info *mine, union hibernate_info *disk) { u_int i; if (mine->nranges != disk->nranges) { printf("unhibernate failed: memory layout changed\n"); return (1); } if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) { printf("unhibernate failed: original kernel changed\n"); return (1); } for (i = 0; i < mine->nranges; i++) { if ((mine->ranges[i].base != disk->ranges[i].base) || (mine->ranges[i].end != disk->ranges[i].end) ) { DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", i, (void *)mine->ranges[i].base, (void *)mine->ranges[i].end, (void *)disk->ranges[i].base, (void *)disk->ranges[i].end); printf("unhibernate failed: memory size changed\n"); return (1); } } return (0); } /* * Transfers xfer_size bytes between the hibernate device specified in * hib_info at offset blkctr and the vaddr specified at dest. * * Separate offsets and pages are used to handle misaligned reads (reads * that span a page boundary). 
*
 * blkctr specifies a relative offset (relative to the start of swap),
 * not an absolute disk offset
 *
 * iswrite selects the direction: nonzero copies dest into the buffer and
 * writes it, zero reads and copies the buffer to dest on success.
 * Returns 0 on success, 1 if biowait() reported an error.
 */
int
hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
    size_t xfer_size, vaddr_t dest, int iswrite)
{
	struct buf *bp;
	int error;

	bp = geteblk(xfer_size);
	if (iswrite)
		bcopy((caddr_t)dest, bp->b_data, xfer_size);

	bp->b_bcount = xfer_size;
	bp->b_blkno = blkctr;
	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
	bp->b_dev = hib->dev;
	/* bdsw must have been set up by the caller (see hibernate_resume). */
	(*bdsw->d_strategy)(bp);

	error = biowait(bp);
	if (error) {
		printf("hib block_io biowait error %d blk %lld size %zu\n",
			error, (long long)blkctr, xfer_size);
	} else if (!iswrite)
		bcopy(bp->b_data, (caddr_t)dest, xfer_size);

	/* Mark the buffer invalid before releasing it. */
	bp->b_flags |= B_INVAL;
	brelse(bp);

	return (error != 0);
}

/*
 * Preserve one page worth of random data, generated from the resuming
 * kernel's arc4random. After resume, this preserved entropy can be used
 * to further improve the un-hibernated machine's entropy pool. This
 * random data is stored in the piglet, which is preserved across the
 * unpack operation, and is restored later in the resume process (see
 * hib_getentropy)
 *
 * The page at piglet_pa + 29*PAGE_SIZE is mapped at a temporary kernel
 * VA, filled, and unmapped again.  Silently does nothing if no VA can
 * be allocated.
 */
void
hibernate_preserve_entropy(union hibernate_info *hib)
{
	void *entropy;

	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);

	if (!entropy)
		return;

	pmap_activate(curproc);
	pmap_kenter_pa((vaddr_t)entropy,
	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
	    PROT_READ | PROT_WRITE);

	arc4random_buf((void *)entropy, PAGE_SIZE);
	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
}

#ifndef NO_PROPOLICE
/*
 * Create a writable kernel mapping of the physical page that holds
 * __guard_local and return its VA.  The new VA is requested with
 * kd_prefer set to that physical address.  Panics if no VA can be
 * allocated.  Undo with hibernate_reprotect_ssp().
 */
vaddr_t
hibernate_unprotect_ssp(void)
{
	struct kmem_dyn_mode kd_avoidalias;
	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
	paddr_t pa;

	pmap_extract(pmap_kernel(), va, &pa);

	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
	kd_avoidalias.kd_prefer = pa;
	kd_avoidalias.kd_waitok = 1;
	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
	if (!va)
		panic("hibernate_unprotect_ssp");

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	return va;
}

/*
 * Remove the temporary mapping created by hibernate_unprotect_ssp() and
 * release its VA.
 */
void
hibernate_reprotect_ssp(vaddr_t va)
{
	pmap_kremove(va, PAGE_SIZE);
	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
}
#endif /* NO_PROPOLICE */

/*
 * Reads the signature block from swap, checks against the current machine's
 * information. If the information matches, perform a resume by reading the
 * saved image into the pig area, and unpacking.
 *
 * Must be called with interrupts enabled.
 *
 * Returns normally only when no resume takes place; on the success path
 * control is handed to hibernate_unpack_image().
 */
void
hibernate_resume(void)
{
	uint8_t buf[DEV_BSIZE];
	union hibernate_info *hib = (union hibernate_info *)&buf;
	int s;
#ifndef NO_PROPOLICE
	/* Offset of __guard_local within its page. */
	vsize_t off = (vaddr_t)&__guard_local -
	    trunc_page((vaddr_t)&__guard_local);
	vaddr_t guard_va;
#endif

	/* Get current running machine's hibernate info */
	memset(buf, 0, sizeof(buf));
	if (get_hibernate_info(hib, 0)) {
		DPRINTF("couldn't retrieve machine's hibernate info\n");
		return;
	}

	/* Read hibernate info from disk */
	s = splbio();

	bdsw = &bdevsw[major(hib->dev)];
	if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) {
		printf("hibernate_resume device open failed\n");
		splx(s);
		return;
	}

	DPRINTF("reading hibernate signature block location: %lld\n",
		hib->sig_offset);

	if (hibernate_block_io(hib,
	    hib->sig_offset,
	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
		DPRINTF("error in hibernate read\n");
		goto fail;
	}

	/* Check magic number */
	if (disk_hib.magic != HIBERNATE_MAGIC) {
		DPRINTF("wrong magic number in hibernate signature: %x\n",
			disk_hib.magic);
		goto fail;
	}

	/*
	 * We (possibly) found a hibernate signature. Clear signature first,
	 * to prevent accidental resume or endless resume cycles later.
	 */
	if (hibernate_clear_signature(hib)) {
		DPRINTF("error clearing hibernate signature block\n");
		goto fail;
	}

	/*
	 * If on-disk and in-memory hibernate signatures match,
	 * this means we should do a resume from hibernate.
	 */
	if (hibernate_compare_signature(hib, &disk_hib)) {
		DPRINTF("mismatched hibernate signature block\n");
		goto fail;
	}
	/* From here on the on-disk info is used, with our device number. */
	disk_hib.dev = hib->dev;

#ifdef MULTIPROCESSOR
	/* XXX - if we fail later, we may need to rehatch APs on some archs */
	DPRINTF("hibernate: quiescing APs\n");
	hibernate_quiesce_cpus();
#endif /* MULTIPROCESSOR */

	/* Read the image from disk into the image (pig) area */
	if (hibernate_read_image(&disk_hib))
		goto fail;
	if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	/* Device is closed; the fail path keys off bdsw being NULL. */
	bdsw = NULL;

	DPRINTF("hibernate: quiescing devices\n");
	if (config_suspend_all(DVACT_QUIESCE) != 0)
		goto fail;

#ifndef NO_PROPOLICE
	guard_va = hibernate_unprotect_ssp();
#endif /* NO_PROPOLICE */

	(void) splhigh();
	hibernate_disable_intr_machdep();
	cold = 2;

	DPRINTF("hibernate: suspending devices\n");
	if (config_suspend_all(DVACT_SUSPEND) != 0) {
		/* Undo the steps taken since the quiesce before bailing. */
		cold = 0;
		hibernate_enable_intr_machdep();
#ifndef NO_PROPOLICE
		hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */
		goto fail;
	}

	/* Record the physical extent of the retguard region. */
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
	    &retguard_start_phys);
	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
	    &retguard_end_phys);

	hibernate_preserve_entropy(&disk_hib);

	printf("Unpacking image...\n");

	/* Switch stacks */
	DPRINTF("hibernate: switching stacks\n");
	hibernate_switch_stack_machdep();

#ifndef NO_PROPOLICE
	/* Start using suspended kernel's propolice guard */
	*(long *)(guard_va + off) = disk_hib.guard;
	hibernate_reprotect_ssp(guard_va);
#endif /* ! NO_PROPOLICE */

	/* Unpack and resume */
	hibernate_unpack_image(&disk_hib);

fail:
	/*
	 * bdsw == NULL means the image was read and the device already
	 * closed; otherwise the device is still open and is closed here.
	 */
	if (!bdsw)
		printf("\nUnable to resume hibernated image\n");
	else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc))
		printf("hibernate_resume device close failed\n");
	splx(s);
}

/*
 * Unpack image from pig area to original location by looping through the
 * list of output chunks in the order they should be restored (fchunks).
*
 * Note that due to the stack smash protector and the fact that we have
 * switched stacks, it is not permitted to return from this function.
 */
void
hibernate_unpack_image(union hibernate_info *hib)
{
	uint8_t buf[DEV_BSIZE];
	struct hibernate_disk_chunk *chunks;
	union hibernate_info *local_hib = (union hibernate_info *)&buf;
	paddr_t image_cur = global_pig_start;
	short i, *fchunks;
	char *pva;

	/* Piglet will be identity mapped (VA == PA) */
	pva = (char *)hib->piglet_pa;

	/* Final chunk ordering list, 4 pages into the piglet. */
	fchunks = (short *)(pva + (4 * PAGE_SIZE));

	/* Chunk table, HIBERNATE_CHUNK_SIZE into the piglet. */
	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);

	/*
	 * Can't use hiber_info that's passed in after this point, so keep
	 * a DEV_BSIZE-byte copy on the current stack.
	 */
	memcpy(buf, hib, sizeof(buf));
	local_hib->retguard_ofs = 0;

	/* VA == PA */
	local_hib->piglet_va = local_hib->piglet_pa;

	/*
	 * Point of no return. Once we pass this point, only kernel code can
	 * be accessed. No global variables or other kernel data structures
	 * are guaranteed to be coherent after unpack starts.
	 *
	 * The image is now in high memory (pig area), we unpack from the pig
	 * to the correct location in memory. We'll eventually end up copying
	 * on top of ourself, but we are assured the kernel code here is the
	 * same between the hibernated and resuming kernel, and we are running
	 * on our own stack, so the overwrite is ok.
	 */
	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
	hibernate_activate_resume_pt_machdep();

	for (i = 0; i < local_hib->chunk_ctr; i++) {
		/* Reset zlib for inflate */
		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
			panic("hibernate failed to reset zlib for inflate");

		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
		    image_cur);

		/* Compressed chunks sit back to back in the pig area. */
		image_cur += chunks[fchunks[i]].compressed_size;

	}

	/*
	 * Resume the loaded kernel by jumping to the MD resume vector.
	 * We won't be returning from this call. We pass the location of
	 * the retguard save area so the MD code can replace it before
	 * resuming. See the piglet layout at the top of this file for
	 * more information on the layout of the piglet area.
	 *
	 * We use 'global_piglet_va' here since by the time we are at
	 * this point, we have already unpacked the image, and we want
	 * the suspended kernel's view of what the piglet was, before
	 * suspend occurred (since we will need to use that in the retguard
	 * copy code in hibernate_resume_machdep.)
	 */
	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
}

/*
 * Bounce a compressed image chunk to the piglet, entering mappings for the
 * copied pages as needed
 *
 * Each source page is mapped at HIBERNATE_INFLATE_PAGE in turn and copied
 * to the destination.  A leading partial page (img_cur not page aligned)
 * is copied first; after that whole pages are copied until src reaches
 * img_cur + size, so the last copy may extend past 'size' up to the end
 * of its page.
 */
void
hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
{
	size_t ct, ofs;
	paddr_t src = img_cur;
	vaddr_t dest = piglet;

	/* Copy first partial page */
	ct = (PAGE_SIZE) - (src & PAGE_MASK);
	ofs = (src & PAGE_MASK);

	/* ct == PAGE_SIZE means src is page aligned: nothing partial. */
	if (ct < PAGE_SIZE) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
			(src - ofs), 0);
		hibernate_flush();
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
		src += ct;
		dest += ct;
	}

	/* Copy remaining pages */
	while (src < size + img_cur) {
		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
		hibernate_flush();
		ct = PAGE_SIZE;
		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
		hibernate_flush();
		src += ct;
		dest += ct;
	}
}

/*
 * Process a chunk by bouncing it to the piglet, followed by unpacking
 *
 * The bounce area is at piglet_va + 2*HIBERNATE_CHUNK_SIZE; the chunk is
 * then inflated from there to its original location chunk->base.
 */
void
hibernate_process_chunk(union hibernate_info *hib,
    struct hibernate_disk_chunk *chunk, paddr_t img_cur)
{
	char *pva = (char *)hib->piglet_va;

	hibernate_copy_chunk_to_piglet(img_cur,
	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
	hibernate_inflate_region(hib, chunk->base,
	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
	    chunk->compressed_size);
}

/*
 * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
 * inaddr and range_end.
*/ int hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) { int rle; rle = uvm_page_rle(inaddr); KASSERT(rle >= 0 && rle <= MAX_RLE); /* Clamp RLE to range end */ if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end) rle = (range_end - inaddr) / PAGE_SIZE; return (rle); } /* * Write the RLE byte for page at 'inaddr' to the output stream. * Returns the number of pages to be skipped at 'inaddr'. */ int hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr, paddr_t range_end, daddr_t *blkctr, size_t *out_remaining) { int rle, err, *rleloc; struct hibernate_zlib_state *hibernate_state; vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; hibernate_state = (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; rle = hibernate_calc_rle(inaddr, range_end); rleloc = (int *)hibernate_rle_page + MAX_RLE - 1; *rleloc = rle; /* Deflate the RLE byte into the stream */ hibernate_deflate(hib, (paddr_t)rleloc, out_remaining); /* Did we fill the output page? If so, flush to disk */ if (*out_remaining == 0) { if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset, (vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W, hib->io_page))) { DPRINTF("hib write error %d\n", err); return (err); } *blkctr += PAGE_SIZE / DEV_BSIZE; *out_remaining = PAGE_SIZE; /* If we didn't deflate the entire RLE byte, finish it now */ if (hibernate_state->hib_stream.avail_in != 0) hibernate_deflate(hib, (vaddr_t)hibernate_state->hib_stream.next_in, out_remaining); } return (rle); } /* * Write a compressed version of this machine's memory to disk, at the * precalculated swap offset: * * end of swap - signature block size - chunk table size - memory size * * The function begins by looping through each phys mem range, cutting each * one into MD sized chunks. These chunks are then compressed individually * and written out to disk, in phys mem order. 
Some chunks might compress * more than others, and for this reason, each chunk's size is recorded * in the chunk table, which is written to disk after the image has * properly been compressed and written (in hibernate_write_chunktable). * * When this function is called, the machine is nearly suspended - most * devices are quiesced/suspended, interrupts are off, and cold has * been set. This means that there can be no side effects once the * write has started, and the write function itself can also have no * side effects. This also means no printfs are permitted (since printf * has side effects.) * * Return values : * * 0 - success * EIO - I/O error occurred writing the chunks * EINVAL - Failed to write a complete range * ENOMEM - Memory allocation failure during preparation of the zlib arena */ int hibernate_write_chunks(union hibernate_info *hib) { paddr_t range_base, range_end, inaddr, temp_inaddr; size_t out_remaining, used; struct hibernate_disk_chunk *chunks; vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; daddr_t blkctr = 0; int i, rle, err; struct hibernate_zlib_state *hibernate_state; hibernate_state = (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; hib->chunk_ctr = 0; /* * Map the utility VAs to the piglet. See the piglet map at the * top of this file for piglet layout information. 
*/ hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE; hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE; chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + HIBERNATE_CHUNK_SIZE); /* Calculate the chunk regions */ for (i = 0; i < hib->nranges; i++) { range_base = hib->ranges[i].base; range_end = hib->ranges[i].end; inaddr = range_base; while (inaddr < range_end) { chunks[hib->chunk_ctr].base = inaddr; if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) chunks[hib->chunk_ctr].end = inaddr + HIBERNATE_CHUNK_SIZE; else chunks[hib->chunk_ctr].end = range_end; inaddr += HIBERNATE_CHUNK_SIZE; hib->chunk_ctr ++; } } uvm_pmr_dirty_everything(); uvm_pmr_zero_everything(); /* Compress and write the chunks in the chunktable */ for (i = 0; i < hib->chunk_ctr; i++) { range_base = chunks[i].base; range_end = chunks[i].end; chunks[i].offset = blkctr + hib->image_offset; /* Reset zlib for deflate */ if (hibernate_zlib_reset(hib, 1) != Z_OK) { DPRINTF("hibernate_zlib_reset failed for deflate\n"); return (ENOMEM); } inaddr = range_base; /* * For each range, loop through its phys mem region * and write out the chunks (the last chunk might be * smaller than the chunk size). */ while (inaddr < range_end) { out_remaining = PAGE_SIZE; while (out_remaining > 0 && inaddr < range_end) { /* * Adjust for regions that are not evenly * divisible by PAGE_SIZE or overflowed * pages from the previous iteration. 
*/ temp_inaddr = (inaddr & PAGE_MASK) + hibernate_copy_page; /* Deflate from temp_inaddr to IO page */ if (inaddr != range_end) { if (inaddr % PAGE_SIZE == 0) { rle = hibernate_write_rle(hib, inaddr, range_end, &blkctr, &out_remaining); } if (rle == 0) { pmap_kenter_pa(hibernate_temp_page, inaddr & PMAP_PA_MASK, PROT_READ); bcopy((caddr_t)hibernate_temp_page, (caddr_t)hibernate_copy_page, PAGE_SIZE); inaddr += hibernate_deflate(hib, temp_inaddr, &out_remaining); } else { inaddr += rle * PAGE_SIZE; if (inaddr > range_end) inaddr = range_end; } } if (out_remaining == 0) { /* Filled up the page */ if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, (vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W, hib->io_page))) { DPRINTF("hib write error %d\n", err); return (err); } blkctr += PAGE_SIZE / DEV_BSIZE; } } } if (inaddr != range_end) { DPRINTF("deflate range ended prematurely\n"); return (EINVAL); } /* * End of range. Round up to next secsize bytes * after finishing compress */ if (out_remaining == 0) out_remaining = PAGE_SIZE; /* Finish compress */ hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; hibernate_state->hib_stream.avail_in = 0; hibernate_state->hib_stream.next_out = (unsigned char *)hibernate_io_page + (PAGE_SIZE - out_remaining); /* We have an extra output page available for finalize */ hibernate_state->hib_stream.avail_out = out_remaining + PAGE_SIZE; if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) != Z_STREAM_END) { DPRINTF("deflate error in output stream: %d\n", err); return (err); } out_remaining = hibernate_state->hib_stream.avail_out; /* Round up to next sector if needed */ used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size); /* Write final block(s) for this chunk */ if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, (vaddr_t)hibernate_io_page, used, HIB_W, hib->io_page))) { DPRINTF("hib final write error %d\n", err); return (err); } blkctr += used / DEV_BSIZE; chunks[i].compressed_size = (blkctr + 
hib->image_offset - chunks[i].offset) * DEV_BSIZE; } hib->chunktable_offset = hib->image_offset + blkctr; return (0); } /* * Reset the zlib stream state and allocate a new hiballoc area for either * inflate or deflate. This function is called once for each hibernate chunk. * Calling hiballoc_init multiple times is acceptable since the memory it is * provided is unmanaged memory (stolen). We use the memory provided to us * by the piglet allocated via the supplied hib. */ int hibernate_zlib_reset(union hibernate_info *hib, int deflate) { vaddr_t hibernate_zlib_start; size_t hibernate_zlib_size; char *pva = (char *)hib->piglet_va; struct hibernate_zlib_state *hibernate_state; hibernate_state = (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; if (!deflate) pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK)); /* * See piglet layout information at the start of this file for * information on the zlib page assignments. */ hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE)); hibernate_zlib_size = 80 * PAGE_SIZE; memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size); memset(hibernate_state, 0, PAGE_SIZE); /* Set up stream structure */ hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; /* Initialize the hiballoc arena for zlib allocs/frees */ hiballoc_init(&hibernate_state->hiballoc_arena, (caddr_t)hibernate_zlib_start, hibernate_zlib_size); if (deflate) { return deflateInit(&hibernate_state->hib_stream, Z_BEST_SPEED); } else return inflateInit(&hibernate_state->hib_stream); } /* * Reads the hibernated memory image from disk, whose location and * size are recorded in hib. Begin by reading the persisted * chunk table, which records the original chunk placement location * and compressed size for each. Next, allocate a pig region of * sufficient size to hold the compressed image. 
Next, read the * chunks into the pig area (calling hibernate_read_chunks to do this), * and finally, if all of the above succeeds, clear the hibernate signature. * The function will then return to hibernate_resume, which will proceed * to unpack the pig image to the correct place in memory. */ int hibernate_read_image(union hibernate_info *hib) { size_t compressed_size, disk_size, chunktable_size, pig_sz; paddr_t image_start, image_end, pig_start, pig_end; struct hibernate_disk_chunk *chunks; daddr_t blkctr; vaddr_t chunktable = (vaddr_t)NULL; paddr_t piglet_chunktable = hib->piglet_pa + HIBERNATE_CHUNK_SIZE; int i, status; status = 0; pmap_activate(curproc); /* Calculate total chunk table size in disk blocks */ chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE; blkctr = hib->chunktable_offset; chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any, &kp_none, &kd_nowait); if (!chunktable) return (1); /* Map chunktable pages */ for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) pmap_kenter_pa(chunktable + i, piglet_chunktable + i, PROT_READ | PROT_WRITE); pmap_update(pmap_kernel()); /* Read the chunktable from disk into the piglet chunktable */ for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE) hibernate_block_io(hib, blkctr, MAXPHYS, chunktable + i, 0); blkctr = hib->image_offset; compressed_size = 0; chunks = (struct hibernate_disk_chunk *)chunktable; for (i = 0; i < hib->chunk_ctr; i++) compressed_size += chunks[i].compressed_size; disk_size = compressed_size; printf("unhibernating @ block %lld length %luMB\n", hib->sig_offset - chunktable_size, compressed_size / (1024 * 1024)); /* Allocate the pig area */ pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE; if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) { status = 1; goto unmap; } pig_end = pig_start + pig_sz; /* Calculate image extents. Pig image must end on a chunk boundary. 
*/ image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1); image_start = image_end - disk_size; hibernate_read_chunks(hib, image_start, image_end, disk_size, chunks); /* Prepare the resume time pmap/page table */ hibernate_populate_resume_pt(hib, image_start, image_end); unmap: /* Unmap chunktable pages */ pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE); pmap_update(pmap_kernel()); return (status); } /* * Read the hibernated memory chunks from disk (chunk information at this * point is stored in the piglet) into the pig area specified by * [pig_start .. pig_end]. Order the chunks so that the final chunk is the * only chunk with overlap possibilities. */ int hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, paddr_t pig_end, size_t image_compr_size, struct hibernate_disk_chunk *chunks) { paddr_t img_cur, piglet_base; daddr_t blkctr; size_t processed, compressed_size, read_size; int nchunks, nfchunks, num_io_pages; vaddr_t tempva, hibernate_fchunk_area; short *fchunks, i, j; tempva = (vaddr_t)NULL; hibernate_fchunk_area = (vaddr_t)NULL; nfchunks = 0; piglet_base = hib->piglet_pa; global_pig_start = pig_start; /* * These mappings go into the resuming kernel's page table, and are * used only during image read. They disappear from existence * when the suspended kernel is unpacked on top of us. 
*/ tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); if (!tempva) return (1); hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); if (!hibernate_fchunk_area) return (1); /* Final output chunk ordering VA */ fchunks = (short *)hibernate_fchunk_area; /* Map the chunk ordering region */ for(i = 0; i < 24 ; i++) pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE), piglet_base + ((4 + i) * PAGE_SIZE), PROT_READ | PROT_WRITE); pmap_update(pmap_kernel()); nchunks = hib->chunk_ctr; /* Initially start all chunks as unplaced */ for (i = 0; i < nchunks; i++) chunks[i].flags = 0; /* * Search the list for chunks that are outside the pig area. These * can be placed first in the final output list. */ for (i = 0; i < nchunks; i++) { if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) { fchunks[nfchunks] = i; nfchunks++; chunks[i].flags |= HIBERNATE_CHUNK_PLACED; } } /* * Walk the ordering, place the chunks in ascending memory order. */ for (i = 0; i < nchunks; i++) { if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) { fchunks[nfchunks] = i; nfchunks++; chunks[i].flags = HIBERNATE_CHUNK_PLACED; } } img_cur = pig_start; for (i = 0; i < nfchunks; i++) { blkctr = chunks[fchunks[i]].offset; processed = 0; compressed_size = chunks[fchunks[i]].compressed_size; while (processed < compressed_size) { if (compressed_size - processed >= MAXPHYS) read_size = MAXPHYS; else read_size = compressed_size - processed; /* * We're reading read_size bytes, offset from the * start of a page by img_cur % PAGE_SIZE, so the * end will be read_size + (img_cur % PAGE_SIZE) * from the start of the first page. Round that * up to the next page size. 
*/ num_io_pages = (read_size + (img_cur % PAGE_SIZE) + PAGE_SIZE - 1) / PAGE_SIZE; KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1); /* Map pages for this read */ for (j = 0; j < num_io_pages; j ++) pmap_kenter_pa(tempva + j * PAGE_SIZE, img_cur + j * PAGE_SIZE, PROT_READ | PROT_WRITE); pmap_update(pmap_kernel()); hibernate_block_io(hib, blkctr, read_size, tempva + (img_cur & PAGE_MASK), 0); blkctr += (read_size / DEV_BSIZE); pmap_kremove(tempva, num_io_pages * PAGE_SIZE); pmap_update(pmap_kernel()); processed += read_size; img_cur += read_size; } } pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE); pmap_update(pmap_kernel()); return (0); } /* * Hibernating a machine comprises the following operations: * 1. Calculating this machine's hibernate_info information * 2. Allocating a piglet and saving the piglet's physaddr * 3. Calculating the memory chunks * 4. Writing the compressed chunks to disk * 5. Writing the chunk table * 6. Writing the signature block (hibernate_info) * * On most architectures, the function calling hibernate_suspend would * then power off the machine using some MD-specific implementation. */ int hibernate_suspend(void) { uint8_t buf[DEV_BSIZE]; union hibernate_info *hib = (union hibernate_info *)&buf; u_long start, end; /* * Calculate memory ranges, swap offsets, etc. 
* This also allocates a piglet whose physaddr is stored in * hib->piglet_pa and vaddr stored in hib->piglet_va */ if (get_hibernate_info(hib, 1)) { DPRINTF("failed to obtain hibernate info\n"); return (1); } /* Find a page-addressed region in swap [start,end] */ if (uvm_hibswap(hib->dev, &start, &end)) { printf("hibernate: cannot find any swap\n"); return (1); } if (end - start < 1000) { printf("hibernate: insufficient swap (%lu is too small)\n", end - start + 1); return (1); } pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, &retguard_start_phys); pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, &retguard_end_phys); /* Calculate block offsets in swap */ hib->image_offset = ctod(start); DPRINTF("hibernate @ block %lld max-length %lu blocks\n", hib->image_offset, ctod(end) - ctod(start) + 1); pmap_activate(curproc); DPRINTF("hibernate: writing chunks\n"); if (hibernate_write_chunks(hib)) { DPRINTF("hibernate_write_chunks failed\n"); return (1); } DPRINTF("hibernate: writing chunktable\n"); if (hibernate_write_chunktable(hib)) { DPRINTF("hibernate_write_chunktable failed\n"); return (1); } DPRINTF("hibernate: writing signature\n"); if (hibernate_write_signature(hib)) { DPRINTF("hibernate_write_signature failed\n"); return (1); } /* Allow the disk to settle */ delay(500000); /* * Give the device-specific I/O function a notification that we're * done, and that it can clean up or shutdown as needed. */ hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page); return (0); } int hibernate_alloc(void) { KASSERT(global_piglet_va == 0); KASSERT(hibernate_temp_page == 0); pmap_activate(curproc); pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE, PROT_READ | PROT_WRITE); /* Allocate a piglet, store its addresses in the supplied globals */ if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa, HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE)) goto unmap; /* * Allocate VA for the temp page. 
* * This will become part of the suspended kernel and will * be freed in hibernate_free, upon resume (or hibernate * failure) */ hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); if (!hibernate_temp_page) { uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE); global_piglet_va = 0; goto unmap; } return (0); unmap: pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); pmap_update(pmap_kernel()); return (ENOMEM); } /* * Free items allocated by hibernate_alloc() */ void hibernate_free(void) { pmap_activate(curproc); if (global_piglet_va) uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE); if (hibernate_temp_page) { pmap_kremove(hibernate_temp_page, PAGE_SIZE); km_free((void *)hibernate_temp_page, PAGE_SIZE, &kv_any, &kp_none); } global_piglet_va = 0; hibernate_temp_page = 0; pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); pmap_update(pmap_kernel()); }