/*
 * kernel/power/prepare_image.c
 *
 * Copyright (C) 2003-2006 Nigel Cunningham <nigel@suspend.net>
 *
 * This file is released under the GPLv2.
 *
 * We need to eat memory until we can:
 * 1. Perform the save without changing anything (RAM_NEEDED < max_pfn)
 * 2. Fit it all in available space (suspend_active_writer->available_space() >=
 *    storage_needed())
 * 3. Reload the pagedir and pageset1 to places that don't collide with their
 *    final destinations, not knowing to what extent the resumed kernel will
 *    overlap with the one loaded at boot time. I think the resumed kernel
 *    should overlap completely, but I don't want to rely on this as it is 
 *    an unproven assumption. We therefore assume there will be no overlap at
 *    all (worst case).
 * 4. Meet the user's requested limit (if any) on the size of the image.
 *    The limit is in MB, so pages/256 (assuming 4K pages).
 *
 */

#include <linux/highmem.h>
#include <linux/freezer.h>
#include <linux/hardirq.h>
#include <linux/kernel.h>

#include "pageflags.h"
#include "modules.h"
#include "io.h"
#include "ui.h"
#include "extent.h"
#include "prepare_image.h"
#include "block_io.h"
#include "suspend.h"

/*
 * are_frozen is set once attempt_to_freeze() succeeds and is cleared at
 * the start of suspend_prepare_image(). num_nosave is recounted on every
 * call to count_data_pages().
 */
static int are_frozen = 0, num_nosave = 0;
/* Header space, in pages, most recently granted by the active writer. */
static long header_space_allocated = 0;
/* Cached result of the active writer's storage_allocated() query. */
static long storage_allocated = 0;
/* Cached result of the active writer's storage_available() query. */
static long storage_available = 0;
/*
 * Extra pageset1 pages to budget for, on top of the counted pageset1 size.
 * Recomputed by get_extra_pd1_allowance() when it is zero on entry to
 * suspend_prepare_image().
 */
long extra_pd1_pages_allowance = MIN_EXTRA_PAGES_ALLOWANCE;

/*
 * num_pcp_pages
 *
 * Add up the page counts of every per-cpu page list, for every possible
 * CPU, in every zone that has pages present.
 */
static long num_pcp_pages(void)
{
	struct zone *zone;
	long total = 0;

	for_each_zone(zone) {
		int cpu;

		if (zone->present_pages == 0)
			continue;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			struct per_cpu_pageset *pageset;
			long list;

			if (!cpu_possible(cpu))
				continue;

			pageset = zone_pcp(zone, cpu);

			for (list = 0; list < ARRAY_SIZE(pageset->pcp); list++)
				total += pageset->pcp[list].count;
		}
	}

	return total;
}

/*
 * real_nr_free_pages
 *
 * Free page count as reported by nr_free_pages(), plus the pages that
 * are sitting on the per-cpu lists (see num_pcp_pages()).
 */
long real_nr_free_pages(void)
{
	long free_pages = nr_free_pages();

	return free_pages + num_pcp_pages();
}

/*
 * get_extra_pd1_allowance
 *
 * Take the drivers through a trial suspend / power-down cycle and compare
 * the number of free pages before and after. The difference, plus
 * MIN_EXTRA_PAGES_ALLOWANCE, becomes extra_pd1_pages_allowance (never less
 * than MIN_EXTRA_PAGES_ALLOWANCE), so that the same amount of memory can be
 * kept in reserve for the real suspend.
 */
static void get_extra_pd1_allowance(void)
{
	int free_before, free_after, consumed;

	free_before = real_nr_free_pages();

	suspend_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
	device_suspend(PMSG_FREEZE);
	local_irq_disable(); /* irqs might have been re-enabled on us */
	device_power_down(PMSG_FREEZE);

	/* Sample while the devices are still down. */
	free_after = real_nr_free_pages();

	device_power_up();
	local_irq_enable();

	device_resume();

	consumed = free_before - free_after;
	extra_pd1_pages_allowance = max(
		consumed + MIN_EXTRA_PAGES_ALLOWANCE,
		MIN_EXTRA_PAGES_ALLOWANCE);
}

/*
 * main_storage_needed
 *
 * Storage needed for the two pagesets.
 *
 * use_ecr: when non-zero, scale the total by the expected compression
 *	ratio (used here as a percentage); otherwise assume 100%.
 * ignore_extra_pd1_allow: when non-zero, leave extra_pd1_pages_allowance
 *	out of the total.
 */
static long main_storage_needed(int use_ecr,
		int ignore_extra_pd1_allow)
{
	long allowance = ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance;
	int percent = 100;

	if (use_ecr)
		percent = suspend_expected_compression_ratio();

	return (pagedir1.pageset_size + pagedir2.pageset_size + allowance) *
		percent / 100;
}

/*
 * header_storage_needed
 *
 * Storage needed for the image header. The size is summed in bytes (the
 * suspend header structure, the modules' header data and the pageflags)
 * and then rounded up to whole pages, which is what is returned.
 */
static int header_storage_needed(void)
{
	unsigned long bytes =
		(int) sizeof(struct suspend_header) +
		(int) suspend_header_storage_for_modules() +
		suspend_pageflags_space_needed();
	unsigned long pages = (bytes + (int) PAGE_SIZE - 1) >> PAGE_SHIFT;

	return (int) pages;
}

/*
 * display_stats
 *
 * Format a one-line summary of the current image statistics and emit it.
 *
 * always: when non-zero the line goes out via printk(); otherwise it is
 *	passed to suspend_message() at SUSPEND_EAT_MEMORY / SUSPEND_MEDIUM.
 * sub_extra_pd1_allow: forwarded to storage_needed() as its
 *	ignore_extra_pd1_allow argument.
 */
static void display_stats(int always, int sub_extra_pd1_allow)
{
	char buffer[255];

	snprintf(buffer, sizeof(buffer),
		"Free:%d(%d). Sets:%ld(%ld),%ld(%ld). Header:%d. Nosave:%d-%d=%d. Storage:%lu/%lu(%lu). Needed:%ld|%ld|%ld.\n",

		/* Free */
		nr_free_pages(),
		nr_free_pages() - nr_free_highpages(),

		/* Sets */
		pagedir1.pageset_size, pageset1_sizelow,
		pagedir2.pageset_size, pageset2_sizelow,

		/* Header */
		header_storage_needed(),

		/* Nosave */
		num_nosave, extra_pagedir_pages_allocated,
		num_nosave - extra_pagedir_pages_allocated,

		/* Storage - converted to pages for comparison */
		storage_allocated,
		storage_needed(1, sub_extra_pd1_allow),
		storage_available,

		/* Needed */
		ram_to_suspend() - nr_free_pages() - nr_free_highpages(),
		storage_needed(1, sub_extra_pd1_allow) - storage_available,
		(image_size_limit > 0) ? (storage_needed(1, sub_extra_pd1_allow) - (image_size_limit << 8)) : 0);

	/*
	 * The buffer is data, not a format: always pass it through "%s" so
	 * that nothing in it can ever be interpreted as a conversion.
	 */
	if (always)
		printk("%s", buffer);
	else
		suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_MEDIUM, 1,
			"%s", buffer);
}

/* generate_free_page_map
 *
 * Description:	Rebuild the InUse page flags from the memory manager's
 * 		free lists. Every page spanned by a zone with pages present
 * 		is first flagged InUse; the flag is then cleared again for
 * 		each page found on a zone free list (all orders) or on a
 * 		per-cpu page list. The result is used to work out quickly
 * 		which pages to save and in which pagesets.
 */
static void generate_free_page_map(void)
{
	struct zone *zone;

	/* Pass 1: start from "everything is in use". */
	for_each_zone(zone) {
		unsigned long offset;

		if (!zone->present_pages)
			continue;

		for (offset = 0; offset < zone->spanned_pages; offset++)
			SetPageInUse(pfn_to_page(zone->zone_start_pfn + offset));
	}

	/* Pass 2: knock out whatever is on a free list, under the zone lock. */
	for_each_zone(zone) {
		unsigned long flags, list;
		struct page *page;
		int order, cpu;

		if (!zone->present_pages)
			continue;

		spin_lock_irqsave(&zone->lock, flags);

		/* Each free-list entry heads a block of (1 << order) pages. */
		for (order = MAX_ORDER - 1; order >= 0; order--) {
			list_for_each_entry(page, &zone->free_area[order].free_list, lru) {
				int idx;

				for (idx = 0; idx < (1 << order); idx++)
					ClearPageInUse(page + idx);
			}
		}

		/* Per-cpu lists hold single pages. */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			struct per_cpu_pageset *pset;

			if (!cpu_possible(cpu))
				continue;

			pset = zone_pcp(zone, cpu);

			for (list = 0; list < ARRAY_SIZE(pset->pcp); list++) {
				list_for_each_entry(page, &pset->pcp[list].list, lru)
					ClearPageInUse(page);
			}
		}

		spin_unlock_irqrestore(&zone->lock, flags);
	}
}

/* size_of_free_region
 *
 * Description:	Return the number of consecutive pages that are free (not
 * 		flagged InUse), beginning with and including this one. The
 * 		scan stops at the end of the zone containing @page; the
 * 		zone's last page is included in the scan.
 * Returns:	0 if @page itself is in use, otherwise the length of the
 * 		free run. It is a BUG for a free page to be flagged Pageset2.
 */
static int size_of_free_region(struct page *page)
{
	struct zone *zone = page_zone(page);
	struct page *posn = page;
	struct page *last_in_zone =
		pfn_to_page(zone->zone_start_pfn) + zone->spanned_pages - 1;

	/*
	 * last_in_zone is itself a page of the zone, so the bound must be
	 * inclusive; with '<' a free final page was never counted.
	 */
	while (posn <= last_in_zone && !PageInUse(posn)) {
		BUG_ON(PagePageset2(posn));
		posn++;
	}

	return (posn - page);
}

/*
 * Page ranges looked up once at boot by page_nosave_init(). Each pair
 * holds the values returned by the matching *_start_page() and
 * *_end_page() helpers.
 */
static struct page *rotext_start;
static struct page *rotext_end;
static struct page *rodata_start;
static struct page *rodata_end;
static struct page *nosave_start;
static struct page *nosave_end;

/*
 * page_nosave_init
 *
 * Cache the read-only data, read-only text and nosave page ranges.
 * Always returns 0.
 */
static __init int page_nosave_init(void)
{
	/* Read-only data. */
	rodata_start = rodata_start_page();
	rodata_end = rodata_end_page();

	/* Read-only text. */
	rotext_start = rotext_start_page();
	rotext_end = rotext_end_page();

	/* Nosave region. */
	nosave_start = nosave_start_page();
	nosave_end = nosave_end_page();

	return 0;
}

subsys_initcall(page_nosave_init);

/* count_data_pages
 *
 * This routine generates our lists of pages to be stored in each
 * pageset. Since we store the data using extents, and adding new
 * extents might allocate a new extent page, this routine may well
 * be called more than once.
 *
 * Side effects: clears pageset1_map, regenerates the free page map,
 * resets and recounts num_nosave, and sets the Pageset1 / Pageset1Copy
 * flags on the pages it classifies.
 *
 * Returns the total and lowmem-only page counts for each pageset. If the
 * suspend has already been aborted, the counts are returned as zero.
 */
static struct pageset_sizes_result count_data_pages(void)
{
	int num_free = 0;
	unsigned long loop;
	int use_pagedir2;
	struct pageset_sizes_result result;
	struct zone *zone;

	result.size1 = 0;
	result.size1low = 0;
	result.size2 = 0;
	result.size2low = 0;

	num_nosave = 0;

	clear_dyn_pageflags(pageset1_map);

	generate_free_page_map();

	/* Nothing to count if the suspend has already been aborted. */
	if (test_result_state(SUSPEND_ABORTED))
		return result;

	/*
	 * Pages not to be saved are marked Nosave irrespective of being reserved
	 */
	for_each_zone(zone) {
		if (!populated_zone(zone))
			continue;

		for (loop = 0; loop < zone->spanned_pages; loop++) {
			unsigned long pfn = zone->zone_start_pfn + loop;
			struct page *page;
			int chunk_size;

			/* Skip holes in the zone's span. */
			if (!pfn_valid(pfn))
				continue;

			page = pfn_to_page(pfn);
			/* Length of the free run starting here; 0 if in use. */
			chunk_size = size_of_free_region(page);

			/*
			 * Not saved: pages flagged Nosave, pages in the
			 * read-only data range, and reserved pages that lie in
			 * the nosave range or in a highmem zone.
			 */
			if (PageNosave(page) ||
			    (page >= rodata_start && page < rodata_end) ||
			    (PageReserved(page) &&
			     ((page >= nosave_start && page < nosave_end) ||
			      is_highmem(zone)))) {
				num_nosave++;
				continue;
			}

			/*
			 * Free run: count it and jump past it. The -1 allows
			 * for the loop's own increment.
			 */
			if (chunk_size) {
				num_free += chunk_size;
				loop += chunk_size - 1;
				continue;
			}

			use_pagedir2 = PagePageset2(page);

			if (use_pagedir2) {
				result.size2++;
				/* Lowmem pageset2 pages are also flagged Pageset1Copy. */
				if (!PageHighMem(page)) {
					result.size2low++;
					SetPagePageset1Copy(page);
				}
			} else {
				/* Everything else in use goes into pageset1. */
				result.size1++;
				SetPagePageset1(page);
				if (!PageHighMem(page))
					result.size1low++;
			}
		}
	}

	suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_MEDIUM, 0,
		"Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%d) + NumFree (%d) = %d.\n",
		result.size1, result.size2, num_nosave, num_free,
		result.size1 + result.size2 + num_nosave + num_free);
	return result;
}

/* amount_needed
 *
 * Work out by how many pages the image still has to shrink. The result is
 * the larger of the RAM shortfall and the storage shortfall; when
 * use_image_size_limit is non-zero and image_size_limit is positive, the
 * excess over that limit (image_size_limit << 8 pages) is considered too.
 * A result <= 0 means the constraints examined are already met.
 */
static int amount_needed(int use_image_size_limit)
{
	int ram_shortfall = (int) (ram_to_suspend() - real_nr_free_pages() -
				   nr_free_highpages());
	int storage_shortfall = (int) (storage_needed(1, 0) -
				       storage_available);
	int shortfall = max(ram_shortfall, storage_shortfall);
	int over_limit = 0;

	if (!use_image_size_limit)
		return shortfall;

	if (image_size_limit > 0)
		over_limit = (int) (storage_needed(1, 0) -
				    (image_size_limit << 8));

	return max(shortfall, over_limit);
}

/* suspend_recalculate_image_contents
 *
 * Eaten is the number of pages which have been eaten.
 * Pagedirincluded is the number of pages which have been allocated for the pagedir.
 */
void suspend_recalculate_image_contents(int atomic_copy) 
{
	struct pageset_sizes_result result;

	clear_dyn_pageflags(pageset1_map);
	if (!atomic_copy) {
		int pfn;
		BITMAP_FOR_EACH_SET(pageset2_map, pfn)
			ClearPagePageset1Copy(pfn_to_page(pfn));
		/* Need to call this before getting pageset1_size! */
		suspend_mark_pages_for_pageset2();
	}
	BUG_ON(in_atomic() && !irqs_disabled());
	result = count_data_pages();
	pageset1_sizelow = result.size1low;
	pageset2_sizelow = result.size2low;
	pagedir1.lastpageset_size = pagedir1.pageset_size = result.size1;
	pagedir2.lastpageset_size = pagedir2.pageset_size = result.size2;

	if (!atomic_copy) {
		storage_available = suspend_active_writer->storage_available();
		display_stats(1, 0);
	}
	BUG_ON(in_atomic() && !irqs_disabled());
	return;
}

/*
 * try_freeze_processes
 *
 * Freeze processes; if that fails, flag the suspend as having failed to
 * freeze and as aborted.
 */
static void try_freeze_processes(void)
{
	if (!freeze_processes())
		return;

	set_result_state(SUSPEND_FREEZING_FAILED);
	set_result_state(SUSPEND_ABORTED);
}

/* update_image
 *
 * Allocate [more] memory and storage for the image.
 *
 * Steps: recount the image, reserve extra pagedir memory for pageset1,
 * thaw kernel threads, ask the active writer for main storage and then
 * header space, refreeze, and recount.
 *
 * Returns non-zero if something is still missing (memory, header space or
 * storage) so that the caller should try again, zero when the image is
 * fully provisioned.
 */
static int update_image(void)
{
	int result2, param_used;

	suspend_recalculate_image_contents(0);

	/* Include allowance for growth in pagedir1 while writing pagedir 2 */
	if (suspend_allocate_extra_pagedir_memory(&pagedir1,
		pagedir1.pageset_size + extra_pd1_pages_allowance,
				pageset2_sizelow)) {
		suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_LOW, 1,
			"Still need to get more pages for pagedir 1.\n");
		return 1;
	}

	thaw_processes(FREEZER_KERNEL_THREADS);

	param_used = main_storage_needed(1, 0);
	result2 = suspend_active_writer->allocate_storage(param_used);
	if (result2) {
		suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_LOW, 1,
			"Allocate storage returned %d. Still need to get more"
			" storage space for the image proper.\n",
			result2);
		storage_allocated = suspend_active_writer->storage_allocated();
		try_freeze_processes();
		return 1;
	}

	/*
	 * Allocate remaining storage space, if possible, up to the
	 * maximum we know we'll need. It's okay to allocate the
	 * maximum if the writer is the swapwriter, but
	 * we don't want to grab all available space on an NFS share.
	 * We therefore ignore the expected compression ratio here,
	 * thereby trying to allocate the maximum image size we could
	 * need (assuming compression doesn't expand the image), but
	 * don't complain if we can't get the full amount we're after.
	 */

	suspend_active_writer->allocate_storage(
		min(storage_available, main_storage_needed(0, 1)));

	storage_allocated = suspend_active_writer->storage_allocated();

	/* Allocate the header storage after allocating main storage
	 * so that the overhead for metadata doesn't change the amount
	 * of storage needed for the header itself.
	 */

	param_used = header_storage_needed();

	result2 = suspend_active_writer->allocate_header_space(param_used);

	/* Refreeze before acting on the result of the header allocation. */
	try_freeze_processes();

	if (result2) {
		suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_LOW, 1,
			"Still need to get more storage space for header.\n");
		return 1;
	}

	header_space_allocated = param_used;

	suspend_recalculate_image_contents(0);

	/*
	 * header_space_allocated, storage_allocated and
	 * main_storage_needed() are longs, so they need %ld.
	 */
	suspend_message(SUSPEND_EAT_MEMORY, SUSPEND_LOW, 1,
		"Amount still needed (%d) > 0:%d. Header: %ld < %d: %d,"
		" Storage allocd: %ld < %d + %ld: %d.\n",
			amount_needed(0),
			(amount_needed(0) > 0),
			header_space_allocated, header_storage_needed(),
			header_space_allocated < header_storage_needed(),
			storage_allocated,
			header_storage_needed(), main_storage_needed(1, 1),
			storage_allocated <
			(header_storage_needed() + main_storage_needed(1, 1)));

	suspend_cond_pause(0, NULL);

	return ((amount_needed(0) > 0) ||
		header_space_allocated < header_storage_needed() ||
		 storage_allocated <
		 (header_storage_needed() + main_storage_needed(1, 1)));
}

/* attempt_to_freeze
 *
 * Thaw everything, then try to freeze processes afresh. On success
 * are_frozen is set; on failure the suspend is flagged as aborted and as
 * having failed to freeze.
 *
 * Returns the result of freeze_processes() (zero on success).
 */

static int attempt_to_freeze(void)
{
	int failed;

	/* Stop processes before checking again */
	thaw_processes(FREEZER_ALL_THREADS);
	suspend_prepare_status(CLEAR_BAR, "Freezing processes");

	failed = freeze_processes();
	if (!failed) {
		are_frozen = 1;
		return failed;
	}

	set_result_state(SUSPEND_ABORTED);
	set_result_state(SUSPEND_FREEZING_FAILED);

	return failed;
}

/*
 * storage_needed
 *
 * Total storage needed for the image: the main (pageset) storage plus the
 * header storage. Both arguments are passed straight through to
 * main_storage_needed().
 */
long storage_needed(int use_ecr, int ignore_extra_pd1_allow)
{
	long main_pages = main_storage_needed(use_ecr, ignore_extra_pd1_allow);

	return main_pages + header_storage_needed();
}

/*
 * ram_to_suspend
 *
 * Amount of free RAM wanted for the suspend: one page, plus half of the
 * pageset1 size (including the extra allowance) that is not already
 * covered by lowmem pageset2 pages or extra pagedir pages (never less
 * than zero), plus MIN_FREE_RAM, plus whatever the modules ask for.
 */
long ram_to_suspend(void)
{
	long pd1_half = max_t(long,
		(pagedir1.pageset_size + extra_pd1_pages_allowance -
		 pageset2_sizelow - extra_pagedir_pages_allocated) / 2, 0);

	return 1 + pd1_half + MIN_FREE_RAM + suspend_memory_for_modules();
}

/* eat_memory
 *
 * Try to free some memory, either to meet hard or soft constraints on the image
 * characteristics.
 *
 * Hard constraints:
 * - Pageset1 must be < half of memory;
 * - We must have enough memory free at resume time to have pageset1
 *   be able to be loaded in pages that don't conflict with where it has to
 *   be restored.
 * Soft constraints
 * - User specified image size limit.
 *
 * image_size_limit selects the policy:
 *   -1      never eat memory; abort if any would have to be eaten.
 *   -2      ask for as much as can be reclaimed, whatever is needed.
 *   other   ask for just the amount amount_needed(1) reports.
 *
 * Failures are reported through the suspend result state
 * (SUSPEND_ABORTED plus a reason); the return value is always 0.
 */
static int eat_memory(void)
{
	int amount_wanted = 0;
	int did_eat_memory = 0;

	/*
	 * Note that if we have enough storage space and enough free memory, we
	 * may exit without eating anything.
	 *
	 * Processes are frozen when we are called. Kernel threads are thawed
	 * while memory is being freed and everything is refrozen afterwards.
	 * If the refreeze fails, we give up and abort.
	 */

	/* -- Stage 1: Work out how much to ask for -- */

	suspend_recalculate_image_contents(0);
	amount_wanted = amount_needed(1);

	switch (image_size_limit) {
	case -1: /* Don't eat any memory */
		if (amount_wanted > 0) {
			set_result_state(SUSPEND_ABORTED);
			set_result_state(SUSPEND_WOULD_EAT_MEMORY);
		}
		break;
	case -2:  /* Free caches only */
		/*
		 * As much as we can get. This used to be 1 << 31, which
		 * overflows a 32-bit int and came out negative, so the
		 * amount_wanted > 0 test below never fired in this mode.
		 */
		amount_wanted = INT_MAX;
		break;
	default:
		break;
	}

	thaw_processes(FREEZER_KERNEL_THREADS);

	/* -- Stage 2: Eat memory -- */

	if (amount_wanted > 0 && !test_result_state(SUSPEND_ABORTED) &&
			image_size_limit != -1) {

		suspend_prepare_status(CLEAR_BAR, "Seeking to free %dMB of memory.", MB(amount_wanted));

		shrink_all_memory(amount_wanted);
		suspend_recalculate_image_contents(0);

		did_eat_memory = 1;

		suspend_cond_pause(0, NULL);
	}

	/* -- Stage 3: Refreeze and re-evaluate -- */

	try_freeze_processes();

	if (did_eat_memory) {
		/*
		 * NOTE(review): the state is read and restored back to back,
		 * after the freeze; confirm whether it was meant to be saved
		 * before freezing.
		 */
		unsigned long orig_state = get_suspend_state();
		/* Freeze_processes will call sys_sync too */
		restore_suspend_state(orig_state);
		suspend_recalculate_image_contents(0);
	}

	/* Blank out image size display */
	suspend_update_status(100, 100, NULL);

	if (!test_result_state(SUSPEND_ABORTED) &&
	    (amount_needed(0) - extra_pd1_pages_allowance > 0)) {
		printk("Unable to free sufficient memory to suspend. Still need %d pages.\n",
			amount_needed(1));
		display_stats(1, 1);
		set_result_state(SUSPEND_ABORTED);
		set_result_state(SUSPEND_UNABLE_TO_FREE_ENOUGH_MEMORY);
	}

	return 0;
}

/* suspend_prepare_image
 *
 * Entry point to the whole image preparation section.
 *
 * We do four things:
 * - Freeze processes;
 * - Ensure image size constraints are met;
 * - Complete all the preparation for saving the image,
 *   including allocation of storage. The only memory
 *   that should be needed when we're finished is that
 *   for actually storing the image (and we know how
 *   much is needed for that because the modules tell
 *   us).
 * - Make sure that all dirty buffers are written out.
 *
 * eat_memory() and update_image() are retried up to MAX_TRIES times.
 *
 * Returns zero once update_image() reports the image fully provisioned,
 * non-zero otherwise (freezing failed, no storage available, the suspend
 * was aborted, or the retries were exhausted).
 */
#define MAX_TRIES 4
int suspend_prepare_image(void)
{
	int result = 1, tries = 0;

	are_frozen = 0;

	header_space_allocated = 0;

	if (attempt_to_freeze())
		return 1;

	if (!extra_pd1_pages_allowance)
		get_extra_pd1_allowance();

	storage_available = suspend_active_writer->storage_available();

	if (!storage_available) {
		printk(KERN_ERR "You need some storage available to be able to suspend.\n");
		set_result_state(SUSPEND_ABORTED);
		set_result_state(SUSPEND_NOSTORAGE_AVAILABLE);
		return 1;
	}

	do {
		suspend_prepare_status(CLEAR_BAR, "Preparing Image.");

		if (eat_memory() || test_result_state(SUSPEND_ABORTED))
			break;

		result = update_image();

		suspend_cond_pause(0, NULL);

		tries++;

	} while ((result) && (tries < MAX_TRIES) && (!test_result_state(SUSPEND_ABORTED)) &&
		(!test_result_state(SUSPEND_UNABLE_TO_FREE_ENOUGH_MEMORY)));

	/*
	 * Only give up if the last permitted attempt still failed. A success
	 * on the final try also leaves tries == MAX_TRIES and must not be
	 * turned into an abort.
	 */
	if (result && tries == MAX_TRIES) {
		abort_suspend("Unable to successfully prepare the image.\n");
		display_stats(1, 0);
	}

	suspend_cond_pause(1, "Image preparation complete.");

	return result;
}
