/dev/null | 235 ---------- arch/i386/Kconfig | 44 -- arch/i386/Makefile | 1 arch/i386/kernel/Makefile | 2 arch/i386/power/Makefile | 3 arch/i386/power/cpu.c | 141 ++++++ arch/i386/power/pmdisk.S | 94 ++++ arch/i386/power/swsusp.S | 94 ++++ drivers/acpi/sleep/main.c | 53 +- drivers/acpi/sleep/proc.c | 73 +++ drivers/acpi/sleep/sleep.h | 3 drivers/base/core.c | 33 - drivers/base/power/main.c | 13 drivers/base/power/power.h | 3 drivers/base/power/resume.c | 21 drivers/base/power/suspend.c | 10 include/asm-i386/suspend.h | 7 include/linux/suspend.h | 13 kernel/power/Kconfig | 92 ++++ kernel/power/Makefile | 1 kernel/power/console.c | 2 kernel/power/disk.c | 335 +++++++++++++++ kernel/power/main.c | 391 ++--------------- kernel/power/pmdisk.c | 942 +++++++++++++++++++++++++++++++++++++++++++ kernel/power/power.h | 41 - kernel/power/swsusp.c | 405 +++++++++++++----- kernel/sched.c | 4 kernel/sys.c | 7 28 files changed, 2249 insertions(+), 814 deletions(-) diff -puN arch/i386/Kconfig~test5-pm2 arch/i386/Kconfig --- 25/arch/i386/Kconfig~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/arch/i386/Kconfig 2003-09-10 08:12:15.000000000 -0700 @@ -814,49 +814,7 @@ endmenu menu "Power management options (ACPI, APM)" depends on !X86_VOYAGER -config PM - bool "Power Management support" - ---help--- - "Power Management" means that parts of your computer are shut - off or put into a power conserving "sleep" mode if they are not - being used. There are two competing standards for doing this: APM - and ACPI. If you want to use either one, say Y here and then also - to the requisite support below. - - Power Management is most important for battery powered laptop - computers; if you have a laptop, check out the Linux Laptop home - page on the WWW at - <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the - Battery Powered Linux mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. 
- - Note that, even if you say N here, Linux on the x86 architecture - will issue the hlt instruction if nothing is to be done, thereby - sending the processor to sleep and saving power. - -config SOFTWARE_SUSPEND - bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP - ---help--- - Enable the possibilty of suspendig machine. It doesn't need APM. - You may suspend your machine by 'swsusp' or 'shutdown -z <time>' - (patch for sysvinit needed). - - It creates an image which is saved in your active swaps. By the next - booting the, pass 'resume=/dev/swappartition' and kernel will - detect the saved image, restore the memory from - it and then it continues to run as before you've suspended. - If you don't want the previous state to continue use the 'noresume' - kernel option. However note that your partitions will be fsck'd and - you must re-mkswap your swap partitions. It does not work with swap - files. - - Right now you may boot without resuming and then later resume but - in meantime you cannot use those swap partitions/files which were - involved in suspending. Also in this case there is a risk that buffers - on disk won't match with saved ones. - - For more information take a look at Documentation/swsusp.txt. 
+source kernel/power/Kconfig source "drivers/acpi/Kconfig" diff -puN arch/i386/kernel/Makefile~test5-pm2 arch/i386/kernel/Makefile --- 25/arch/i386/kernel/Makefile~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/arch/i386/kernel/Makefile 2003-09-10 08:12:15.000000000 -0700 @@ -18,9 +18,7 @@ obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o diff -puN -L arch/i386/kernel/suspend_asm.S arch/i386/kernel/suspend_asm.S~test5-pm2 /dev/null --- 25/arch/i386/kernel/suspend_asm.S +++ /dev/null 2002-08-30 16:31:37.000000000 -0700 @@ -1,94 +0,0 @@ -.text - -/* Originally gcc generated, modified by hand */ - -#include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/page.h> - - .text - -ENTRY(do_magic) - pushl %ebx - cmpl $0,8(%esp) - jne .L1450 - call do_magic_suspend_1 - call save_processor_state - - movl %esp, saved_context_esp - movl %eax, saved_context_eax - movl %ebx, saved_context_ebx - movl %ecx, saved_context_ecx - movl %edx, saved_context_edx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags - - call do_magic_suspend_2 - jmp .L1449 - .p2align 4,,7 -.L1450: - movl $swapper_pg_dir-__PAGE_OFFSET,%ecx - movl %ecx,%cr3 - - call do_magic_resume_1 - movl $0,loop - cmpl $0,nr_copy_pages - je .L1453 - .p2align 4,,7 -.L1455: - movl $0,loop2 - .p2align 4,,7 -.L1459: - movl pagedir_nosave,%ecx - movl loop,%eax - movl loop2,%edx - sall $4,%eax - movl 4(%ecx,%eax),%ebx - movl (%ecx,%eax),%eax - movb (%edx,%eax),%al - movb %al,(%edx,%ebx) - movl %cr3, %eax; - movl %eax, %cr3; # flush TLB - - movl loop2,%eax - leal 1(%eax),%edx - movl %edx,loop2 - movl %edx,%eax - cmpl 
$4095,%eax - jbe .L1459 - movl loop,%eax - leal 1(%eax),%edx - movl %edx,loop - movl %edx,%eax - cmpl nr_copy_pages,%eax - jb .L1455 - .p2align 4,,7 -.L1453: - movl $__USER_DS,%eax - - movw %ax, %ds - movw %ax, %es - movl saved_context_esp, %esp - movl saved_context_ebp, %ebp - movl saved_context_eax, %eax - movl saved_context_ebx, %ebx - movl saved_context_ecx, %ecx - movl saved_context_edx, %edx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - call restore_processor_state - pushl saved_context_eflags ; popfl - call do_magic_resume_2 -.L1449: - popl %ebx - ret - - .section .data.nosave -loop: - .quad 0 -loop2: - .quad 0 - .previous - \ No newline at end of file diff -puN -L arch/i386/kernel/suspend.c arch/i386/kernel/suspend.c~test5-pm2 /dev/null --- 25/arch/i386/kernel/suspend.c +++ /dev/null 2002-08-30 16:31:37.000000000 -0700 @@ -1,141 +0,0 @@ -/* - * Suspend support specific for i386. - * - * Distribute under GPLv2 - * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> - * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> - */ - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/poll.h> -#include <linux/delay.h> -#include <linux/sysrq.h> -#include <linux/proc_fs.h> -#include <linux/irq.h> -#include <linux/pm.h> -#include <linux/device.h> -#include <linux/suspend.h> -#include <linux/acpi.h> -#include <asm/uaccess.h> -#include <asm/acpi.h> -#include <asm/tlbflush.h> - -static struct saved_context saved_context; -static void fix_processor_context(void); - -unsigned long saved_context_eax, saved_context_ebx; -unsigned long saved_context_ecx, saved_context_edx; -unsigned long saved_context_esp, saved_context_ebp; -unsigned long saved_context_esi, saved_context_edi; -unsigned long saved_context_eflags; - -extern void enable_sep_cpu(void *); - -void save_processor_state(void) -{ - kernel_fpu_begin(); - - /* - * descriptor 
tables - */ - asm volatile ("sgdt %0" : "=m" (saved_context.gdt_limit)); - asm volatile ("sidt %0" : "=m" (saved_context.idt_limit)); - asm volatile ("sldt %0" : "=m" (saved_context.ldt)); - asm volatile ("str %0" : "=m" (saved_context.tr)); - - /* - * segment registers - */ - asm volatile ("movw %%es, %0" : "=m" (saved_context.es)); - asm volatile ("movw %%fs, %0" : "=m" (saved_context.fs)); - asm volatile ("movw %%gs, %0" : "=m" (saved_context.gs)); - asm volatile ("movw %%ss, %0" : "=m" (saved_context.ss)); - - /* - * control registers - */ - asm volatile ("movl %%cr0, %0" : "=r" (saved_context.cr0)); - asm volatile ("movl %%cr2, %0" : "=r" (saved_context.cr2)); - asm volatile ("movl %%cr3, %0" : "=r" (saved_context.cr3)); - asm volatile ("movl %%cr4, %0" : "=r" (saved_context.cr4)); -} - -static void -do_fpu_end(void) -{ - /* restore FPU regs if necessary */ - /* Do it out of line so that gcc does not move cr0 load to some stupid place */ - kernel_fpu_end(); -} - -void restore_processor_state(void) -{ - - /* - * control registers - */ - asm volatile ("movl %0, %%cr4" :: "r" (saved_context.cr4)); - asm volatile ("movl %0, %%cr3" :: "r" (saved_context.cr3)); - asm volatile ("movl %0, %%cr2" :: "r" (saved_context.cr2)); - asm volatile ("movl %0, %%cr0" :: "r" (saved_context.cr0)); - - /* - * segment registers - */ - asm volatile ("movw %0, %%es" :: "r" (saved_context.es)); - asm volatile ("movw %0, %%fs" :: "r" (saved_context.fs)); - asm volatile ("movw %0, %%gs" :: "r" (saved_context.gs)); - asm volatile ("movw %0, %%ss" :: "r" (saved_context.ss)); - - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). 
- */ - asm volatile ("lgdt %0" :: "m" (saved_context.gdt_limit)); - asm volatile ("lidt %0" :: "m" (saved_context.idt_limit)); - asm volatile ("lldt %0" :: "m" (saved_context.ldt)); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(NULL); - - fix_processor_context(); - do_fpu_end(); -} - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct * t = init_tss + cpu; - - set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; - - load_TR_desc(); /* This does ltr */ - load_LDT(&current->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg[7]){ - loaddebug(&current->thread, 0); - loaddebug(&current->thread, 1); - loaddebug(&current->thread, 2); - loaddebug(&current->thread, 3); - /* no 4 and 5 */ - loaddebug(&current->thread, 6); - loaddebug(&current->thread, 7); - } - -} - -EXPORT_SYMBOL(save_processor_state); -EXPORT_SYMBOL(restore_processor_state); diff -puN arch/i386/Makefile~test5-pm2 arch/i386/Makefile --- 25/arch/i386/Makefile~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/arch/i386/Makefile 2003-09-10 08:12:15.000000000 -0700 @@ -103,6 +103,7 @@ drivers-$(CONFIG_MATH_EMULATION) += arch drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ +drivers-$(CONFIG_PM) += arch/i386/power/ CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) diff -puN /dev/null arch/i386/power/cpu.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/power/cpu.c 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,141 @@ +/* + * Suspend support specific for i386. 
+ * + * Distribute under GPLv2 + * + * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/poll.h> +#include <linux/delay.h> +#include <linux/sysrq.h> +#include <linux/proc_fs.h> +#include <linux/irq.h> +#include <linux/pm.h> +#include <linux/device.h> +#include <linux/suspend.h> +#include <linux/acpi.h> +#include <asm/uaccess.h> +#include <asm/acpi.h> +#include <asm/tlbflush.h> + +static struct saved_context saved_context; +static void fix_processor_context(void); + +unsigned long saved_context_eax, saved_context_ebx; +unsigned long saved_context_ecx, saved_context_edx; +unsigned long saved_context_esp, saved_context_ebp; +unsigned long saved_context_esi, saved_context_edi; +unsigned long saved_context_eflags; + +extern void enable_sep_cpu(void *); + +void save_processor_state(void) +{ + kernel_fpu_begin(); + + /* + * descriptor tables + */ + asm volatile ("sgdt %0" : "=m" (saved_context.gdt_limit)); + asm volatile ("sidt %0" : "=m" (saved_context.idt_limit)); + asm volatile ("sldt %0" : "=m" (saved_context.ldt)); + asm volatile ("str %0" : "=m" (saved_context.tr)); + + /* + * segment registers + */ + asm volatile ("movw %%es, %0" : "=m" (saved_context.es)); + asm volatile ("movw %%fs, %0" : "=m" (saved_context.fs)); + asm volatile ("movw %%gs, %0" : "=m" (saved_context.gs)); + asm volatile ("movw %%ss, %0" : "=m" (saved_context.ss)); + + /* + * control registers + */ + asm volatile ("movl %%cr0, %0" : "=r" (saved_context.cr0)); + asm volatile ("movl %%cr2, %0" : "=r" (saved_context.cr2)); + asm volatile ("movl %%cr3, %0" : "=r" (saved_context.cr3)); + asm volatile ("movl %%cr4, %0" : "=r" (saved_context.cr4)); +} + +static void +do_fpu_end(void) +{ + /* restore FPU regs if necessary */ + /* Do it out of line so that gcc does not 
move cr0 load to some stupid place */ + kernel_fpu_end(); +} + +void restore_processor_state(void) +{ + + /* + * control registers + */ + asm volatile ("movl %0, %%cr4" :: "r" (saved_context.cr4)); + asm volatile ("movl %0, %%cr3" :: "r" (saved_context.cr3)); + asm volatile ("movl %0, %%cr2" :: "r" (saved_context.cr2)); + asm volatile ("movl %0, %%cr0" :: "r" (saved_context.cr0)); + + /* + * segment registers + */ + asm volatile ("movw %0, %%es" :: "r" (saved_context.es)); + asm volatile ("movw %0, %%fs" :: "r" (saved_context.fs)); + asm volatile ("movw %0, %%gs" :: "r" (saved_context.gs)); + asm volatile ("movw %0, %%ss" :: "r" (saved_context.ss)); + + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + asm volatile ("lgdt %0" :: "m" (saved_context.gdt_limit)); + asm volatile ("lidt %0" :: "m" (saved_context.idt_limit)); + asm volatile ("lldt %0" :: "m" (saved_context.ldt)); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(NULL); + + fix_processor_context(); + do_fpu_end(); +} + +static void fix_processor_context(void) +{ + int cpu = smp_processor_id(); + struct tss_struct * t = init_tss + cpu; + + set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. 
*/ + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + + load_TR_desc(); /* This does ltr */ + load_LDT(&current->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg[7]){ + loaddebug(&current->thread, 0); + loaddebug(&current->thread, 1); + loaddebug(&current->thread, 2); + loaddebug(&current->thread, 3); + /* no 4 and 5 */ + loaddebug(&current->thread, 6); + loaddebug(&current->thread, 7); + } + +} + +EXPORT_SYMBOL(save_processor_state); +EXPORT_SYMBOL(restore_processor_state); diff -puN /dev/null arch/i386/power/Makefile --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/power/Makefile 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,3 @@ +obj-$(CONFIG_PM) += cpu.o +obj-$(CONFIG_PM_DISK) += pmdisk.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o diff -puN /dev/null arch/i386/power/pmdisk.S --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/power/pmdisk.S 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,94 @@ +.text + +/* Originally gcc generated, modified by hand */ + +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/page.h> + + .text + +ENTRY(pmdisk_arch_suspend) + pushl %ebx + cmpl $0,8(%esp) + jne .L1450 + call save_processor_state + + movl %esp, saved_context_esp + movl %eax, saved_context_eax + movl %ebx, saved_context_ebx + movl %ecx, saved_context_ecx + movl %edx, saved_context_edx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl ; popl saved_context_eflags + + call pmdisk_suspend + jmp .L1449 + .p2align 4,,7 +.L1450: + movl $swapper_pg_dir-__PAGE_OFFSET,%ecx + movl %ecx,%cr3 + + movl $0,loop + cmpl $0,pmdisk_pages + je .L1453 + .p2align 4,,7 +.L1455: + movl $0,loop2 + .p2align 4,,7 +.L1459: + movl pm_pagedir_nosave,%ecx + movl loop,%eax + movl loop2,%edx + sall $4,%eax + movl 4(%ecx,%eax),%ebx + movl (%ecx,%eax),%eax + movb (%edx,%eax),%al + movb %al,(%edx,%ebx) + movl %cr3, %eax; + movl %eax, %cr3; # flush TLB + + 
movl loop2,%eax + leal 1(%eax),%edx + movl %edx,loop2 + movl %edx,%eax + cmpl $4095,%eax + jbe .L1459 + movl loop,%eax + leal 1(%eax),%edx + movl %edx,loop + movl %edx,%eax + cmpl pmdisk_pages,%eax + jb .L1455 + .p2align 4,,7 +.L1453: + movl $__USER_DS,%eax + + movw %ax, %ds + movw %ax, %es + movl saved_context_esp, %esp + movl saved_context_ebp, %ebp + movl saved_context_eax, %eax + movl saved_context_ebx, %ebx + movl saved_context_ecx, %ecx + movl saved_context_edx, %edx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + pushl saved_context_eflags ; popfl + call pmdisk_resume +.L1449: + popl %ebx + pushl %eax + call restore_processor_state + popl %eax + ret + + .section .data.nosave +loop: + .quad 0 +loop2: + .quad 0 + .previous + diff -puN /dev/null arch/i386/power/swsusp.S --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/arch/i386/power/swsusp.S 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,94 @@ +.text + +/* Originally gcc generated, modified by hand */ + +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/page.h> + + .text + +ENTRY(do_magic) + pushl %ebx + cmpl $0,8(%esp) + jne .L1450 + call do_magic_suspend_1 + call save_processor_state + + movl %esp, saved_context_esp + movl %eax, saved_context_eax + movl %ebx, saved_context_ebx + movl %ecx, saved_context_ecx + movl %edx, saved_context_edx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl ; popl saved_context_eflags + + call do_magic_suspend_2 + jmp .L1449 + .p2align 4,,7 +.L1450: + movl $swapper_pg_dir-__PAGE_OFFSET,%ecx + movl %ecx,%cr3 + + call do_magic_resume_1 + movl $0,loop + cmpl $0,nr_copy_pages + je .L1453 + .p2align 4,,7 +.L1455: + movl $0,loop2 + .p2align 4,,7 +.L1459: + movl pagedir_nosave,%ecx + movl loop,%eax + movl loop2,%edx + sall $4,%eax + movl 4(%ecx,%eax),%ebx + movl (%ecx,%eax),%eax + movb (%edx,%eax),%al + movb %al,(%edx,%ebx) + movl %cr3, %eax; + movl %eax, %cr3; # flush TLB + + movl 
loop2,%eax + leal 1(%eax),%edx + movl %edx,loop2 + movl %edx,%eax + cmpl $4095,%eax + jbe .L1459 + movl loop,%eax + leal 1(%eax),%edx + movl %edx,loop + movl %edx,%eax + cmpl nr_copy_pages,%eax + jb .L1455 + .p2align 4,,7 +.L1453: + movl $__USER_DS,%eax + + movw %ax, %ds + movw %ax, %es + movl saved_context_esp, %esp + movl saved_context_ebp, %ebp + movl saved_context_eax, %eax + movl saved_context_ebx, %ebx + movl saved_context_ecx, %ecx + movl saved_context_edx, %edx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + call restore_processor_state + pushl saved_context_eflags ; popfl + call do_magic_resume_2 +.L1449: + popl %ebx + ret + + .section .data.nosave +loop: + .quad 0 +loop2: + .quad 0 + .previous + \ No newline at end of file diff -puN drivers/acpi/sleep/main.c~test5-pm2 drivers/acpi/sleep/main.c --- 25/drivers/acpi/sleep/main.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/acpi/sleep/main.c 2003-09-10 08:12:15.000000000 -0700 @@ -41,7 +41,6 @@ static u32 acpi_suspend_states[] = { static int acpi_pm_prepare(u32 state) { - int error = 0; u32 acpi_state = acpi_suspend_states[state]; if (!sleep_states[acpi_state]) @@ -56,21 +55,9 @@ static int acpi_pm_prepare(u32 state) acpi_set_firmware_waking_vector( (acpi_physical_address) acpi_wakeup_address); } - ACPI_FLUSH_CPU_CACHE(); - - /* Do arch specific saving of state. */ - if (state > PM_SUSPEND_STANDBY) { - if ((error = acpi_save_state_mem())) - goto Err; - } - acpi_enter_sleep_state_prep(acpi_state); - return 0; - Err: - acpi_set_firmware_waking_vector(0); - return error; } @@ -90,6 +77,15 @@ static int acpi_pm_enter(u32 state) u32 acpi_state = acpi_suspend_states[state]; ACPI_FLUSH_CPU_CACHE(); + + /* Do arch specific saving of state. 
*/ + if (state > PM_SUSPEND_STANDBY) { + int error = acpi_save_state_mem(); + if (error) + return error; + } + + local_irq_save(flags); switch (state) { @@ -114,6 +110,15 @@ static int acpi_pm_enter(u32 state) local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); + /* restore processor state + * We should only be here if we're coming back from STR or STD. + * And, in the case of the latter, the memory image should have already + * been loaded from disk. + */ + if (state > PM_SUSPEND_STANDBY) + acpi_restore_state_mem(); + + return ACPI_SUCCESS(status) ? 0 : -EFAULT; } @@ -130,14 +135,6 @@ static int acpi_pm_finish(u32 state) { acpi_leave_sleep_state(state); - /* restore processor state - * We should only be here if we're coming back from STR or STD. - * And, in the case of the latter, the memory image should have already - * been loaded from disk. - */ - if (state > ACPI_STATE_S1) - acpi_restore_state_mem(); - /* reset firmware waking vector */ acpi_set_firmware_waking_vector((acpi_physical_address) 0); @@ -149,6 +146,20 @@ static int acpi_pm_finish(u32 state) } +int acpi_suspend(u32 acpi_state) +{ + u32 states[] = { + [1] = PM_SUSPEND_STANDBY, + [3] = PM_SUSPEND_MEM, + [4] = PM_SUSPEND_DISK, + }; + + if (acpi_state <= 4 && states[acpi_state]) + return pm_suspend(states[acpi_state]); + return -EINVAL; +} + + static struct pm_ops acpi_pm_ops = { .prepare = acpi_pm_prepare, .enter = acpi_pm_enter, diff -puN drivers/acpi/sleep/proc.c~test5-pm2 drivers/acpi/sleep/proc.c --- 25/drivers/acpi/sleep/proc.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/acpi/sleep/proc.c 2003-09-10 08:12:15.000000000 -0700 @@ -13,12 +13,71 @@ #include "sleep.h" +#define ACPI_SYSTEM_FILE_SLEEP "sleep" #define ACPI_SYSTEM_FILE_ALARM "alarm" #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME ("sleep") +static int acpi_system_sleep_seq_show(struct seq_file *seq, void *offset) +{ + int i; + + ACPI_FUNCTION_TRACE("acpi_system_sleep_seq_show"); + + for (i = 0; i <= 
ACPI_STATE_S5; i++) { + if (sleep_states[i]) { + seq_printf(seq,"S%d ", i); + if (i == ACPI_STATE_S4 && acpi_gbl_FACS->S4bios_f) + seq_printf(seq, "S4bios "); + } + } + + seq_puts(seq, "\n"); + + return 0; +} + +static int acpi_system_sleep_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, acpi_system_sleep_seq_show, PDE(inode)->data); +} + +static int +acpi_system_write_sleep ( + struct file *file, + const char *buffer, + size_t count, + loff_t *ppos) +{ + char str[12]; + u32 state = 0; + int error = 0; + + if (count > sizeof(str) - 1) + goto Done; + memset(str,0,sizeof(str)); + if (copy_from_user(str, buffer, count)) + return -EFAULT; + + /* Check for S4 bios request */ + if (!strcmp(str,"4b")) { + error = acpi_suspend(4); + goto Done; + } + state = simple_strtoul(str, NULL, 0); +#ifdef CONFIG_SOFTWARE_SUSPEND + if (state == 4) { + error = software_suspend(); + goto Done; + } +#endif + error = acpi_suspend(state); + Done: + return error ? error : count; +} + static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset) { u32 sec, min, hr; @@ -294,6 +353,14 @@ end: } +static struct file_operations acpi_system_sleep_fops = { + .open = acpi_system_sleep_open_fs, + .read = seq_read, + .write = acpi_system_write_sleep, + .llseek = seq_lseek, + .release = single_release, +}; + static struct file_operations acpi_system_alarm_fops = { .open = acpi_system_alarm_open_fs, .read = seq_read, @@ -307,6 +374,12 @@ static int acpi_sleep_proc_init(void) { struct proc_dir_entry *entry = NULL; + /* 'sleep' [R/W]*/ + entry = create_proc_entry(ACPI_SYSTEM_FILE_SLEEP, + S_IFREG|S_IRUGO|S_IWUSR, acpi_root_dir); + if (entry) + entry->proc_fops = &acpi_system_sleep_fops; + /* 'alarm' [R/W] */ entry = create_proc_entry(ACPI_SYSTEM_FILE_ALARM, S_IFREG|S_IRUGO|S_IWUSR, acpi_root_dir); diff -puN drivers/acpi/sleep/sleep.h~test5-pm2 drivers/acpi/sleep/sleep.h --- 25/drivers/acpi/sleep/sleep.h~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 
25-akpm/drivers/acpi/sleep/sleep.h 2003-09-10 08:12:15.000000000 -0700 @@ -1,5 +1,4 @@ extern u8 sleep_states[]; - -extern acpi_status acpi_suspend (u32 state); +extern int acpi_suspend (u32 state); diff -puN drivers/base/core.c~test5-pm2 drivers/base/core.c --- 25/drivers/base/core.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/base/core.c 2003-09-10 08:12:15.000000000 -0700 @@ -225,28 +225,30 @@ int device_add(struct device *dev) dev->kobj.parent = &parent->kobj; if ((error = kobject_add(&dev->kobj))) - goto register_done; - - /* now take care of our own registration */ - + goto Error; + if ((error = device_pm_add(dev))) + goto PMError; + if ((error = bus_add_device(dev))) + goto BusError; down_write(&devices_subsys.rwsem); if (parent) list_add_tail(&dev->node,&parent->children); up_write(&devices_subsys.rwsem); - bus_add_device(dev); - - device_pm_add(dev); - /* notify platform of device entry */ if (platform_notify) platform_notify(dev); - - register_done: - if (error && parent) - put_device(parent); + Done: put_device(dev); return error; + BusError: + device_pm_remove(dev); + PMError: + kobject_unregister(&dev->kobj); + Error: + if (parent) + put_device(parent); + goto Done; } @@ -312,8 +314,6 @@ void device_del(struct device * dev) { struct device * parent = dev->parent; - device_pm_remove(dev); - down_write(&devices_subsys.rwsem); if (parent) list_del_init(&dev->node); @@ -324,14 +324,11 @@ void device_del(struct device * dev) */ if (platform_notify_remove) platform_notify_remove(dev); - bus_remove_device(dev); - + device_pm_remove(dev); kobject_del(&dev->kobj); - if (parent) put_device(parent); - } /** diff -puN drivers/base/power/main.c~test5-pm2 drivers/base/power/main.c --- 25/drivers/base/power/main.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/base/power/main.c 2003-09-10 08:12:15.000000000 -0700 @@ -36,12 +36,14 @@ DECLARE_MUTEX(dpm_sem); static inline void device_pm_hold(struct device * dev) { - 
atomic_inc(&dev->power.pm_users); + if (dev) + atomic_inc(&dev->power.pm_users); } static inline void device_pm_release(struct device * dev) { - atomic_inc(&dev->power.pm_users); + if (dev) + atomic_dec(&dev->power.pm_users); } @@ -61,11 +63,9 @@ static inline void device_pm_release(str void device_pm_set_parent(struct device * dev, struct device * parent) { struct device * old_parent = dev->power.pm_parent; - if (old_parent) - device_pm_release(old_parent); + device_pm_release(old_parent); dev->power.pm_parent = parent; - if (parent) - device_pm_hold(parent); + device_pm_hold(parent); } EXPORT_SYMBOL(device_pm_set_parent); @@ -91,6 +91,7 @@ void device_pm_remove(struct device * de dev->bus ? dev->bus->name : "No Bus", dev->kobj.name); down(&dpm_sem); dpm_sysfs_remove(dev); + device_pm_release(dev->power.pm_parent); list_del(&dev->power.entry); up(&dpm_sem); } diff -puN drivers/base/power/power.h~test5-pm2 drivers/base/power/power.h --- 25/drivers/base/power/power.h~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/base/power/power.h 2003-09-10 08:12:15.000000000 -0700 @@ -58,7 +58,8 @@ extern void dpm_sysfs_remove(struct devi /* * resume.c */ -extern int dpm_resume(void); + +extern void dpm_resume(void); extern void dpm_power_up(void); extern int resume_device(struct device *); diff -puN drivers/base/power/resume.c~test5-pm2 drivers/base/power/resume.c --- 25/drivers/base/power/resume.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/base/power/resume.c 2003-09-10 08:12:15.000000000 -0700 @@ -28,6 +28,19 @@ int resume_device(struct device * dev) } + +void dpm_resume(void) +{ + while(!list_empty(&dpm_off)) { + struct list_head * entry = dpm_off.next; + struct device * dev = to_device(entry); + list_del_init(entry); + resume_device(dev); + list_add_tail(entry,&dpm_active); + } +} + + /** * device_resume - Restore state of each device in system. 
* @@ -38,13 +51,7 @@ int resume_device(struct device * dev) void device_resume(void) { down(&dpm_sem); - while(!list_empty(&dpm_off)) { - struct list_head * entry = dpm_off.next; - struct device * dev = to_device(entry); - list_del_init(entry); - resume_device(dev); - list_add_tail(entry,&dpm_active); - } + dpm_resume(); up(&dpm_sem); } diff -puN drivers/base/power/suspend.c~test5-pm2 drivers/base/power/suspend.c --- 25/drivers/base/power/suspend.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/drivers/base/power/suspend.c 2003-09-10 08:12:15.000000000 -0700 @@ -81,14 +81,18 @@ int device_suspend(u32 state) while(!list_empty(&dpm_active)) { struct list_head * entry = dpm_active.prev; struct device * dev = to_device(entry); - if ((error = suspend_device(dev,state))) - goto Error; + if ((error = suspend_device(dev,state))) { + if (error != -EAGAIN) + goto Error; + else + error = 0; + } } Done: up(&dpm_sem); return error; Error: - device_resume(); + dpm_resume(); goto Done; } diff -puN include/asm-i386/suspend.h~test5-pm2 include/asm-i386/suspend.h --- 25/include/asm-i386/suspend.h~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/include/asm-i386/suspend.h 2003-09-10 08:12:15.000000000 -0700 @@ -6,11 +6,12 @@ #include <asm/desc.h> #include <asm/i387.h> -static inline void +static inline int arch_prepare_suspend(void) { if (!cpu_has_pse) - panic("pse required"); + return -EPERM; + return 0; } /* image of the saved processor state */ @@ -38,8 +39,6 @@ struct saved_context { extern void save_processor_state(void); extern void restore_processor_state(void); -extern int do_magic(int resume); - #ifdef CONFIG_ACPI_SLEEP extern unsigned long saved_eip; extern unsigned long saved_esp; diff -puN include/linux/suspend.h~test5-pm2 include/linux/suspend.h --- 25/include/linux/suspend.h~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/include/linux/suspend.h 2003-09-10 08:12:15.000000000 -0700 @@ -10,7 +10,7 @@ #include <linux/init.h> #include 
<linux/pm.h> -#ifdef CONFIG_SOFTWARE_SUSPEND +#ifdef CONFIG_PM /* page backup entry */ typedef struct pbe { unsigned long address; /* address of the copy */ @@ -53,10 +53,17 @@ extern suspend_pagedir_t *pagedir_nosave extern void do_suspend_lowlevel(int resume); extern void do_suspend_lowlevel_s4bios(int resume); +#endif /* CONFIG_PM */ + +#ifdef CONFIG_SOFTWARE_SUSPEND + +extern unsigned char software_suspend_enabled; + +extern void software_suspend(void); #else /* CONFIG_SOFTWARE_SUSPEND */ -static inline int software_suspend(void) +static inline void software_suspend(void) { - return -EPERM; + printk("Warning: fake suspend called\n"); } #endif /* CONFIG_SOFTWARE_SUSPEND */ diff -puN kernel/power/console.c~test5-pm2 kernel/power/console.c --- 25/kernel/power/console.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/power/console.c 2003-09-10 08:12:15.000000000 -0700 @@ -8,7 +8,7 @@ #include <linux/kbd_kern.h> #include "power.h" -static int new_loglevel = 7; +static int new_loglevel = 10; static int orig_loglevel; static int orig_fgconsole, orig_kmsg; diff -puN /dev/null kernel/power/disk.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/kernel/power/disk.c 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,335 @@ +/* + * kernel/power/disk.c - Suspend-to-disk support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is release under the GPLv2 + * + */ + +#define DEBUG + + +#include <linux/suspend.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include "power.h" + + +extern u32 pm_disk_mode; +extern struct pm_ops * pm_ops; + +extern int pmdisk_save(void); +extern int pmdisk_write(void); +extern int pmdisk_read(void); +extern int pmdisk_restore(void); +extern int pmdisk_free(void); + +extern long sys_sync(void); + + +/** + * power_down - Shut machine down for hibernate. 
+ * @mode: Suspend-to-disk mode + * + * Use the platform driver, if configured so, and return gracefully if it + * fails. + * Otherwise, try to power off and reboot. If they fail, halt the machine, + * there ain't no turning back. + */ + +static int power_down(u32 mode) +{ + unsigned long flags; + int error = 0; + + local_irq_save(flags); + device_power_down(PM_SUSPEND_DISK); + switch(mode) { + case PM_DISK_PLATFORM: + error = pm_ops->enter(PM_SUSPEND_DISK); + break; + case PM_DISK_SHUTDOWN: + printk("Powering off system\n"); + machine_power_off(); + break; + case PM_DISK_REBOOT: + machine_restart(NULL); + break; + } + machine_halt(); + device_power_up(); + local_irq_restore(flags); + return 0; +} + + +static int in_suspend __nosavedata = 0; + + +/** + * free_some_memory - Try to free as much memory as possible + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped at this point, or + * livelock is possible. + */ + +static void free_some_memory(void) +{ + printk("Freeing memory: "); + while (shrink_all_memory(10000)) + printk("."); + printk("|\n"); + blk_run_queues(); +} + + +static inline void platform_finish(void) +{ + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->finish) + pm_ops->finish(PM_SUSPEND_DISK); + } +} + +static void finish(void) +{ + device_resume(); + platform_finish(); + thaw_processes(); + pm_restore_console(); +} + + +static int prepare(void) +{ + int error; + + pm_prepare_console(); + + sys_sync(); + if (freeze_processes()) { + error = -EBUSY; + goto Thaw; + } + + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->prepare) { + if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) + goto Thaw; + } + } + + /* Free memory before shutting down devices. 
*/ + free_some_memory(); + + if ((error = device_suspend(PM_SUSPEND_DISK))) + goto Finish; + + return 0; + Finish: + platform_finish(); + Thaw: + thaw_processes(); + pm_restore_console(); + return error; +} + + +/** + * pm_suspend_disk - The granpappy of power management. + * + * If we're going through the firmware, then get it over with quickly. + * + * If not, then call pmdis to do it's thing, then figure out how + * to power down the system. + */ + +int pm_suspend_disk(void) +{ + int error; + + if ((error = prepare())) + return error; + + pr_debug("PM: Attempting to suspend to disk.\n"); + if (pm_disk_mode == PM_DISK_FIRMWARE) + return pm_ops->enter(PM_SUSPEND_DISK); + + pr_debug("PM: snapshotting memory.\n"); + in_suspend = 1; + if ((error = pmdisk_save())) + goto Done; + + if (in_suspend) { + pr_debug("PM: writing image.\n"); + + /* + * FIXME: Leftover from swsusp. Are they necessary? + */ + mb(); + barrier(); + + error = pmdisk_write(); + if (!error) { + error = power_down(pm_disk_mode); + pr_debug("PM: Power down failed.\n"); + } + } else + pr_debug("PM: Image restored successfully.\n"); + pmdisk_free(); + Done: + finish(); + return error; +} + + +/** + * pm_resume - Resume from a saved image. + * + * Called as a late_initcall (so all devices are discovered and + * initialized), we call pmdisk to see if we have a saved image or not. + * If so, we quiesce devices, the restore the saved image. We will + * return above (in pm_suspend_disk() ) if everything goes well. + * Otherwise, we fail gracefully and return to the normally + * scheduled program. + * + */ + +static int pm_resume(void) +{ + int error; + + pr_debug("PM: Reading pmdisk image.\n"); + + if ((error = pmdisk_read())) + goto Done; + + pr_debug("PM: Preparing system for restore.\n"); + + if ((error = prepare())) + goto Free; + + barrier(); + mb(); + + /* FIXME: The following (comment and mdelay()) are from swsusp. + * Are they really necessary? 
+ * + * We do not want some readahead with DMA to corrupt our memory, right? + * Do it with disabled interrupts for best effect. That way, if some + * driver scheduled DMA, we have good chance for DMA to finish ;-). + */ + pr_debug("PM: Waiting for DMAs to settle down.\n"); + mdelay(1000); + + pr_debug("PM: Restoring saved image.\n"); + pmdisk_restore(); + pr_debug("PM: Restore failed, recovering.n"); + finish(); + Free: + pmdisk_free(); + Done: + pr_debug("PM: Resume from disk failed.\n"); + return 0; +} + +late_initcall(pm_resume); + + +static char * pm_disk_modes[] = { + [PM_DISK_FIRMWARE] = "firmware", + [PM_DISK_PLATFORM] = "platform", + [PM_DISK_SHUTDOWN] = "shutdown", + [PM_DISK_REBOOT] = "reboot", +}; + +/** + * disk - Control suspend-to-disk mode + * + * Suspend-to-disk can be handled in several ways. The greatest + * distinction is who writes memory to disk - the firmware or the OS. + * If the firmware does it, we assume that it also handles suspending + * the system. + * If the OS does it, then we have three options for putting the system + * to sleep - using the platform driver (e.g. ACPI or other PM registers), + * powering off the system or rebooting the system (for testing). + * + * The system will support either 'firmware' or 'platform', and that is + * known a priori (and encoded in pm_ops). But, the user may choose + * 'shutdown' or 'reboot' as alternatives. + * + * show() will display what the mode is currently set to. + * store() will accept one of + * + * 'firmware' + * 'platform' + * 'shutdown' + * 'reboot' + * + * It will only change to 'firmware' or 'platform' if the system + * supports it (as determined from pm_ops->pm_disk_mode). 
+ */ + +static ssize_t disk_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); +} + + +static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +{ + int error = 0; + int i; + u32 mode = 0; + + down(&pm_sem); + for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { + if (!strcmp(buf,pm_disk_modes[i])) { + mode = i; + break; + } + } + if (mode) { + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + pm_disk_mode = mode; + else { + if (pm_ops && pm_ops->enter && + (mode == pm_ops->pm_disk_mode)) + pm_disk_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + pr_debug("PM: suspend-to-disk mode set to '%s'\n", + pm_disk_modes[mode]); + up(&pm_sem); + return error ? error : n; +} + +power_attr(disk); + +static struct attribute * g[] = { + &disk_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); +} + +core_initcall(pm_disk_init); diff -puN /dev/null kernel/power/Kconfig --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/kernel/power/Kconfig 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,92 @@ +config PM + bool "Power Management support" + ---help--- + "Power Management" means that parts of your computer are shut + off or put into a power conserving "sleep" mode if they are not + being used. There are two competing standards for doing this: APM + and ACPI. If you want to use either one, say Y here and then also + to the requisite support below. + + Power Management is most important for battery powered laptop + computers; if you have a laptop, check out the Linux Laptop home + page on the WWW at + <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. 
+ + Note that, even if you say N here, Linux on the x86 architecture + will issue the hlt instruction if nothing is to be done, thereby + sending the processor to sleep and saving power. + +config SOFTWARE_SUSPEND + bool "Software Suspend (EXPERIMENTAL)" + depends on EXPERIMENTAL && PM && SWAP + ---help--- + Enable the possibilty of suspendig machine. It doesn't need APM. + You may suspend your machine by 'swsusp' or 'shutdown -z <time>' + (patch for sysvinit needed). + + It creates an image which is saved in your active swaps. By the next + booting the, pass 'resume=/dev/swappartition' and kernel will + detect the saved image, restore the memory from + it and then it continues to run as before you've suspended. + If you don't want the previous state to continue use the 'noresume' + kernel option. However note that your partitions will be fsck'd and + you must re-mkswap your swap partitions. It does not work with swap + files. + + Right now you may boot without resuming and then later resume but + in meantime you cannot use those swap partitions/files which were + involved in suspending. Also in this case there is a risk that buffers + on disk won't match with saved ones. + + For more information take a look at Documentation/swsusp.txt. + +config PM_DISK + bool "Suspend-to-Disk Support" + depends on PM && SWAP + ---help--- + Suspend-to-disk is a power management state in which the contents + of memory are stored on disk and the entire system is shut down or + put into a low-power state (e.g. ACPI S4). When the computer is + turned back on, the stored image is loaded from disk and execution + resumes from where it left off before suspending. + + This config option enables the core infrastructure necessary to + perform the suspend and resume transition. + + Currently, this suspend-to-disk implementation is based on a forked + version of the swsusp code base. As such, it's still experimental, + and still relies on CONFIG_SWAP. 
+ + More information can be found in Documentation/power/. + + If unsure, Say N. + +config PM_DISK_PARTITION + string "Default resume partition" + default "" + ---help--- + The default resume partition is the partition that the pmdisk suspend- + to-disk implementation will look for a suspended disk image. + + The partition specified here will be different for almost every user. + It should be a valid swap partition (at least for now) that is turned + on before suspending. + + The partition specified can be overridden by specifying: + + pmdisk=/dev/<other device> + + which will set the resume partition to the device specified. + + One may also do: + + pmdisk=off + + to inform the kernel not to perform a resume transition. + + Note there is currently not a way to specify which device to save the + suspended image to. It will simply pick the first available swap + device. + diff -puN kernel/power/main.c~test5-pm2 kernel/power/main.c --- 25/kernel/power/main.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/power/main.c 2003-09-10 08:12:15.000000000 -0700 @@ -8,32 +8,23 @@ * */ +#define DEBUG + #include <linux/suspend.h> #include <linux/kobject.h> -#include <linux/reboot.h> #include <linux/string.h> +#include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/pm.h> -#include <linux/fs.h> #include "power.h" -static DECLARE_MUTEX(pm_sem); - -static struct pm_ops * pm_ops = NULL; - -static u32 pm_disk_mode = PM_DISK_SHUTDOWN; - -#ifdef CONFIG_SOFTWARE_SUSPEND -static int have_swsusp = 1; -#else -static int have_swsusp = 0; -#endif - -extern long sys_sync(void); +DECLARE_MUTEX(pm_sem); +struct pm_ops * pm_ops = NULL; +u32 pm_disk_mode = PM_DISK_SHUTDOWN; /** * pm_set_ops - Set the global power method table. @@ -51,171 +42,6 @@ void pm_set_ops(struct pm_ops * ops) /** - * pm_suspend_standby - Enter 'standby' state. - * - * 'standby' is also known as 'Power-On Suspend'. 
Here, we power down - * devices, disable interrupts, and enter the state. - */ - -static int pm_suspend_standby(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * pm_suspend_mem - Enter suspend-to-RAM state. - * - * Identical to pm_suspend_standby() - we power down devices, disable - * interrupts, and enter the low-power state. - */ - -static int pm_suspend_mem(void) -{ - int error = 0; - unsigned long flags; - - if (!pm_ops || !pm_ops->enter) - return -EPERM; - - local_irq_save(flags); - if ((error = device_power_down(PM_SUSPEND_STANDBY))) - goto Done; - error = pm_ops->enter(PM_SUSPEND_STANDBY); - local_irq_restore(flags); - device_power_up(); - Done: - return error; -} - - -/** - * power_down - Shut machine down for hibernate. - * @mode: Suspend-to-disk mode - * - * Use the platform driver, if configured so, and return gracefully if it - * fails. - * Otherwise, try to power off and reboot. If they fail, halt the machine, - * there ain't no turning back. - */ - -static int power_down(u32 mode) -{ - unsigned long flags; - int error = 0; - - local_irq_save(flags); - device_power_down(PM_SUSPEND_DISK); - switch(mode) { - case PM_DISK_PLATFORM: - error = pm_ops->enter(PM_SUSPEND_DISK); - if (error) { - device_power_up(); - local_irq_restore(flags); - return error; - } - case PM_DISK_SHUTDOWN: - machine_power_off(); - break; - case PM_DISK_REBOOT: - machine_restart(NULL); - break; - } - machine_halt(); - return 0; -} - - -static int in_suspend __nosavedata = 0; - - -/** - * free_some_memory - Try to free as much memory as possible - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or - * livelock is possible. 
- */ - -static void free_some_memory(void) -{ - printk("Freeing memory: "); - while (shrink_all_memory(10000)) - printk("."); - printk("|\n"); - blk_run_queues(); -} - - -/** - * pm_suspend_disk - The granpappy of power management. - * - * If we're going through the firmware, then get it over with quickly. - * - * If not, then call swsusp to do it's thing, then figure out how - * to power down the system. - */ - -static int pm_suspend_disk(void) -{ - int error; - - pr_debug("PM: Attempting to suspend to disk.\n"); - if (pm_disk_mode == PM_DISK_FIRMWARE) - return pm_ops->enter(PM_SUSPEND_DISK); - - if (!have_swsusp) - return -EPERM; - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - if ((error = swsusp_save())) - goto Done; - - if (in_suspend) { - pr_debug("PM: writing image.\n"); - error = swsusp_write(); - if (!error) - error = power_down(pm_disk_mode); - pr_debug("PM: Power down failed.\n"); - } else - pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); - Done: - return error; -} - - - -#define decl_state(_name) \ - { .name = __stringify(_name), .fn = pm_suspend_##_name } - -struct pm_state { - char * name; - int (*fn)(void); -} pm_states[] = { - [PM_SUSPEND_STANDBY] = decl_state(standby), - [PM_SUSPEND_MEM] = decl_state(mem), - [PM_SUSPEND_DISK] = decl_state(disk), - { NULL }, -}; - - -/** * suspend_prepare - Do prep work before entering low-power state. * @state: State we're entering. * @@ -228,36 +54,47 @@ static int suspend_prepare(u32 state) { int error = 0; + if (!pm_ops || !pm_ops->enter) + return -EPERM; + pm_prepare_console(); - sys_sync(); if (freeze_processes()) { error = -EAGAIN; goto Thaw; } - if (pm_ops && pm_ops->prepare) { + if (pm_ops->prepare) { if ((error = pm_ops->prepare(state))) goto Thaw; } - /* Free memory before shutting down devices. 
*/ - if (state == PM_SUSPEND_DISK) - free_some_memory(); - if ((error = device_suspend(state))) goto Finish; - return 0; - Done: - pm_restore_console(); - return error; Finish: - if (pm_ops && pm_ops->finish) + if (pm_ops->finish) pm_ops->finish(state); Thaw: thaw_processes(); - goto Done; + pm_restore_console(); + return error; +} + + +static int suspend_enter(u32 state) +{ + int error = 0; + unsigned long flags; + + local_irq_save(flags); + if ((error = device_power_down(state))) + goto Done; + error = pm_ops->enter(state); + device_power_up(); + Done: + local_irq_restore(flags); + return error; } @@ -279,6 +116,16 @@ static void suspend_finish(u32 state) } + + +char * pm_states[] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", + [PM_SUSPEND_DISK] = "disk", + NULL, +}; + + /** * enter_state - Do common work of entering low-power state. * @state: pm_state structure for state we're entering. @@ -293,7 +140,6 @@ static void suspend_finish(u32 state) static int enter_state(u32 state) { int error; - struct pm_state * s = &pm_states[state]; if (down_trylock(&pm_sem)) return -EBUSY; @@ -304,12 +150,17 @@ static int enter_state(u32 state) goto Unlock; } - pr_debug("PM: Preparing system for suspend.\n"); + if (state == PM_SUSPEND_DISK) { + error = pm_suspend_disk(); + goto Unlock; + } + + pr_debug("PM: Preparing system for suspend\n"); if ((error = suspend_prepare(state))) goto Unlock; pr_debug("PM: Entering state.\n"); - error = s->fn(); + error = suspend_enter(state); pr_debug("PM: Finishing up.\n"); suspend_finish(state); @@ -335,138 +186,10 @@ int pm_suspend(u32 state) } -/** - * pm_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, the restore the saved image. We will - * return above (in pm_suspend_disk() ) if everything goes well. 
- * Otherwise, we fail gracefully and return to the normally - * scheduled program. - * - */ - -static int pm_resume(void) -{ - int error; - - if (!have_swsusp) - return 0; - - pr_debug("PM: Reading swsusp image.\n"); - - if ((error = swsusp_read())) - goto Done; - - pr_debug("PM: Preparing system for restore.\n"); - - if ((error = suspend_prepare(PM_SUSPEND_DISK))) - goto Free; - - pr_debug("PM: Restoring saved image.\n"); - swsusp_restore(); - - pr_debug("PM: Restore failed, recovering.n"); - suspend_finish(PM_SUSPEND_DISK); - Free: - swsusp_free(); - Done: - pr_debug("PM: Resume from disk failed.\n"); - return 0; -} - -late_initcall(pm_resume); - decl_subsys(power,NULL,NULL); -#define power_attr(_name) \ -static struct subsys_attribute _name##_attr = { \ - .attr = { \ - .name = __stringify(_name), \ - .mode = 0644, \ - }, \ - .show = _name##_show, \ - .store = _name##_store, \ -} - - -static char * pm_disk_modes[] = { - [PM_DISK_FIRMWARE] = "firmware", - [PM_DISK_PLATFORM] = "platform", - [PM_DISK_SHUTDOWN] = "shutdown", - [PM_DISK_REBOOT] = "reboot", -}; - -/** - * disk - Control suspend-to-disk mode - * - * Suspend-to-disk can be handled in several ways. The greatest - * distinction is who writes memory to disk - the firmware or the OS. - * If the firmware does it, we assume that it also handles suspending - * the system. - * If the OS does it, then we have three options for putting the system - * to sleep - using the platform driver (e.g. ACPI or other PM registers), - * powering off the system or rebooting the system (for testing). - * - * The system will support either 'firmware' or 'platform', and that is - * known a priori (and encoded in pm_ops). But, the user may choose - * 'shutdown' or 'reboot' as alternatives. - * - * show() will display what the mode is currently set to. 
- * store() will accept one of - * - * 'firmware' - * 'platform' - * 'shutdown' - * 'reboot' - * - * It will only change to 'firmware' or 'platform' if the system - * supports it (as determined from pm_ops->pm_disk_mode). - */ - -static ssize_t disk_show(struct subsystem * subsys, char * buf) -{ - return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]); -} - - -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) -{ - int error = 0; - int i; - u32 mode = 0; - - down(&pm_sem); - for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { - if (!strcmp(buf,pm_disk_modes[i])) { - mode = i; - break; - } - } - if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) - pm_disk_mode = mode; - else { - if (pm_ops && pm_ops->enter && - (mode == pm_ops->pm_disk_mode)) - pm_disk_mode = mode; - else - error = -EINVAL; - } - } else - error = -EINVAL; - - pr_debug("PM: suspend-to-disk mode set to '%s'\n", - pm_disk_modes[mode]); - up(&pm_sem); - return error ? error : n; -} - -power_attr(disk); - /** * state - control system power state. 
* @@ -480,27 +203,28 @@ power_attr(disk); static ssize_t state_show(struct subsystem * subsys, char * buf) { - struct pm_state * state; + int i; char * s = buf; - for (state = &pm_states[0]; state->name; state++) - s += sprintf(s,"%s ",state->name); + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i]) + s += sprintf(s,"%s ",pm_states[i]); + } s += sprintf(s,"\n"); return (s - buf); } static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) { - u32 state; - struct pm_state * s; + u32 state = PM_SUSPEND_STANDBY; + char ** s; int error; - for (state = 0; state < PM_SUSPEND_MAX; state++) { - s = &pm_states[state]; - if (s->name && !strcmp(buf,s->name)) + for (s = &pm_states[state]; *s; s++, state++) { + if (!strcmp(buf,*s)) break; } - if (s) + if (*s) error = enter_state(state); else error = -EINVAL; @@ -511,7 +235,6 @@ power_attr(state); static struct attribute * g[] = { &state_attr.attr, - &disk_attr.attr, NULL, }; @@ -520,7 +243,7 @@ static struct attribute_group attr_group }; -static int pm_init(void) +static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) diff -puN kernel/power/Makefile~test5-pm2 kernel/power/Makefile --- 25/kernel/power/Makefile~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/power/Makefile 2003-09-10 08:12:15.000000000 -0700 @@ -1,4 +1,5 @@ obj-y := main.o process.o console.o pm.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_PM_DISK) += disk.o pmdisk.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff -puN /dev/null kernel/power/pmdisk.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/kernel/power/pmdisk.c 2003-09-10 08:12:15.000000000 -0700 @@ -0,0 +1,942 @@ +/* + * kernel/power/pmdisk.c - Suspend-to-disk implmentation + * + * This STD implementation is initially derived from swsusp (suspend-to-swap). 
+ * The original copyright on that was: + * + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> + * + * The additional parts are: + * + * Copyright (C) 2003 Patrick Mochel + * Copyright (C) 2003 Open Source Development Lab + * + * This file is released under the GPLv2. + * + * For more information, please see the text files in Documentation/power/ + * + */ + +#include <linux/mm.h> +#include <linux/bio.h> +#include <linux/suspend.h> +#include <linux/version.h> +#include <linux/reboot.h> +#include <linux/device.h> +#include <linux/swapops.h> +#include <linux/bootmem.h> + +#include <asm/mmu_context.h> + +#include "power.h" + + +extern int pmdisk_arch_suspend(int resume); + +#define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) +#define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) +#define ADDRESS2(x) __ADDRESS(__pa(x)) /* Needed for x86-64 where some pages are in memory twice */ + +/* References to section boundaries */ +extern char __nosave_begin, __nosave_end; + +extern int is_head_of_free_region(struct page *); + +/* Variables to be preserved over suspend */ +static int pagedir_order_check; +static int nr_copy_pages_check; + +/* For resume= kernel option */ +static char resume_file[256] = CONFIG_PM_DISK_PARTITION; + +static dev_t resume_device; +/* Local variables that should not be affected by save */ +unsigned int pmdisk_pages __nosavedata = 0; + +/* Suspend pagedir is allocated before final copy, therefore it + must be freed after resume + + Warning: this is evil. There are actually two pagedirs at time of + resume. One is "pagedir_save", which is empty frame allocated at + time of suspend, that must be freed. Second is "pagedir_nosave", + allocated at time of resume, that travels through memory not to + collide with anything. 
+ */ +suspend_pagedir_t *pm_pagedir_nosave __nosavedata = NULL; +static suspend_pagedir_t *pagedir_save; +static int pagedir_order __nosavedata = 0; + +struct link { + char dummy[PAGE_SIZE - sizeof(swp_entry_t)]; + swp_entry_t next; +}; + +union diskpage { + union swap_header swh; + struct link link; + struct suspend_header sh; +}; + +/* + * XXX: We try to keep some more pages free so that I/O operations succeed + * without paging. Might this be more? + */ +#define PAGES_FOR_IO 512 + +static const char name_suspend[] = "Suspend Machine: "; +static const char name_resume[] = "Resume Machine: "; + +/* + * Debug + */ +#define DEBUG_DEFAULT +#undef DEBUG_PROCESS +#undef DEBUG_SLOW +#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */ + +#ifdef DEBUG_DEFAULT +# define PRINTK(f, a...) printk(f, ## a) +#else +# define PRINTK(f, a...) +#endif + +#ifdef DEBUG_SLOW +#define MDELAY(a) mdelay(a) +#else +#define MDELAY(a) +#endif + +/* + * Saving part... + */ + +static __inline__ int fill_suspend_header(struct suspend_header *sh) +{ + memset((char *)sh, 0, sizeof(*sh)); + + sh->version_code = LINUX_VERSION_CODE; + sh->num_physpages = num_physpages; + strncpy(sh->machine, system_utsname.machine, 8); + strncpy(sh->version, system_utsname.version, 20); + /* FIXME: Is this bogus? --RR */ + sh->num_cpus = num_online_cpus(); + sh->page_size = PAGE_SIZE; + sh->suspend_pagedir = pm_pagedir_nosave; + BUG_ON (pagedir_save != pm_pagedir_nosave); + sh->num_pbes = pmdisk_pages; + /* TODO: needed? mounted fs' last mounted date comparison + * [so they haven't been mounted since last suspend. + * Maybe it isn't.] 
[we'd need to do this for _all_ fs-es] + */ + return 0; +} + +/* We memorize in swapfile_used what swap devices are used for suspension */ +#define SWAPFILE_UNUSED 0 +#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ +#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ + +static unsigned short swapfile_used[MAX_SWAPFILES]; +static unsigned short root_swap; +#define MARK_SWAP_SUSPEND 0 +#define MARK_SWAP_RESUME 2 + +static void mark_swapfiles(swp_entry_t prev, int mode) +{ + swp_entry_t entry; + union diskpage *cur; + struct page *page; + + if (root_swap == 0xFFFF) /* ignored */ + return; + + page = alloc_page(GFP_ATOMIC); + if (!page) + panic("Out of memory in mark_swapfiles"); + cur = page_address(page); + /* XXX: this is dirty hack to get first page of swap file */ + entry = swp_entry(root_swap, 0); + rw_swap_page_sync(READ, entry, page); + + if (mode == MARK_SWAP_RESUME) { + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); + else if (!memcmp("S2",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); + else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", + name_resume, cur->swh.magic.magic); + } else { + if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S1SUSP....",10); + else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) + memcpy(cur->swh.magic.magic,"S2SUSP....",10); + else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic); + cur->link.next = prev; /* prev is the first/last swap page of the resume area */ + /* link.next lies *no more* in last 4/8 bytes of magic */ + } + rw_swap_page_sync(WRITE, entry, page); + __free_page(page); +} + +static void read_swapfiles(void) /* This is called before saving image */ +{ + int i, len; + + len=strlen(resume_file); + root_swap = 0xFFFF; + + swap_list_lock(); + for(i=0; i<MAX_SWAPFILES; i++) { + if (swap_info[i].flags == 0) { + 
swapfile_used[i]=SWAPFILE_UNUSED; + } else { + if(!len) { + printk(KERN_WARNING "resume= option should be used to set suspend device" ); + if(root_swap == 0xFFFF) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else + swapfile_used[i] = SWAPFILE_IGNORED; + } else { + /* we ignore all swap devices that are not the resume_file */ + if (1) { +// FIXME if(resume_device == swap_info[i].swap_device) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else { +#if 0 + printk( "Resume: device %s (%x != %x) ignored\n", swap_info[i].swap_file->d_name.name, swap_info[i].swap_device, resume_device ); +#endif + swapfile_used[i] = SWAPFILE_IGNORED; + } + } + } + } + swap_list_unlock(); +} + +static void lock_swapdevices(void) /* This is called after saving image so modification + will be lost after resume... and that's what we want. */ +{ + int i; + + swap_list_lock(); + for(i = 0; i< MAX_SWAPFILES; i++) + if(swapfile_used[i] == SWAPFILE_IGNORED) { + swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to + lock_swapdevices can unlock the devices. */ + } + swap_list_unlock(); +} + +static int write_suspend_image(void) +{ + int i; + swp_entry_t entry, prev = { 0 }; + int nr_pgdir_pages = SUSPEND_PD_PAGES(pmdisk_pages); + union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + unsigned long address; + struct page *page; + + printk( "Writing data to swap (%d pages): ", pmdisk_pages ); + for (i=0; i<pmdisk_pages; i++) { + if (!(i%100)) + printk( "." 
); + if (!(entry = get_swap_page()).val) + panic("\nNot enough swapspace when writing data" ); + + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nPage %d: not enough swapspace on suspend device", i ); + + address = (pm_pagedir_nosave+i)->address; + page = virt_to_page(address); + rw_swap_page_sync(WRITE, entry, page); + (pm_pagedir_nosave+i)->swap_address = entry; + } + printk( "|\n" ); + printk( "Writing pagedir (%d pages): ", nr_pgdir_pages); + for (i=0; i<nr_pgdir_pages; i++) { + cur = (union diskpage *)((char *) pm_pagedir_nosave)+i; + BUG_ON ((char *) cur != (((char *) pm_pagedir_nosave) + i*PAGE_SIZE)); + printk( "." ); + if (!(entry = get_swap_page()).val) { + printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" ); + panic("Don't know how to recover"); + free_page((unsigned long) buffer); + return -ENOSPC; + } + + if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for pagedir on suspend device" ); + + BUG_ON (sizeof(swp_entry_t) != sizeof(long)); + BUG_ON (PAGE_SIZE % sizeof(struct pbe)); + + cur->link.next = prev; + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + } + printk("H"); + BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); + BUG_ON (sizeof(union diskpage) != PAGE_SIZE); + if (!(entry = get_swap_page()).val) + panic( "\nNot enough swapspace when writing header" ); + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) + panic("\nNot enough swapspace for header on suspend device" ); + + cur = (void *) buffer; + if (fill_suspend_header(&cur->sh)) + panic("\nOut of memory while writing header"); + + cur->link.next = prev; + + page = virt_to_page((unsigned long)cur); + rw_swap_page_sync(WRITE, entry, page); + prev = entry; + + printk( "S" ); + mark_swapfiles(prev, MARK_SWAP_SUSPEND); + printk( "|\n" ); + + MDELAY(1000); + free_page((unsigned long) buffer); + return 0; +} + +/* if pagedir_p != NULL it also copies the 
counted pages */ +static int count_and_copy_data_pages(struct pbe *pagedir_p) +{ + int chunk_size; + int nr_copy_pages = 0; + int pfn; + struct page *page; + + BUG_ON (max_pfn != num_physpages); + + for (pfn = 0; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + + if (!PageReserved(page)) { + if (PageNosave(page)) + continue; + + if ((chunk_size=is_head_of_free_region(page))!=0) { + pfn += chunk_size - 1; + continue; + } + } else if (PageReserved(page)) { + BUG_ON (PageNosave(page)); + + /* + * Just copy whole code segment. Hopefully it is not that big. + */ + if ((ADDRESS(pfn) >= (unsigned long) ADDRESS2(&__nosave_begin)) && + (ADDRESS(pfn) < (unsigned long) ADDRESS2(&__nosave_end))) { + PRINTK("[nosave %lx]", ADDRESS(pfn)); + continue; + } + /* Hmm, perhaps copying all reserved pages is not too healthy as they may contain + critical bios data? */ + } else BUG(); + + nr_copy_pages++; + if (pagedir_p) { + pagedir_p->orig_address = ADDRESS(pfn); + copy_page((void *) pagedir_p->address, (void *) pagedir_p->orig_address); + pagedir_p++; + } + } + return nr_copy_pages; +} + +static void free_suspend_pagedir(unsigned long this_pagedir) +{ + struct page *page; + int pfn; + unsigned long this_pagedir_end = this_pagedir + + (PAGE_SIZE << pagedir_order); + + for(pfn = 0; pfn < num_physpages; pfn++) { + page = pfn_to_page(pfn); + if (!TestClearPageNosave(page)) + continue; + + if (ADDRESS(pfn) >= this_pagedir && ADDRESS(pfn) < this_pagedir_end) + continue; /* old pagedir gets freed in one */ + + free_page(ADDRESS(pfn)); + } + free_pages(this_pagedir, pagedir_order); +} + +static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) +{ + int i; + suspend_pagedir_t *pagedir; + struct pbe *p; + struct page *page; + + pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)); + + p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); + if(!pagedir) + return NULL; + + page = virt_to_page(pagedir); + for(i=0; i < 
1<<pagedir_order; i++) + SetPageNosave(page++); + + while(nr_copy_pages--) { + p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + if(!p->address) { + free_suspend_pagedir((unsigned long) pagedir); + return NULL; + } + SetPageNosave(virt_to_page(p->address)); + p->orig_address = 0; + p++; + } + return pagedir; +} + + +int pmdisk_suspend(void) +{ + struct sysinfo i; + unsigned int nr_needed_pages = 0; + + read_swapfiles(); + drain_local_pages(); + + pm_pagedir_nosave = NULL; + printk( "/critical section: Counting pages to copy" ); + pmdisk_pages = count_and_copy_data_pages(NULL); + nr_needed_pages = pmdisk_pages + PAGES_FOR_IO; + + printk(" (pages needed: %d+%d=%d free: %d)\n",pmdisk_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); + if(nr_free_pages() < nr_needed_pages) { + printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", + name_suspend, nr_needed_pages-nr_free_pages()); + root_swap = 0xFFFF; + return 1; + } + si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. + We should only consider resume_device. */ + if (i.freeswap < nr_needed_pages) { + printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", + name_suspend, nr_needed_pages-i.freeswap); + return 1; + } + + PRINTK( "Alloc pagedir\n" ); + pagedir_save = pm_pagedir_nosave = create_suspend_pagedir(pmdisk_pages); + if(!pm_pagedir_nosave) { + /* Shouldn't happen */ + printk(KERN_CRIT "%sCouldn't allocate enough pages\n",name_suspend); + panic("Really should not happen"); + return 1; + } + nr_copy_pages_check = pmdisk_pages; + pagedir_order_check = pagedir_order; + + drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ + if (pmdisk_pages != count_and_copy_data_pages(pm_pagedir_nosave)) /* copy */ + BUG(); + + /* + * End of critical section. From now on, we can write to memory, + * but we should not touch disk. This specially means we must _not_ + * touch swap space! 
Except we must write out our image of course. + */ + + printk( "critical section/: done (%d pages copied)\n", pmdisk_pages ); + return 0; +} + + +/** + * suspend_save_image - Prepare and write saved image to swap. + * + * IRQs are re-enabled here so we can resume devices and safely write + * to the swap devices. We disable them again before we leave. + * + * The second lock_swapdevices() will unlock ignored swap devices since + * writing is finished. + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ + +static int suspend_save_image(void) +{ + int error; + device_resume(); + lock_swapdevices(); + error = write_suspend_image(); + lock_swapdevices(); + return error; +} + +/* + * Magic happens here + */ + +int pmdisk_resume(void) +{ + BUG_ON (nr_copy_pages_check != pmdisk_pages); + BUG_ON (pagedir_order_check != pagedir_order); + + /* Even mappings of "global" things (vmalloc) need to be fixed */ + __flush_tlb_global(); + return 0; +} + +/* pmdisk_arch_suspend() is implemented in arch/?/power/pmdisk.S, + and basically does: + + if (!resume) { + save_processor_state(); + SAVE_REGISTERS + return pmdisk_suspend(); + } + GO_TO_SWAPPER_PAGE_TABLES + COPY_PAGES_BACK + RESTORE_REGISTERS + restore_processor_state(); + return pmdisk_resume(); + + */ + + +/* More restore stuff */ + +/* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? 
*/ +static void __init copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from) +{ + int i; + char *topointer=(char *)to, *frompointer=(char *)from; + + for(i=0; i < 1 << pagedir_order; i++) { + copy_page(topointer, frompointer); + topointer += PAGE_SIZE; + frompointer += PAGE_SIZE; + } +} + +#define does_collide(addr) does_collide_order(pm_pagedir_nosave, addr, 0) + +/* + * Returns true if given address/order collides with any orig_address + */ +static int __init does_collide_order(suspend_pagedir_t *pagedir, + unsigned long addr, int order) +{ + int i; + unsigned long addre = addr + (PAGE_SIZE<<order); + + for(i=0; i < pmdisk_pages; i++) + if((pagedir+i)->orig_address >= addr && + (pagedir+i)->orig_address < addre) + return 1; + + return 0; +} + +/* + * We check here that pagedir & pages it points to won't collide with pages + * where we're going to restore from the loaded pages later + */ +static int __init check_pagedir(void) +{ + int i; + + for(i=0; i < pmdisk_pages; i++) { + unsigned long addr; + + do { + addr = get_zeroed_page(GFP_ATOMIC); + if(!addr) + return -ENOMEM; + } while (does_collide(addr)); + + (pm_pagedir_nosave+i)->address = addr; + } + return 0; +} + +static int __init relocate_pagedir(void) +{ + /* + * We have to avoid recursion (not to overflow kernel stack), + * and that's why code looks pretty cryptic + */ + suspend_pagedir_t *new_pagedir, *old_pagedir = pm_pagedir_nosave; + void **eaten_memory = NULL; + void **c = eaten_memory, *m, *f; + + printk("Relocating pagedir"); + + if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { + printk("not necessary\n"); + return 0; + } + + while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) { + memset(m, 0, PAGE_SIZE); + if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order)) + break; + eaten_memory = m; + printk( "." 
);
+		*eaten_memory = c;
+		c = eaten_memory;
+	}
+
+	if (!m)
+		return -ENOMEM;
+
+	pm_pagedir_nosave = new_pagedir = m;
+	copy_pagedir(new_pagedir, old_pagedir);
+
+	c = eaten_memory;
+	while(c) {
+		printk(":");
+		/* The link to the next block is stored inside the block: fetch it, then free */
+		f = c;
+		c = *c;
+		free_pages((unsigned long)f, pagedir_order);
+	}
+	printk("|\n");
+	return 0;
+}
+
+/*
+ * Sanity check if this image makes sense with this kernel/swap context
+ * I really don't think that it's foolproof but more than nothing..
+ */
+
+static int __init sanity_check_failed(char *reason)
+{
+	printk(KERN_ERR "%s%s\n",name_resume,reason);
+	return -EPERM;
+}
+
+static int __init sanity_check(struct suspend_header *sh)
+{
+	if(sh->version_code != LINUX_VERSION_CODE)
+		return sanity_check_failed("Incorrect kernel version");
+	if(sh->num_physpages != num_physpages)
+		return sanity_check_failed("Incorrect memory size");
+	if(strncmp(sh->machine, system_utsname.machine, 8))
+		return sanity_check_failed("Incorrect machine type");
+	if(strncmp(sh->version, system_utsname.version, 20))
+		return sanity_check_failed("Incorrect version");
+	if(sh->num_cpus != num_online_cpus())
+		return sanity_check_failed("Incorrect number of cpus");
+	if(sh->page_size != PAGE_SIZE)
+		return sanity_check_failed("Incorrect PAGE_SIZE");
+	return 0;
+}
+
+static struct block_device * resume_bdev;
+
+
+/**
+ *	Using bio to read from swap.
+ *	This code requires a bit more work than just using buffer heads
+ *	but, it is the recommended way for 2.5/2.6.
+ *	The following are to signal the beginning and end of I/O. Bios
+ *	finish asynchronously, while we want them to happen synchronously.
+ *	A simple atomic_t, and a wait loop take care of this problem.
+ */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static void start_io(void) +{ + atomic_set(&io_done,1); +} + +static int end_io(struct bio * bio, unsigned int num, int err) +{ + atomic_set(&io_done,0); + return 0; +} + +static void wait_io(void) +{ + blk_run_queues(); + while(atomic_read(&io_done)) + io_schedule(); +} + + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC,1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + start_io(); + submit_bio(rw,bio); + wait_io(); + Done: + bio_put(bio); + return error; +} + +static int +read_page(pgoff_t page_off, void * page) +{ + return submit(READ,page_off,page); +} + +static int +write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE,page_off,page); +} + + +extern dev_t __init name_to_dev_t(const char *line); + + +#define next_entry(diskpage) diskpage->link.next + +static int __init read_suspend_image(void) +{ + swp_entry_t next; + int i, nr_pgdir_pages; + union diskpage *cur; + int error = 0; + + cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + if (!cur) + return -ENOMEM; + + if ((error = read_page(0, cur))) + goto Done; + + /* + * We have to read next position before we overwrite it + */ + next = next_entry(cur); + + if (!memcmp("S1",cur->swh.magic.magic,2)) + memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); 
+	else if (!memcmp("S2",cur->swh.magic.magic,2))
+		memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
+	else if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
+		 (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
+		printk(KERN_ERR "pmdisk: Partition is normal swap space\n");
+		error = -EINVAL;
+		goto Done;
+	} else {
+		printk(KERN_ERR "pmdisk: Invalid partition type.\n");
+		error = -EINVAL;
+		goto Done;
+	}
+
+	/*
+	 * Reset swap signature now.
+	 */
+	if ((error = write_page(0,cur)))
+		goto Done;
+
+	printk( "%sSignature found, resuming\n", name_resume );
+	MDELAY(1000);
+
+	if ((error = read_page(swp_offset(next), cur)))
+		goto Done;
+	/* Is this same machine? */
+	if ((error = sanity_check(&cur->sh)))
+		goto Done;
+	next = next_entry(cur);
+
+	pagedir_save = cur->sh.suspend_pagedir;
+	pmdisk_pages = cur->sh.num_pbes;
+	nr_pgdir_pages = SUSPEND_PD_PAGES(pmdisk_pages);
+	pagedir_order = get_bitmask_order(nr_pgdir_pages);
+
+	pm_pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
+	if (!pm_pagedir_nosave) {
+		error = -ENOMEM;
+		goto Done;
+	}
+
+	PRINTK( "%sReading pagedir, ", name_resume );
+
+	/* We get pages in reverse order of saving! 'cur' stays on the scratch page, which Done: frees */
+	for (i=nr_pgdir_pages-1; i>=0; i--) {
+		union diskpage *pd_page = (union diskpage *)pm_pagedir_nosave + i;
+		BUG_ON (!next.val);
+		error = read_page(swp_offset(next), pd_page);
+		if (error)
+			goto FreePagedir;
+		next = next_entry(pd_page);
+	}
+	BUG_ON (next.val);
+
+	if ((error = relocate_pagedir()))
+		goto FreePagedir;
+	if ((error = check_pagedir()))
+		goto FreePagedir;
+
+	printk( "Reading image data (%d pages): ", pmdisk_pages );
+	for(i=0; i < pmdisk_pages; i++) {
+		swp_entry_t swap_address = (pm_pagedir_nosave+i)->swap_address;
+		if (!(i%100))
+			printk( "." );
+		/* You do not need to check for overlaps...
+		   ...
check_pagedir already did this work */
+		error = read_page(swp_offset(swap_address),
+				  (char *)((pm_pagedir_nosave+i)->address));
+		if (error)
+			goto FreePagedir;
+	}
+	printk( "|\n" );
+ Done:
+	free_page((unsigned long)cur);
+	return error;
+ FreePagedir:
+	free_pages((unsigned long)pm_pagedir_nosave,pagedir_order);
+	goto Done;
+}
+
+/**
+ *	pmdisk_save - Snapshot memory (returns -EPERM on highmem or discontigmem builds)
+ */
+
+int pmdisk_save(void)
+{
+	int error;
+
+#if defined (CONFIG_HIGHMEM) || defined (CONFIG_DISCONTIGMEM)
+	printk("pmdisk is not supported with high- or discontig-mem.\n");
+	return -EPERM;
+#endif
+	if ((error = arch_prepare_suspend()))
+		return error;
+	local_irq_disable();
+	error = pmdisk_arch_suspend(0);
+	local_irq_enable();
+	return error;
+}
+
+
+/**
+ *	pmdisk_write - Write saved memory image to swap.
+ *
+ *	pmdisk_arch_suspend(0) returns after system is resumed.
+ *
+ *	pmdisk_arch_suspend() copies all "used" memory to "free" memory,
+ *	then unsuspends all device drivers, and writes memory to disk
+ *	using normal kernel mechanism.
+ */
+
+int pmdisk_write(void)
+{
+	return suspend_save_image();
+}
+
+
+/**
+ *	pmdisk_read - Read saved image from swap.
+ */
+
+int __init pmdisk_read(void)
+{
+	int error;
+	char b[BDEVNAME_SIZE];
+
+	if (!strlen(resume_file))
+		return -ENOENT;
+
+	resume_device = name_to_dev_t(resume_file);
+	printk("pmdisk: Resume From Partition: %s, Device: %s\n",
+	       resume_file, __bdevname(resume_device, b));
+
+	resume_bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW);
+	if (!IS_ERR(resume_bdev)) {
+		set_blocksize(resume_bdev, PAGE_SIZE);
+		error = read_suspend_image();
+		blkdev_put(resume_bdev, BDEV_RAW);
+	} else
+		error = PTR_ERR(resume_bdev);
+
+	if (!error)
+		PRINTK("Reading resume file was successful\n");
+	else
+		printk( "%sError %d resuming\n", name_resume, error );
+	MDELAY(1000);
+	return error;
+}
+
+
+/**
+ *	pmdisk_restore - Replace running kernel with saved image.
+ */ + +int __init pmdisk_restore(void) +{ + int error; + local_irq_disable(); + error = pmdisk_arch_suspend(1); + local_irq_enable(); + return error; +} + + +/** + * pmdisk_free - Free memory allocated to hold snapshot. + */ + +int pmdisk_free(void) +{ + PRINTK( "Freeing prev allocated pagedir\n" ); + free_suspend_pagedir((unsigned long) pagedir_save); + return 0; +} + +static int __init pmdisk_setup(char *str) +{ + if (strlen(str)) { + if (!strcmp(str,"off")) + resume_file[0] = '\0'; + else + strncpy(resume_file, str, 255); + } else + resume_file[0] = '\0'; + return 1; +} + +__setup("pmdisk=", pmdisk_setup); + diff -puN kernel/power/power.h~test5-pm2 kernel/power/power.h --- 25/kernel/power/power.h~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/power/power.h 2003-09-10 08:12:15.000000000 -0700 @@ -9,35 +9,28 @@ #endif -#ifdef CONFIG_SOFTWARE_SUSPEND -extern int swsusp_save(void); -extern int swsusp_write(void); -extern int swsusp_read(void); -extern int swsusp_restore(void); -extern int swsusp_free(void); +#ifdef CONFIG_PM_DISK +extern int pm_suspend_disk(void); + #else -static inline int swsusp_save(void) -{ - return 0; -} -static inline int swsusp_write(void) +static inline int pm_suspend_disk(void) { - return 0; -} -static inline int swsusp_read(void) -{ - return 0; -} -static inline int swsusp_restore(void) -{ - return 0; -} -static inline int swsusp_free(void) -{ - return 0; + return -EPERM; } #endif +extern struct semaphore pm_sem; +#define power_attr(_name) \ +static struct subsys_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +extern struct subsystem power_subsys; extern int freeze_processes(void); extern void thaw_processes(void); diff -puN kernel/power/swsusp.c~test5-pm2 kernel/power/swsusp.c --- 25/kernel/power/swsusp.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/power/swsusp.c 2003-09-10 
08:12:15.000000000 -0700 @@ -65,7 +65,15 @@ #include "power.h" -unsigned char software_suspend_enabled = 1; +extern long sys_sync(void); + +unsigned char software_suspend_enabled = 0; + +extern void do_magic(int resume); + +#define NORESUME 1 +#define RESUME_SPECIFIED 2 + #define __ADDRESS(x) ((unsigned long) phys_to_virt(x)) #define ADDRESS(x) __ADDRESS((x) << PAGE_SHIFT) @@ -83,7 +91,8 @@ spinlock_t suspend_pagedir_lock __nosave static int pagedir_order_check; static int nr_copy_pages_check; -static char resume_file[256]; /* For resume= kernel option */ +static int resume_status; +static char resume_file[256] = ""; /* For resume= kernel option */ static dev_t resume_device; /* Local variables that should not be affected by save */ unsigned int nr_copy_pages __nosavedata = 0; @@ -347,10 +356,15 @@ static int count_and_copy_data_pages(str int pfn; struct page *page; +#ifdef CONFIG_DISCONTIGMEM + panic("Discontingmem not supported"); +#else BUG_ON (max_pfn != num_physpages); - +#endif for (pfn = 0; pfn < max_pfn; pfn++) { page = pfn_to_page(pfn); + if (PageHighMem(page)) + panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-)."); if (!PageReserved(page)) { if (PageNosave(page)) @@ -435,6 +449,56 @@ static suspend_pagedir_t *create_suspend return pagedir; } +static int prepare_suspend_processes(void) +{ + sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */ + if (freeze_processes()) { + printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" ); + thaw_processes(); + return 1; + } + return 0; +} + +/* + * Try to free as much memory as possible, but do not OOM-kill anyone + * + * Notice: all userland should be stopped at this point, or livelock is possible. 
+ */ +static void free_some_memory(void) +{ + printk("Freeing memory: "); + while (shrink_all_memory(10000)) + printk("."); + printk("|\n"); +} + +/* Make disk drivers accept operations, again */ +static void drivers_unsuspend(void) +{ + device_resume(); +} + +/* Called from process context */ +static int drivers_suspend(void) +{ + return device_suspend(4); +} + +#define RESUME_PHASE1 1 /* Called from interrupts disabled */ +#define RESUME_PHASE2 2 /* Called with interrupts enabled */ +#define RESUME_ALL_PHASES (RESUME_PHASE1 | RESUME_PHASE2) +static void drivers_resume(int flags) +{ + if (flags & RESUME_PHASE1) { + device_resume(); + } + if (flags & RESUME_PHASE2) { +#ifdef SUSPEND_CONSOLE + update_screen(fg_console); /* Hmm, is this the problem? */ +#endif + } +} static int suspend_prepare_image(void) { @@ -488,14 +552,12 @@ static int suspend_prepare_image(void) return 0; } -static int suspend_save_image(void) +static void suspend_save_image(void) { - int error; - - device_resume(); + drivers_unsuspend(); lock_swapdevices(); - error = write_suspend_image(); + write_suspend_image(); lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */ /* It is important _NOT_ to umount filesystems at this point. We want @@ -503,7 +565,29 @@ static int suspend_save_image(void) * filesystem clean: it is not. (And it does not matter, if we resume * correctly, we'll mark system clean, anyway.) */ - return error; +} + +static void suspend_power_down(void) +{ + extern int C_A_D; + C_A_D = 0; + printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": ""); +#ifdef CONFIG_VT + PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state); + mdelay(1000); + if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL)))) + machine_restart(NULL); + else +#endif + { + device_shutdown(); + machine_power_off(); + } + + printk(KERN_EMERG "%sProbably not capable for powerdown. 
System halted.\n", name_suspend); + machine_halt(); + while (1); + /* NOTREACHED */ } /* @@ -515,21 +599,32 @@ void do_magic_resume_1(void) barrier(); mb(); spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ + PRINTK( "Waiting for DMAs to settle down...\n"); - /* We do not want some readahead with DMA to corrupt our memory, right? - Do it with disabled interrupts for best effect. That way, if some - driver scheduled DMA, we have good chance for DMA to finish ;-). */ - mdelay(1000); + mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right? + Do it with disabled interrupts for best effect. That way, if some + driver scheduled DMA, we have good chance for DMA to finish ;-). */ } void do_magic_resume_2(void) { BUG_ON (nr_copy_pages_check != nr_copy_pages); BUG_ON (pagedir_order_check != pagedir_order); - - /* Even mappings of "global" things (vmalloc) need to be fixed */ - __flush_tlb_global(); + + __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */ + + PRINTK( "Freeing prev allocated pagedir\n" ); + free_suspend_pagedir((unsigned long) pagedir_save); spin_unlock_irq(&suspend_pagedir_lock); + drivers_resume(RESUME_ALL_PHASES); + + PRINTK( "Fixing swap signatures... " ); + mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); + PRINTK( "ok\n" ); + +#ifdef SUSPEND_CONSOLE + update_screen(fg_console); /* Hmm, is this the problem? 
*/ +#endif } /* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: @@ -554,28 +649,91 @@ void do_magic_suspend_1(void) { mb(); barrier(); + BUG_ON(in_atomic()); spin_lock_irq(&suspend_pagedir_lock); } -int do_magic_suspend_2(void) +void do_magic_suspend_2(void) { int is_problem; read_swapfiles(); is_problem = suspend_prepare_image(); spin_unlock_irq(&suspend_pagedir_lock); - if (!is_problem) - return suspend_save_image(); + if (!is_problem) { + kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */ + BUG_ON(in_atomic()); + suspend_save_image(); + suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */ + } + printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend); + MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */ + barrier(); mb(); + spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ mdelay(1000); - return -EFAULT; + + free_pages((unsigned long) pagedir_nosave, pagedir_order); + spin_unlock_irq(&suspend_pagedir_lock); + mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); +} + +static void do_software_suspend(void) +{ + arch_prepare_suspend(); + if (pm_prepare_console()) + printk( "%sCan't allocate a console... proceeding\n", name_suspend); + if (!prepare_suspend_processes()) { + + /* At this point, all user processes and "dangerous" + kernel threads are stopped. Free some memory, as we + need half of memory free. */ + + free_some_memory(); + + /* No need to invalidate any vfsmnt list -- + * they will be valid after resume, anyway. + */ + blk_run_queues(); + + /* Save state of all device drivers, and stop them. */ + if(drivers_suspend()==0) + /* If stopping device drivers worked, we proceed basically into + * suspend_save_image. + * + * do_magic(0) returns after system is resumed. 
+ * + * do_magic() copies all "used" memory to "free" memory, then + * unsuspends all device drivers, and writes memory to disk + * using normal kernel mechanism. + */ + do_magic(0); + thaw_processes(); + } + software_suspend_enabled = 1; + MDELAY(1000); + pm_restore_console(); +} + +/* + * This is main interface to the outside world. It needs to be + * called from process context. + */ +void software_suspend(void) +{ + if(!software_suspend_enabled) + return; + + software_suspend_enabled = 0; + might_sleep(); + do_software_suspend(); } /* More restore stuff */ /* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? */ -static void __init copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from) +static void copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from) { int i; char *topointer=(char *)to, *frompointer=(char *)from; @@ -592,8 +750,8 @@ static void __init copy_pagedir(suspend_ /* * Returns true if given address/order collides with any orig_address */ -static int __init does_collide_order(suspend_pagedir_t *pagedir, - unsigned long addr, int order) +static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, + int order) { int i; unsigned long addre = addr + (PAGE_SIZE<<order); @@ -610,7 +768,7 @@ static int __init does_collide_order(sus * We check here that pagedir & pages it points to won't collide with pages * where we're going to restore from the loaded pages later */ -static int __init check_pagedir(void) +static int check_pagedir(void) { int i; @@ -628,7 +786,7 @@ static int __init check_pagedir(void) return 0; } -static int __init relocate_pagedir(void) +static int relocate_pagedir(void) { /* * We have to avoid recursion (not to overflow kernel stack), @@ -678,13 +836,13 @@ static int __init relocate_pagedir(void) * I really don't think that it's foolproof but more than nothing.. 
*/ -static int __init sanity_check_failed(char *reason) +static int sanity_check_failed(char *reason) { printk(KERN_ERR "%s%s\n",name_resume,reason); return -EPERM; } -static int __init sanity_check(struct suspend_header *sh) +static int sanity_check(struct suspend_header *sh) { if(sh->version_code != LINUX_VERSION_CODE) return sanity_check_failed("Incorrect kernel version"); @@ -701,8 +859,7 @@ static int __init sanity_check(struct su return 0; } -static int __init bdev_read_page(struct block_device *bdev, - long pos, void *buf) +static int bdev_read_page(struct block_device *bdev, long pos, void *buf) { struct buffer_head *bh; BUG_ON (pos%PAGE_SIZE); @@ -716,10 +873,31 @@ static int __init bdev_read_page(struct return 0; } +static int bdev_write_page(struct block_device *bdev, long pos, void *buf) +{ +#if 0 + struct buffer_head *bh; + BUG_ON (pos%PAGE_SIZE); + bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); + if (!bh || (!bh->b_data)) { + return -1; + } + memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */ + BUG_ON(!buffer_uptodate(bh)); + generic_make_request(WRITE, bh); + if (!buffer_uptodate(bh)) + printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file); + wait_on_buffer(bh); + brelse(bh); + return 0; +#endif + printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file); + return 0; +} + extern dev_t __init name_to_dev_t(const char *line); -static int __init read_suspend_image(struct block_device *bdev, - union diskpage *cur) +static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume) { swp_entry_t next; int i, nr_pgdir_pages; @@ -744,9 +922,18 @@ static int __init read_suspend_image(str else if (!memcmp("S2",cur->swh.magic.magic,2)) memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); else { - printk("swsusp: %s: Unable to find suspended-data signature (%.10s - misspelled?\n", + if (noresume) + return -EINVAL; + panic("%sUnable to find 
suspended-data signature (%.10s - misspelled?\n", name_resume, cur->swh.magic.magic); - return -EFAULT; + } + if (noresume) { + /* We don't do a sanity check here: we want to restore the swap + whatever version of kernel made the suspend image; + We need to write swap, but swap is *not* enabled so + we must write the device directly */ + printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file); + bdev_write_page(bdev, 0, cur); } printk( "%sSignature found, resuming\n", name_resume ); @@ -796,115 +983,117 @@ static int __init read_suspend_image(str return 0; } -/** - * swsusp_save - Snapshot memory - */ - -int swsusp_save(void) -{ -#if defined (CONFIG_HIGHMEM) || defined (COFNIG_DISCONTIGMEM) - printk("swsusp is not supported with high- or discontig-mem.\n"); - return -EPERM; -#endif - return 0; -} - - -/** - * swsusp_write - Write saved memory image to swap. - * - * do_magic(0) returns after system is resumed. - * - * do_magic() copies all "used" memory to "free" memory, then - * unsuspends all device drivers, and writes memory to disk - * using normal kernel mechanism. - */ - -int swsusp_write(void) -{ - arch_prepare_suspend(); - return do_magic(0); -} - - -/** - * swsusp_read - Read saved image from swap. 
- */ - -int __init swsusp_read(void) +static int read_suspend_image(const char * specialfile, int noresume) { union diskpage *cur; + unsigned long scratch_page = 0; int error; char b[BDEVNAME_SIZE]; - if (!strlen(resume_file)) - return -ENOENT; - - resume_device = name_to_dev_t(resume_file); - printk("swsusp: Resume From Partition: %s, Device: %s\n", - resume_file, __bdevname(resume_device, b)); - - cur = (union diskpage *)get_zeroed_page(GFP_ATOMIC); + resume_device = name_to_dev_t(specialfile); + scratch_page = get_zeroed_page(GFP_ATOMIC); + cur = (void *) scratch_page; if (cur) { struct block_device *bdev; + printk("Resuming from device %s\n", + __bdevname(resume_device, b)); bdev = open_by_devnum(resume_device, FMODE_READ, BDEV_RAW); - if (!IS_ERR(bdev)) { + if (IS_ERR(bdev)) { + error = PTR_ERR(bdev); + } else { set_blocksize(bdev, PAGE_SIZE); - error = read_suspend_image(bdev, cur); + error = __read_suspend_image(bdev, cur, noresume); blkdev_put(bdev, BDEV_RAW); - } else - error = PTR_ERR(bdev); - free_page((unsigned long)cur); - } else - error = -ENOMEM; + } + } else error = -ENOMEM; - if (!error) - PRINTK("Reading resume file was successful\n"); - else - printk( "%sError %d resuming\n", name_resume, error ); + if (scratch_page) + free_page(scratch_page); + switch (error) { + case 0: + PRINTK("Reading resume file was successful\n"); + break; + case -EINVAL: + break; + case -EIO: + printk( "%sI/O error\n", name_resume); + break; + case -ENOENT: + printk( "%s%s: No such file or directory\n", name_resume, specialfile); + break; + case -ENOMEM: + printk( "%sNot enough memory\n", name_resume); + break; + default: + printk( "%sError %d resuming\n", name_resume, error ); + } MDELAY(1000); return error; } - -/** - * swsusp_restore - Replace running kernel with saved image. +/* + * Called from init kernel_thread. 
+ * We check if we have an image and if so we try to resume */ -int __init swsusp_restore(void) +void software_resume(void) { - return do_magic(1); -} + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n"); + return; + } + /* We enable the possibility of machine suspend */ + software_suspend_enabled = 1; + if (!resume_status) + return; + printk( "%s", name_resume ); + if (resume_status == NORESUME) { + if(resume_file[0]) + read_suspend_image(resume_file, 1); + printk( "disabled\n" ); + return; + } + MDELAY(1000); -/** - * swsusp_free - Free memory allocated to hold snapshot. - */ + if (pm_prepare_console()) + printk("swsusp: Can't allocate a console... proceeding\n"); -int swsusp_free(void) -{ - PRINTK( "Freeing prev allocated pagedir\n" ); - free_suspend_pagedir((unsigned long) pagedir_save); + if (!resume_file[0] && resume_status == RESUME_SPECIFIED) { + printk( "suspension device unspecified\n" ); + return; + } - PRINTK( "Fixing swap signatures... 
" ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); - return 0; + printk( "resuming from %s\n", resume_file); + if (read_suspend_image(resume_file, 0)) + goto read_failure; + do_magic(1); + panic("This never returns"); + +read_failure: + pm_restore_console(); + return; } static int __init resume_setup(char *str) { - if (strlen(str)) - strncpy(resume_file, str, 255); + if (resume_status == NORESUME) + return 1; + + strncpy( resume_file, str, 255 ); + resume_status = RESUME_SPECIFIED; + return 1; } static int __init noresume_setup(char *str) { - resume_file[0] = '\0'; + resume_status = NORESUME; return 1; } __setup("noresume", noresume_setup); __setup("resume=", resume_setup); +EXPORT_SYMBOL(software_suspend); +EXPORT_SYMBOL(software_suspend_enabled); diff -puN kernel/sched.c~test5-pm2 kernel/sched.c --- 25/kernel/sched.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/sched.c 2003-09-10 08:12:15.000000000 -0700 @@ -28,6 +28,7 @@ #include <linux/kernel_stat.h> #include <linux/security.h> #include <linux/notifier.h> +#include <linux/suspend.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/timer.h> @@ -2421,6 +2422,9 @@ static int migration_thread(void * data) struct list_head *head; migration_req_t *req; + if (current->flags & PF_FREEZE) + refrigerator(PF_IOTHREAD); + spin_lock_irq(&rq->lock); head = &rq->migration_queue; current->state = TASK_INTERRUPTIBLE; diff -puN kernel/sys.c~test5-pm2 kernel/sys.c --- 25/kernel/sys.c~test5-pm2 2003-09-10 08:12:15.000000000 -0700 +++ 25-akpm/kernel/sys.c 2003-09-10 08:12:15.000000000 -0700 @@ -456,8 +456,11 @@ asmlinkage long sys_reboot(int magic1, i #ifdef CONFIG_SOFTWARE_SUSPEND case LINUX_REBOOT_CMD_SW_SUSPEND: - if (!pm_suspend(PM_SUSPEND_DISK)) - break; + if (!software_suspend_enabled) { + unlock_kernel(); + return -EAGAIN; + } + software_suspend(); do_exit(0); break; #endif _