From: Paul Mackerras <paulus@samba.org>

On pSeries systems, according to the platform architecture specs, we are
supposed to be supplying a structure to firmware that tells firmware about
our capabilities, such as which version of the data structures that
describe available memory we are expecting to see.  The way we end up
having to supply this data structure is a bit gross, since it was designed
for AIX and doesn't suit us very well.  This patch adds the code to supply
this data structure to the firmware.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/ppc64/boot/addnote.c     |   60 +++++++++++++++++++----
 arch/ppc64/kernel/prom_init.c |  107 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+), 10 deletions(-)

diff -puN arch/ppc64/boot/addnote.c~ppc64-tell-firmware-about-kernel-capabilities arch/ppc64/boot/addnote.c
--- 25/arch/ppc64/boot/addnote.c~ppc64-tell-firmware-about-kernel-capabilities	2005-04-28 00:01:29.923121360 -0700
+++ 25-akpm/arch/ppc64/boot/addnote.c	2005-04-28 00:01:29.929120448 -0700
@@ -19,6 +19,7 @@
 #include <unistd.h>
 #include <string.h>
 
+/* CHRP note section */
 char arch[] = "PowerPC";
 
 #define N_DESCR	6
@@ -31,6 +32,29 @@ unsigned int descr[N_DESCR] = {
 	0x4000,			/* load-base */
 };
 
+/* RPA note section */
+char rpaname[] = "IBM,RPA-Client-Config";
+
+/*
+ * Note: setting ignore_my_client_config *should* mean that OF ignores
+ * all the other fields, but there is a firmware bug which means that
+ * it looks at the splpar field at least.  So these values need to be
+ * reasonable.
+ */
+#define N_RPA_DESCR	8
+unsigned int rpanote[N_RPA_DESCR] = {
+	0,			/* lparaffinity */
+	64,			/* min_rmo_size */
+	0,			/* min_rmo_percent */
+	40,			/* max_pft_size */
+	1,			/* splpar */
+	-1,			/* min_load */
+	0,			/* new_mem_def */
+	1,			/* ignore_my_client_config */
+};
+
+#define ROUNDUP(len)	(((len) + 3) & ~3)
+
 unsigned char buf[512];
 
 #define GET_16BE(off)	((buf[off] << 8) + (buf[(off)+1]))
@@ -69,7 +93,7 @@ main(int ac, char **av)
 {
 	int fd, n, i;
 	int ph, ps, np;
-	int nnote, ns;
+	int nnote, nnote2, ns;
 
 	if (ac != 2) {
 		fprintf(stderr, "Usage: %s elf-file\n", av[0]);
@@ -81,7 +105,8 @@ main(int ac, char **av)
 		exit(1);
 	}
 
-	nnote = strlen(arch) + 1 + (N_DESCR + 3) * 4;
+	nnote = 12 + ROUNDUP(strlen(arch) + 1) + sizeof(descr);
+	nnote2 = 12 + ROUNDUP(strlen(rpaname) + 1) + sizeof(rpanote);
 
 	n = read(fd, buf, sizeof(buf));
 	if (n < 0) {
@@ -104,7 +129,7 @@ main(int ac, char **av)
 	np = GET_16BE(E_PHNUM);
 	if (ph < E_HSIZE || ps < PH_HSIZE || np < 1)
 		goto notelf;
-	if (ph + (np + 1) * ps + nnote > n)
+	if (ph + (np + 2) * ps + nnote + nnote2 > n)
 		goto nospace;
 
 	for (i = 0; i < np; ++i) {
@@ -117,12 +142,12 @@ main(int ac, char **av)
 	}
 
 	/* XXX check that the area we want to use is all zeroes */
-	for (i = 0; i < ps + nnote; ++i)
+	for (i = 0; i < 2 * ps + nnote + nnote2; ++i)
 		if (buf[ph + i] != 0)
 			goto nospace;
 
 	/* fill in the program header entry */
-	ns = ph + ps;
+	ns = ph + 2 * ps;
 	PUT_32BE(ph + PH_TYPE, PT_NOTE);
 	PUT_32BE(ph + PH_OFFSET, ns);
 	PUT_32BE(ph + PH_FILESZ, nnote);
@@ -134,11 +159,26 @@ main(int ac, char **av)
 	PUT_32BE(ns + 8, 0x1275);
 	strcpy(&buf[ns + 12], arch);
 	ns += 12 + strlen(arch) + 1;
-	for (i = 0; i < N_DESCR; ++i)
-		PUT_32BE(ns + i * 4, descr[i]);
+	for (i = 0; i < N_DESCR; ++i, ns += 4)
+		PUT_32BE(ns, descr[i]);
+
+	/* fill in the second program header entry and the RPA note area */
+	ph += ps;
+	PUT_32BE(ph + PH_TYPE, PT_NOTE);
+	PUT_32BE(ph + PH_OFFSET, ns);
+	PUT_32BE(ph + PH_FILESZ, nnote2);
+
+	/* fill in the note area we point to */
+	PUT_32BE(ns, strlen(rpaname) + 1);
+	PUT_32BE(ns + 4, sizeof(rpanote));
+	PUT_32BE(ns + 8, 0x12759999);
+	strcpy(&buf[ns + 12], rpaname);
+	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
+	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
+		PUT_32BE(ns, rpanote[i]);
 
 	/* Update the number of program headers */
-	PUT_16BE(E_PHNUM, np + 1);
+	PUT_16BE(E_PHNUM, np + 2);
 
 	/* write back */
 	lseek(fd, (long) 0, SEEK_SET);
@@ -155,11 +195,11 @@ main(int ac, char **av)
 	exit(0);
 
  notelf:
-	fprintf(stderr, "%s does not appear to be an ELF file\n", av[0]);
+	fprintf(stderr, "%s does not appear to be an ELF file\n", av[1]);
 	exit(1);
 
  nospace:
 	fprintf(stderr, "sorry, I can't find space in %s to put the note\n",
-		av[0]);
+		av[1]);
 	exit(1);
 }
diff -puN arch/ppc64/kernel/prom_init.c~ppc64-tell-firmware-about-kernel-capabilities arch/ppc64/kernel/prom_init.c
--- 25/arch/ppc64/kernel/prom_init.c~ppc64-tell-firmware-about-kernel-capabilities	2005-04-28 00:01:29.925121056 -0700
+++ 25-akpm/arch/ppc64/kernel/prom_init.c	2005-04-28 00:01:29.931120144 -0700
@@ -493,6 +493,113 @@ static void __init early_cmdline_parse(v
 }
 
 /*
+ * To tell the firmware what our capabilities are, we have to pass
+ * it a fake 32-bit ELF header containing a couple of PT_NOTE sections
+ * that contain structures that contain the actual values.
+ */
+static struct fake_elf {
+	Elf32_Ehdr	elfhdr;
+	Elf32_Phdr	phdr[2];
+	struct chrpnote {
+		u32	namesz;
+		u32	descsz;
+		u32	type;
+		char	name[8];	/* "PowerPC" */
+		struct chrpdesc {
+			u32	real_mode;
+			u32	real_base;
+			u32	real_size;
+			u32	virt_base;
+			u32	virt_size;
+			u32	load_base;
+		} chrpdesc;
+	} chrpnote;
+	struct rpanote {
+		u32	namesz;
+		u32	descsz;
+		u32	type;
+		char	name[24];	/* "IBM,RPA-Client-Config" */
+		struct rpadesc {
+			u32	lpar_affinity;
+			u32	min_rmo_size;
+			u32	min_rmo_percent;
+			u32	max_pft_size;
+			u32	splpar;
+			u32	min_load;
+			u32	new_mem_def;
+			u32	ignore_me;
+		} rpadesc;
+	} rpanote;
+} fake_elf = {
+	.elfhdr = {
+		.e_ident = { 0x7f, 'E', 'L', 'F',
+			     ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
+		.e_type = ET_EXEC,	/* yeah right */
+		.e_machine = EM_PPC,
+		.e_version = EV_CURRENT,
+		.e_phoff = offsetof(struct fake_elf, phdr),
+		.e_phentsize = sizeof(Elf32_Phdr),
+		.e_phnum = 2
+	},
+	.phdr = {
+		[0] = {
+			.p_type = PT_NOTE,
+			.p_offset = offsetof(struct fake_elf, chrpnote),
+			.p_filesz = sizeof(struct chrpnote)
+		}, [1] = {
+			.p_type = PT_NOTE,
+			.p_offset = offsetof(struct fake_elf, rpanote),
+			.p_filesz = sizeof(struct rpanote)
+		}
+	},
+	.chrpnote = {
+		.namesz = sizeof("PowerPC"),
+		.descsz = sizeof(struct chrpdesc),
+		.type = 0x1275,
+		.name = "PowerPC",
+		.chrpdesc = {
+			.real_mode = ~0U,	/* ~0 means "don't care" */
+			.real_base = ~0U,
+			.real_size = ~0U,
+			.virt_base = ~0U,
+			.virt_size = ~0U,
+			.load_base = ~0U
+		},
+	},
+	.rpanote = {
+		.namesz = sizeof("IBM,RPA-Client-Config"),
+		.descsz = sizeof(struct rpadesc),
+		.type = 0x12759999,
+		.name = "IBM,RPA-Client-Config",
+		.rpadesc = {
+			.lpar_affinity = 0,
+			.min_rmo_size = 64,	/* in megabytes */
+			.min_rmo_percent = 0,
+			.max_pft_size = 48,	/* 2^48 bytes max PFT size */
+			.splpar = 1,
+			.min_load = ~0U,
+			.new_mem_def = 0
+		}
+	}
+};
+
+static void __init prom_send_capabilities(void)
+{
+	unsigned long offset = reloc_offset();
+	ihandle elfloader;
+	int ret;
+
+	elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader"));
+	if (elfloader == 0) {
+		prom_printf("couldn't open /packages/elf-loader\n");
+		return;
+	}
+	ret = call_prom("call-method", 3, 1, ADDR("process-elf-header"),
+			elfloader, ADDR(&fake_elf));
+	call_prom("close", 1, 0, elfloader);
+}
+
+/*
  * Memory allocation strategy... our layout is normally:
  *
  *  at 14Mb or more we vmlinux, then a gap and initrd. In some rare cases, initrd
_