Over the last 4 years I’ve been part of a small group of security researchers that used OneNote as its primary means of recording detailed investigative notes. The solution worked well; however, the group had outgrown this methodology, and with Microsoft deprecating older versions I suddenly had to archive thousands of OneNote notebooks/pages.

Unfortunately, not once could I successfully export an entire notebook without OneNote crashing. A quick search shows I’m not alone; on top of that, exporting an entire notebook limited me to PDF instead of HTML.

After a few hours I came up with a quick PowerShell script that recursively exports every notebook/section/page (including attachments) as HTML. My PowerShell isn’t great, but it worked well for my requirements, and now I can search through my old notes with a quick grep.

If you are in the same boat you may find the script below useful.

# https://stackoverflow.com/questions/53689087/powershell-and-onenote
# http://thebackend.info/powershell/2017/12/onenote-read-and-write-content-with-powershell/
# https://stackoverflow.com/questions/53639041/how-to-access-contents-of-onenote-page

# Get export folder
# Shows a folder-browser dialog and returns the selected path as a string,
# or $null when the dialog is closed without choosing a folder.
#   $initialDirectory - optional folder to pre-select when the dialog opens.
Function Get-Folder($initialDirectory) {
	# Add-Type replaces the obsolete Assembly.LoadWithPartialName call.
	Add-Type -AssemblyName System.Windows.Forms
	$dialog = New-Object System.Windows.Forms.FolderBrowserDialog
	$dialog.Description = "Select an export folder"
	$dialog.RootFolder = "MyComputer"
	if ($initialDirectory) {
		$dialog.SelectedPath = $initialDirectory
	}
	# Assign (not append): "+=" would extend any $folder value that is
	# visible from a parent scope, e.g. one left over from an earlier run.
	$folder = $null
	try {
		if ($dialog.ShowDialog() -eq [System.Windows.Forms.DialogResult]::OK) {
			$folder = $dialog.SelectedPath
		}
	}
	finally {
		$dialog.Dispose()
	}
	return $folder
}

# Spider and find each page, create directory for each group
# Walks the hierarchy XML below $node. A child that has child nodes of its own
# gets a directory under $path and is walked recursively; a child without
# child nodes is handed to Export-OneNote-Page.
#   $onenote - OneNote COM application object, passed through to the exporter
#   $node    - hierarchy XML node whose children are processed
#   $path    - existing directory that receives this node's content
Function Spider-OneNote-Notebook {
	param( $onenote, $node, $path )
	foreach($child in $node.ChildNodes) {
		if ($child.HasChildNodes) {
			# Children flagged as the recycle bin are skipped entirely.
			if ($child.isRecycleBin -ne 'true') {
				# Sanitise the name the same way page and notebook names are,
				# otherwise characters such as ':' or '/' break the path.
				$safeName = ReplaceIllegal -text $child.name
				$folder = Join-Path -Path $path -ChildPath $safeName
				# -Force makes this a no-op when the directory already exists.
				New-Item -Path $folder -ItemType directory -Force | Out-Null
				Write-Host "Section: $($folder)"
				Spider-OneNote-Notebook -onenote $onenote -node $child -path $folder
			}
		} else {
			# NOTE(review): any childless node is treated as a page, which
			# presumably includes empty sections - confirm that is intended.
			Export-OneNote-Page -onenote $onenote -node $child -path $path
		}
	}
}

# Export page
# Publishes one page as an .htm file inside $path and then copies the page's
# embedded attachments into the same directory.
#   $onenote - OneNote COM application object
#   $node    - hierarchy XML node of the page (its name and ID are read)
#   $path    - directory that receives the exported file
Function Export-OneNote-Page {
	param( $onenote, $node, $path )
	# Replace invalid file characters
	$name = ReplaceIllegal -text $node.name
	$file = Join-Path -Path $path -ChildPath "$($name).htm"
	# Pages in one section may share a title (e.g. several "Untitled page"
	# entries). Pick an unused file name so a later page never collides with
	# an earlier export.
	$suffix = 1
	while (Test-Path -LiteralPath $file) {
		$suffix++
		$file = Join-Path -Path $path -ChildPath "$($name) ($($suffix)).htm"
	}
	Write-Host "Page: $($file)"
	# Export; 7 is the PublishFormat value for HTML (pfHTML).
	$onenote.Publish($node.ID, $file, 7, "")
	Export-OneNote-Attachments -onenote $onenote -node $node -path $path
}

# Copy embedded attachments
# Reads the page's content XML and copies every embedded file it references
# into $path, using the attachment's preferred name as the file name.
#   $onenote - OneNote COM application object
#   $node    - hierarchy XML node of the page (its ID is read)
#   $path    - directory that receives the copied attachments
Function Export-OneNote-Attachments {
	param ( $onenote, $node, $path )
	$xml = ''
	# The URI must be a quoted string; unquoted it is parsed as a command name.
	$schema = @{one='http://schemas.microsoft.com/office/onenote/2013/onenote'}
	$onenote.GetPageContent($node.ID, [ref]$xml)
	if ([string]::IsNullOrEmpty($xml)) {
		return
	}
	# Match InsertedFile anywhere in the page so attachments inside nested
	# outline elements are found as well, not only top-level ones.
	Select-Xml -Content $xml -XPath "//one:InsertedFile" -Namespace $schema | ForEach-Object {
		$source = $_.Node.pathCache
		# Without an existing cached copy on disk there is nothing to copy.
		if (-not $source -or -not (Test-Path -LiteralPath $source)) {
			Write-Warning "Attachment has no cached copy, skipped: $($_.Node.preferredName)"
			return
		}
		$file = Join-Path -Path $path -ChildPath $_.Node.preferredName
		Write-Host "Attachment: $($file)"
		# -LiteralPath keeps characters such as '[' from being read as wildcards.
		Copy-Item -LiteralPath $source -Destination $file
	}
}

# Returns $text with every character that is not allowed in a file name
# replaced by an underscore.
Function ReplaceIllegal {
	param ( $text )
	# Build one regex character class out of all invalid file-name characters.
	$badChars = -join ([System.IO.Path]::GetInvalidFileNameChars())
	$class = '[' + [regex]::Escape($badChars) + ']'
	return ($text -replace $class, '_')
}

# Get export folder
$folder = Get-Folder

if (-not $folder) {
	# The dialog was cancelled; without a target folder every Join-Path
	# below would fail, so stop before touching OneNote.
	Write-Host "No export folder selected; nothing was exported."
} else {
	# Connect
	$OneNote = New-Object -ComObject OneNote.Application
	[xml]$Hierarchy = ""
	# Fetch the hierarchy of all notebooks down to page level.
	$OneNote.GetHierarchy("", [Microsoft.Office.InterOp.OneNote.HierarchyScope]::hsPages, [ref]$Hierarchy)

	# Loop over each notebook
	foreach ($notebook in $Hierarchy.Notebooks.Notebook ) {
		$name = ReplaceIllegal -text $notebook.name
		$nf = Join-Path -Path $folder -ChildPath $name
		Write-Host "Notebook: $($nf)"
		# -Force keeps a re-run from failing when the directory already exists.
		New-Item -Path $nf -ItemType directory -Force | Out-Null
		Spider-OneNote-Notebook -onenote $OneNote -node $notebook -path $nf
	}
}