I developed this in PowerShell 5.1 (it uses hashtables, since PowerShell has no native JSON type). It performs only a basic conversion:
function Convert-HTML2ADF {
PARAM (
[Parameter(ValueFromPipeline = $true)][string]$HTMLString
)
Begin {
function ConvertTo-ADF {
    <#
    .SYNOPSIS
        Serializes a (nested) hashtable into a JSON object string.
    .DESCRIPTION
        Keys are written as JSON strings and members are separated by ','
        with ': ' between key and value. Value handling:
          * hashtable      -> nested object (recursive call)
          * array          -> JSON array, each member serialized by these rules
          * string         -> sanitized, escaped and quoted JSON string
          * 'true'/'false' -> strings with exactly this content (any casing)
                              are written as bare lower-case JSON booleans;
                              this is how callers express booleans as strings
          * [bool]         -> true / false
          * $null          -> null
          * anything else  -> its PowerShell string form, unquoted (numbers)
        Before escaping, a fixed set of characters and any literal \uXXXX
        sequence are removed from keys and string values.
    .PARAMETER source
        The hashtable to serialize. Accepted from the pipeline.
    .OUTPUTS
        System.String - one JSON object per input hashtable.
    #>
    PARAM (
        [Parameter(ValueFromPipeline = $true)][hashtable]$source
    )
    Begin {
        # Characters removed from all text. They are written as code points
        # instead of a literal string so the script does not depend on the
        # encoding the .ps1/.psm1 file is saved or read with.
        $stripChars = @(
            [char]0x00C2, [char]0x202A, [char]0x00AC, [char]0x017D, [char]0x2030,
            [char]0x00A4, [char]0x00CE, [char]0x00BC, [char]0x00A5
        )

        # Sanitizes a text and returns it as a quoted, escaped JSON string.
        function ConvertTo-AdfJsonString([string]$Text) {
            foreach ($c in $stripChars) {
                $pattern = [regex]::Escape([string]$c)
                # -replace is case-insensitive, so case variants go as well.
                $Text = $Text -replace $pattern, ''
            }
            # Drop literal \uXXXX sequences found in the input text.
            $Text = $Text -replace "\\u([0-9a-fA-F]{4})", ""

            # Escape the backslash first so later escapes are not doubled.
            $Text = $Text.Replace('\', '\\').Replace('"', '\"')
            $Text = $Text.Replace("`b", '\b').Replace("`f", '\f')
            $Text = $Text.Replace("`n", '\n').Replace("`r", '\r').Replace("`t", '\t')
            # Any remaining control character must be written as \u00XX.
            $Text = [regex]::Replace(
                $Text,
                '[\x00-\x1f]',
                [System.Text.RegularExpressions.MatchEvaluator] {
                    param($m)
                    '\u{0:x4}' -f [int]$m.Value[0]
                })
            return '"' + $Text + '"'
        }

        # Serializes a single value according to the rules in the help text.
        function ConvertTo-AdfJsonValue($Value) {
            if ($null -eq $Value) {
                return 'null'
            }
            if ($Value -is [hashtable]) {
                return ($Value | ConvertTo-ADF)
            }
            if ($Value -is [System.Array]) {
                $items = @(foreach ($member in $Value) { ConvertTo-AdfJsonValue -Value $member })
                return '[' + ($items -join ',') + ']'
            }
            if ($Value -is [bool]) {
                return "$Value".ToLowerInvariant()
            }
            if ($Value -is [string]) {
                if ('true', 'false' -contains $Value) {
                    return $Value.ToLowerInvariant()
                }
                return (ConvertTo-AdfJsonString -Text $Value)
            }
            return "$Value"
        }
    }
    Process {
        $members = @()
        foreach ($key in $source.Keys) {
            $name = ConvertTo-AdfJsonString -Text $key
            $value = ConvertTo-AdfJsonValue -Value $source[$key]
            $members += "${name}: $value"
        }
        return '{' + ($members -join ',') + '}'
    }
}
function HTML2ADF {
    <#
    .SYNOPSIS
        Parses an HTML string as XML and builds the ADF document hashtable.
    .DESCRIPTION
        Creates the root hashtable (version 1, type "doc", empty content) and
        passes every top-level XML element to RecursiveHTML2ADF, which fills
        the root's "content".
        Empty or whitespace-only input yields the empty document.
        If the input cannot be parsed as a single XML document (for example a
        fragment with several top-level elements), it is parsed again wrapped
        in a synthetic <root> element. If that fails too, the original parse
        error is rethrown.
    .PARAMETER HTML
        The markup to convert; must be well-formed XML. Accepted from the
        pipeline.
    .OUTPUTS
        System.Collections.Hashtable - the ADF document (not yet serialized).
    #>
    PARAM (
        [Parameter(ValueFromPipeline = $true)]$HTML
    )
    Process {
        [hashtable]$ADF = @{
            "version" = 1;
            "type"    = "doc";
            "content" = @();
        }

        # Nothing to convert: return the empty document instead of $null.
        if ([string]::IsNullOrWhiteSpace([string]$HTML)) {
            return $ADF
        }

        try {
            [xml]$x = $HTML
        } catch {
            $parseError = $_
            try {
                # Fragments with several top-level nodes are not a valid XML
                # document on their own; give them a single wrapper root.
                [xml]$x = "<root>$HTML</root>"
            } catch {
                throw $parseError
            }
        }

        foreach ($n in $x.ChildNodes) {
            if ($n -is [System.Xml.XmlElement]) {
                # RecursiveHTML2ADF fills $ADF in place and returns that same
                # hashtable, so its return value is not needed here.
                $null = RecursiveHTML2ADF -pn $n -Parent $ADF
            }
        }
        return $ADF
    }
}
# Walks the child nodes of $pn and appends the ADF nodes they produce to
# $Parent["content"].
#   $pn     - XML element whose children are converted ($pn itself is not).
#   $Parent - ADF node (hashtable) that receives the converted children.
#   $marks  - text marks inherited from enclosing elements.
#   $attrs  - attributes inherited from enclosing elements (passed through).
# Returns $Parent (the same hashtable, modified in place).
function RecursiveHTML2ADF([System.Xml.XmlElement]$pn, [hashtable]$Parent, [hashtable[]]$marks = @(), [hashtable[]]$attrs = @()) {
    foreach ($n in $pn.ChildNodes) {
        # Only elements and text nodes are converted.
        if (-not ($n -is [System.Xml.XmlElement] -or $n -is [System.Xml.XmlText])) {
            continue
        }

        $checked = CheckNode -n $n -marks $marks -attrs $attrs -parenttype $Parent["type"]
        $child = $checked.Child
        # Keep the marks/attrs produced for THIS node in their own variables.
        # Assigning them back to $marks/$attrs would leak them to every
        # following sibling (e.g. text after </b> would stay bold).
        $childMarks = $checked.Marks
        $childAttrs = $checked.Attrs

        if ($n.HasChildNodes) {
            if ($null -eq $child) {
                # Node has no ADF node of its own (e.g. a mark element): its
                # children are attached directly to the current parent.
                $child = RecursiveHTML2ADF -pn $n -Parent $Parent -marks $childMarks -attrs $childAttrs
            } else {
                $child = RecursiveHTML2ADF -pn $n -Parent $child -marks $childMarks -attrs $childAttrs
            }
        }

        # When the recursion above ran against $Parent it returned $Parent
        # itself; the serialized comparison keeps it from being added to its
        # own content.
        if ($null -ne $child -and ($Parent | ConvertTo-ADF) -ne ($child | ConvertTo-ADF)) {
            $Parent["content"] += $child
        }
    }
    return $Parent
}
# Maps one XML node to its ADF representation.
#   $n          - element or text node to inspect.
#   $marks      - marks inherited from enclosing elements.
#   $attrs      - inherited attributes; returned unchanged.
#   $parenttype - ADF "type" of the node that will receive the result.
# Returns a pscustomobject with:
#   Child - the new ADF node (hashtable), or $null when the element creates no
#           node of its own (mark elements and unknown tags);
#   Marks - the marks that apply to the node's descendants;
#   Attrs - $attrs as passed in.
function CheckNode([System.Xml.XmlNode]$n, [hashtable[]]$marks, [hashtable[]]$attrs, [string]$parenttype) {
    [hashtable]$out = $null
    # Invariant lower-casing: a culture-sensitive ToLower() turns "LI" into
    # a dotless-i string under Turkish culture, which would match nothing.
    switch ($n.Name.ToLowerInvariant()) {
        'p' {
            $out = @{
                "type"    = "paragraph";
                "content" = [hashtable[]]@(); # receives text nodes
            }
            # A paragraph starts with no inherited marks.
            $marks = @()
            break
        }
        {'strong','b' -contains $_} {
            $marks += @{"type" = "strong"}
            break
        }
        {'em','i' -contains $_} {
            $marks += @{"type" = "em"}
            break
        }
        {'sub','sup' -contains $_} {
            $marks += @{
                "type"  = "subsup";
                "attrs" = @{
                    "type" = $_;
                }
            }
            break
        }
        '#text' {
            $out = @{
                "type" = "text";
                "text" = $n.Value;
            }
            # "marks" is only added when there is at least one mark.
            if ($marks.Length -gt 0) {
                $out["marks"] = $marks
            }
            # Table cells and list items get text wrapped in a paragraph.
            if ("tableCell", "tableHeader", "listItem" -contains $parenttype) {
                $out = @{
                    "type"    = "paragraph";
                    "content" = [hashtable[]]@($out);
                }
            }
            break
        }
        'ul' {
            $out = @{
                "type"    = "bulletList";
                "content" = @(); # receives listItem nodes
            }
            break
        }
        'ol' {
            $out = @{
                "type"    = "orderedList";
                "content" = @(); # receives listItem nodes
            }
            break
        }
        'li' {
            $out = @{
                "type"    = "listItem";
                "content" = @(); # insert bulletList, codeBlock with no marks, mediaSingle, orderedList, paragraph with no marks
            }
            break
        }
        'table' {
            $out = @{
                "type"    = "table";
                "attrs"   = @{}
                "content" = [hashtable[]]@();
            }
            break
        }
        'tr' {
            $out = @{
                "type"    = "tableRow";
                "content" = @();
            }
            break
        }
        {'td','th' -contains $_} {
            # <th> becomes a header cell, <td> a regular cell.
            $cellType = "tableCell"
            if ($_ -eq 'th') {
                $cellType = "tableHeader"
            }
            $out = @{
                "type"    = $cellType;
                "attrs"   = @{}
                "content" = @(); # insert: blockquote, bulletList, codeBlock, heading, mediaGroup, orderedList, panel, paragraph, rule
            }
            break
        }
    }
    return [pscustomobject]@{"Child" = $out; "Marks" = $marks; "Attrs" = $attrs;}
}
}
Process {
return $HTMLString | HTML2ADF | ConvertTo-ADF;
}
}
s
Alan,
This is much awesome-ness. Is this maintained/available anywhere on GitHub?
The only change I had to make to import this as a module in PowerShell 5 was to this line:
[char[]]'Â‪¬Ž‰¤Î¼¥' | ForEach-Object { $Dest = $Dest -replace "$_", '';}
Changed to:
[char[]]@([char]0x00C2, [char]0x00E2, [char]0x202A, [char]0x00AC, [char]0x017D, [char]0x02C6, [char]0x00A4, [char]0x00CE, [char]0x00BC, [char]0x00A5) | ForEach-Object { $Dest = $Dest -replace "$_", '';}
Thanks!!!
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Hi @Venkat A
Here's the update on your requirement in the feature request
https://jira.atlassian.com/browse/JRACLOUD-77436
Other references here
HTML to Wiki
JSON to Wiki
Thanks,
Pramodh
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Hi @Pramodh M
It looks like this enhancement still has not been implemented. Also, I am looking specifically for a .NET solution. Please let me know if you have anything.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
Online forums and learning are now in one easy-to-use experience.
By continuing, you accept the updated Community Terms of Use and acknowledge the Privacy Policy. Your public name, photo, and achievements may be publicly visible and available in search engines.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.