<?xml version="1.0" encoding="utf-8"?>
<!-- Sample CD catalog. The title "Hide your heart" appears three times. -->
<CATALOG>
  <CD>
    <TITLE>Empire Burlesque</TITLE>
    <ARTIST>Bob Dylan</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Columbia</COMPANY>
    <PRICE>10.90</PRICE>
    <YEAR>1985</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Greatest Hits</TITLE>
    <ARTIST>Dolly Parton</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>RCA</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1982</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
</CATALOG>
<!-- Sample CD catalog. The title "Hide your heart" appears three times. -->
<CATALOG>
  <CD>
    <TITLE>Empire Burlesque</TITLE>
    <ARTIST>Bob Dylan</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Columbia</COMPANY>
    <PRICE>10.90</PRICE>
    <YEAR>1985</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Greatest Hits</TITLE>
    <ARTIST>Dolly Parton</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>RCA</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1982</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
</CATALOG>
Code (PowerShell script that removes CD entries with a duplicate TITLE from catalog.xml):
# Remove duplicate <CD> entries from catalog.xml, keeping the first occurrence
# of every TITLE, and write the result back to the same file.

# Load XML file
$file = "catalog.xml"
# Resolve to a full path first: the .NET Load/Save methods resolve relative
# paths against the process working directory, which can differ from the
# current PowerShell location.
$path = Convert-Path -LiteralPath $file
# XmlDocument.Load honours the encoding named in the XML declaration, whereas
# casting Get-Content output to [xml] decodes the text with Get-Content's
# default encoding and can mangle non-ASCII characters.
$xml = New-Object System.Xml.XmlDocument
$xml.Load($path)

# Titles seen so far. The comparer is case-insensitive to mirror the -eq
# operator. A <CD> without a TITLE element is keyed as an empty title.
$seenTitles = New-Object 'System.Collections.Generic.HashSet[string]' ([System.StringComparer]::InvariantCultureIgnoreCase)

# Parse the document and delete the duplicate nodes with the exception of the first occurrence
# @(...) snapshots the node list so that removing nodes cannot disturb the loop.
foreach ($cd in @($xml.SelectNodes("/CATALOG/CD")))
{
    # HashSet.Add returns $false when the title is already present,
    # i.e. this <CD> repeats an earlier one.
    if (-not $seenTitles.Add([string]$cd.TITLE))
    {
        # [void] keeps the removed node from being written to the pipeline.
        [void]$cd.ParentNode.RemoveChild($cd)
    }
}

# Update the file
$xml.Save($path)
# Remove duplicate <CD> entries from catalog.xml, keeping the first occurrence
# of every TITLE, and write the result back to the same file.

# Load XML file
$file = "catalog.xml"
# Resolve to a full path first: the .NET Load/Save methods resolve relative
# paths against the process working directory, which can differ from the
# current PowerShell location.
$path = Convert-Path -LiteralPath $file
# XmlDocument.Load honours the encoding named in the XML declaration, whereas
# casting Get-Content output to [xml] decodes the text with Get-Content's
# default encoding and can mangle non-ASCII characters.
$xml = New-Object System.Xml.XmlDocument
$xml.Load($path)

# Titles seen so far. The comparer is case-insensitive to mirror the -eq
# operator. A <CD> without a TITLE element is keyed as an empty title.
$seenTitles = New-Object 'System.Collections.Generic.HashSet[string]' ([System.StringComparer]::InvariantCultureIgnoreCase)

# Parse the document and delete the duplicate nodes with the exception of the first occurrence
# @(...) snapshots the node list so that removing nodes cannot disturb the loop.
foreach ($cd in @($xml.SelectNodes("/CATALOG/CD")))
{
    # HashSet.Add returns $false when the title is already present,
    # i.e. this <CD> repeats an earlier one.
    if (-not $seenTitles.Add([string]$cd.TITLE))
    {
        # [void] keeps the removed node from being written to the pipeline.
        [void]$cd.ParentNode.RemoveChild($cd)
    }
}

# Update the file
$xml.Save($path)
Source: http://klemmestad.com/2014/08/08/removing-duplicate-xml-nodes-using-powershell/
No comments:
Post a Comment