FB_init

Thursday, October 04, 2012

PowerShell script to generate a graph of Delicious tags

Here is a PowerShell script that generates a graph of my Delicious tags in GEXF format. GEXF is an XML representation of a graph that can be consumed by Sigma.js, a JavaScript library for rendering graphs.


# Strict mode 1.0: referencing a variable that was never assigned is an error
# instead of silently evaluating to $null.
Set-StrictMode -Version 1.0
# Creates a GEXF <node> element and appends it to $nodes.
#
# Parameters:
#   $id                 - value written to the node's "id" attribute
#   $label              - value written to the node's "label" attribute
#   $xmlDoc             - XmlDocument used to create the elements
#   $nodes              - parent element that receives the new <node>
#   $rootNS             - namespace URI for every created element
#   $ZeroAttributeValue - when truthy, stored as <attvalue for="0"> (occurrences)
#   $OneAttributeValue  - when truthy, stored as <attvalue for="1"> (URL)
#   $comment            - text of an XML comment appended inside the node
#
# Output: the <node> element that was appended (the return value of AppendChild).
# Intermediate AppendChild results are discarded so they do not leak into the
# pipeline.
Function CreateNode($id, $label, $xmlDoc, $nodes, $rootNS, $ZeroAttributeValue, $OneAttributeValue, $comment)
{
      $node = $xmlDoc.CreateElement("node", $rootNS)
      $node.SetAttribute("id",$id)
      $node.SetAttribute("label",$label)

      # A single <attvalues> container holds every attribute value of the node.
      # It is always created, so it is never referenced while unassigned, and it
      # is attached to the node only when it actually has children.
      $attvalues = $xmlDoc.CreateElement("attvalues", $rootNS)

      if ($ZeroAttributeValue) { # occurrences
          $attvalue = $xmlDoc.CreateElement("attvalue", $rootNS)
          $attvalue.SetAttribute("for","0")
          $attvalue.SetAttribute("value",$ZeroAttributeValue)
          [void]$attvalue.AppendChild($xmlDoc.CreateComment('occurrences'))
          [void]$attvalues.AppendChild($attvalue)
      }

      if ($OneAttributeValue) { # URL
          $attvalue = $xmlDoc.CreateElement("attvalue", $rootNS)
          $attvalue.SetAttribute("for","1")
          $attvalue.SetAttribute("value",$OneAttributeValue)
          [void]$attvalue.AppendChild($xmlDoc.CreateComment('URL'))
          [void]$attvalues.AppendChild($attvalue)
      }

      if ($attvalues.HasChildNodes) {
          [void]$node.AppendChild($attvalues)
      }

      [void]$node.AppendChild($xmlDoc.CreateComment($comment))

      $nodes.AppendChild($node)
}

# Creates a GEXF <edge> element and appends it to $edges.
#
# Parameters:
#   $id     - value written to the edge's "id" attribute
#   $source - id of the node the edge starts from ("source" attribute)
#   $target - id of the node the edge points to ("target" attribute)
#   $xmlDoc - XmlDocument used to create the element
#   $edges  - parent element that receives the new <edge>
#   $rootNS - namespace URI for the created element
#
# Output: the <edge> element that was appended (the return value of AppendChild).
Function CreateEdge($id, $source, $target, $xmlDoc, $edges, $rootNS)
{
       $edge = $xmlDoc.CreateElement("edge", $rootNS)
       # Use the $id parameter rather than a variable picked up from the
       # caller's scope, so the function works for any caller.
       $edge.SetAttribute("id",$id)
       $edge.SetAttribute("source",$source)
       $edge.SetAttribute("target",$target)

       $edges.AppendChild($edge)
}
# Declares the node attributes used by the graph: builds an
# <attributes class="node"> element with two <attribute> children
# (id 0 = "occurrences-node" / integer, id 1 = "url" / string) and appends it
# to $graph. Each AppendChild result is written to the pipeline.
Function CreateAttributes($xmlDoc, $graph, $rootNS)
{
    $container = $xmlDoc.CreateElement("attributes", $rootNS)
    $container.SetAttribute("class","node")

    # One entry per declared attribute: id, title, type.
    $declarations = @(
        @("0", "occurrences-node", "integer"),
        @("1", "url", "string")
    )

    foreach ($declaration in $declarations) {
        $element = $xmlDoc.CreateElement("attribute", $rootNS)
        $element.SetAttribute("id", $declaration[0])
        $element.SetAttribute("title", $declaration[1])
        $element.SetAttribute("type", $declaration[2])
        $container.AppendChild($element)
    }

    $graph.AppendChild($container)
}
# Returns $true when $str1.CompareTo($str2) reports the two values as equal
# (a result of 0), $false otherwise.
Function EqualString($str1, $str2) {
   $comparison = $str1.CompareTo($str2)
   return ($comparison -eq 0)
}
# Decides whether a tag takes part in the graph. A tag is accepted when it is
# one of the language tags ('português', 'español'), or when it starts with
# 'CSN_' and is not one of the explicitly excluded CSN_ tags.
Function WhiteListTag($tag) {
  $languageTags = 'português', 'español'
  $excludedTags = 'CSN_', 'CSN_source', 'CSN_f', 'CSN_t_', 'CSN_freepage'

  $isLanguage = $false
  foreach ($name in $languageTags) {
    if ($name.CompareTo($tag) -eq 0) { $isLanguage = $true; break }
  }

  $isExcluded = $false
  foreach ($name in $excludedTags) {
    if (EqualString $name $tag) { $isExcluded = $true; break }
  }

  $isNamespaced = $tag.StartsWith('CSN_')

  if ($isLanguage) { return $true }
  return ($isNamespaced -and !$isExcluded)
}
# ---------------------------------------------------------------------------
# Main script.
#   1. Load the tag list (a JSON object whose property names are tags and whose
#      values are passed on as the "occurrences" attribute of each tag node).
#   2. Create one node per whitelisted tag.
#   3. For every whitelisted tag, download its bookmark feed, create one node
#      per distinct URL, and link tag -> URL and URL -> each whitelisted tag
#      listed on the bookmark.
#   4. Save the resulting GEXF document.
# ---------------------------------------------------------------------------

# Locations used by the script, gathered here so they are easy to adjust.
$jsonNetAssembly = "E:\install\JsonNetBin\Net35\Newtonsoft.Json.dll"
$tagsJsonPath = "C:\Users\gustavo.frederico\Documents\jsonDeliciousGF.txt"
$feedBaseUrl = "http://feeds.delicious.com/v2/json/gcsfred/"
$outputPath = "c:\mytemp\foo.xml"

[void][system.reflection.assembly]::LoadFrom($jsonNetAssembly)
Add-Type -AssemblyName "System.Net"

# Join the lines in one operation instead of growing a string line by line.
$json = (Get-Content $tagsJsonPath) -join "`r`n"
$rss = [Newtonsoft.Json.Linq.JObject]::Parse($json)

# The document must start with a root element; every other element is created
# in the root's namespace.
# NOTE(review): the original root literal was lost (LoadXml was called with an
# empty string, which throws); a GEXF 1.2 root is assumed here - confirm.
[xml]$xmlDoc = New-Object system.Xml.XmlDocument
$xmlDoc.LoadXml('<?xml version="1.0" encoding="UTF-8"?><gexf xmlns="http://www.gexf.net/1.2draft" version="1.2"></gexf>')
$gexfRoot = $xmlDoc.DocumentElement
$rootNS = $gexfRoot.NamespaceURI

# <meta> block: creator and description.
$meta = $xmlDoc.CreateElement("meta", $rootNS)
$meta.SetAttribute("lastmodifieddate","2012-09-20")
[void]$gexfRoot.AppendChild($meta)
$creator = $xmlDoc.CreateElement("creator", $rootNS)
[void]$creator.AppendChild($xmlDoc.CreateTextNode('CSNombre script'))
[void]$meta.AppendChild($creator)
$desc = $xmlDoc.CreateElement("description", $rootNS)
[void]$desc.AppendChild($xmlDoc.CreateTextNode('gexf representation of Delicious entries'))
[void]$meta.AppendChild($desc)

# <graph> with its attribute declarations; <nodes> and <edges> are attached at
# the very end, once they have been filled.
$graph = $xmlDoc.CreateElement("graph", $rootNS)
$graph.SetAttribute("defaultedgetype","directed")
$graph.SetAttribute("mode","static")
CreateAttributes $xmlDoc $graph $rootNS | Out-Null
$nodes = $xmlDoc.CreateElement("nodes", $rootNS)
[void]$gexfRoot.AppendChild($graph)

# @(...) guarantees an array even when zero or one tag passes the filter.
$csnProps = @($rss.Properties() | where { WhiteListTag $_.Name.ToString() })

$existingLabelNodesTags = @{}
# key: tag name , value: tag node id
$existingLabelNodesURLs = @{}
#  key: URL , value: url node id.
$tagNodeId = 0
$edgeid = 0

# Pass 1: one node per whitelisted tag.
foreach($prop in $csnProps) {
   $existingLabelNodesTags.Add($prop.Name.ToString(), $tagNodeId)

   CreateNode $tagNodeId $prop.Name.ToString() $xmlDoc $nodes $rootNS $prop.Value.ToString() $null 'tag/label node' | Out-Null

   $tagNodeId++
}

# Pass 2: URL nodes and edges. URL node ids continue after the tag node ids.
$wc = New-Object System.Net.WebClient
$edges = $xmlDoc.CreateElement("edges", $rootNS)
$URLNodeId = $tagNodeId
foreach($prop2 in $csnProps) {
   $currentTag = $prop2.Name.ToString()

   $tagNodeIdForCurrentTag = $existingLabelNodesTags.Get_Item($currentTag)

   # The feed is a bare JSON array; it is wrapped in an object so it can be
   # parsed as a JObject. A failed download or parse skips this tag instead of
   # silently re-processing the previous tag's data.
   try {
       $rawJson = $wc.DownloadString($feedBaseUrl + $currentTag) # "?count=99"
       $references = [Newtonsoft.Json.Linq.JObject]::Parse("{ array: " + $rawJson + " }")
   } catch {
       Write-Warning ("Skipping tag '" + $currentTag + "': " + $_.Exception.Message)
       continue
   }

   foreach($refProp in $references['array']) {

       $uVal = $refProp['u'].ToString()
       $dVal = $refProp['d'].ToString()

       # Reuse the node of a URL that was already seen. The table is keyed by
       # URL, so the lookup must be on the key, not on the value.
       if ( $existingLabelNodesURLs.ContainsKey($uVal) ) {
          $nodeIdURL = $existingLabelNodesURLs.Get_Item($uVal)
       } else {
          $existingLabelNodesURLs.Set_Item($uVal, $URLNodeId)
          $nodeIdURL = $URLNodeId

          # create node for u (URL) and d (label)
          CreateNode $nodeIdURL $dVal $xmlDoc $nodes $rootNS $null $uVal 'URL node' | Out-Null

          $URLNodeId++
       }

       # Edge from the current tag node to the URL node.
       CreateEdge $edgeid $tagNodeIdForCurrentTag $nodeIdURL $xmlDoc $edges $rootNS | Out-Null
       $edgeid++

       # Edges from the URL node to every whitelisted tag listed on the bookmark.
       $otherTags = $refProp['t']

       for ($i = 0; $i -lt $otherTags.Count; $i++) {
         $otherTag = $otherTags[$i].ToString()

         if (!(WhiteListTag $otherTag)) {
            continue
         }

         # A whitelisted tag may have no node (it was not in the tag list);
         # skip it rather than emit an edge with an empty target.
         if (!$existingLabelNodesTags.ContainsKey($otherTag)) {
            continue
         }

         $nodeIdOtherTag = $existingLabelNodesTags.Get_Item($otherTag)

         CreateEdge $edgeid $nodeIdURL $nodeIdOtherTag $xmlDoc $edges $rootNS | Out-Null
         $edgeid++
       }
   }
   # end for each reference of the current tag
}
$wc.Dispose()

[void]$graph.AppendChild($nodes)
[void]$graph.AppendChild($edges)
$xmlDoc.Save($outputPath)


No comments: