Tuesday, April 28, 2009

ETL using Microsoft Excel, XML and XSLT

Microsoft Excel VBA is a powerful tool to provide a Facade for simple ETL process.

  • GUI designed and implemented by Microsoft Excel VBA form
  • Business Logic implemented by XSLT
  • Data is stored as XML format



Sub OpenUserForm()
frmExchangeRate.Show
End Sub

Sub xmltocsv()
' Dim oDOM As MSXML2.DOMDocument
' Dim oXML As MSXML2.DOMDocument
' Dim oXSL As MSXML2.DOMDocument
Dim oDOM As MSXML2.FreeThreadedDOMDocument
Dim oXML As MSXML2.FreeThreadedDOMDocument
Dim oXSL As MSXML2.FreeThreadedDOMDocument
Dim oXSLTemplate As XSLTemplate
Dim xslProc As IXSLProcessor
Dim strHTML As String
Dim strTransform As String
Dim currentDir As String

currentDir = CurDir()
Set oDOM = CreateObject("MSXML2.FreeThreadedDOMDocument")
oDOM.async = False
' oDOM.Load currentDir & "\" & frmExchangeRate.txtXSLFilename.Text
oDOM.loadXML Sheets("Run VBA").Range("A1").Value
Set oXSL = CreateObject("MSXML2.FreeThreadedDOMDocument")
oXSL.async = False
oXSL.Load currentDir & "\" & frmExchangeRate.txtXSLFilename.Text
' oXSL.loadXML Sheets("Run VBA").Range("B1").Value
Set XSLTemplate = CreateObject("MSXML2.XSLTemplate")

XSLTemplate.stylesheet = oXSL
Set xslProc = XSLTemplate.createProcessor()
xslProc.addParameter "GMFile", currentDir & "\" & frmExchangeRate.txtGMFilename.Value
xslProc.addParameter "IBMFile", currentDir & "\" & rmExchangeRate.txtIBMFilename.Value
xslProc.addParameter "ISOFile", currentDir & "\" & frmExchangeRate.txtISOFilename.Value
xslProc.input = oDOM
xslProc.transform
strTransform = xslProc.output
'your XSLT stylesheet should be saved as unicode or UTF not ansii
'note encoding instruction maybe needed for european language encoding say swedish characters
' strTransform = oDOM.transformNode(oXSL)
strHTML = "" & vbCrLf & _
"<root>" & strTransform & "</root>"
WriteFile "ERMSExchangeRate" & Format(Now, "yyyymmddhhmmss") & ".tbl", strTransform
'
'the above XSLT transform with xsl file converts this to a flat csv format file
Set oDOM = Nothing
Set oXML = Nothing
Set oXSL = Nothing
Set XSLTemplate = Nothing
Set xslProc = NothingEnd Sub

Public Sub WriteFile(ByVal sFileName As String, ByVal sContents As String)
' Dump XML String to File for debugging
Dim fhFile As Integer
fhFile = FreeFile
' Debug.Print "Length of string=" & Len(sContents)
Open sFileName For Output As #fhFile
Print #fhFile, sContents;
Close #fhFile
Debug.Print "Out File" & sFileName
End Sub

XSLT File
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:msxsl="urn:schemas-microsoft-com:xslt" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:user="mynamespace" xmlns="urn:schemas-microsoft-com:office:spreadsheet" version="1.0"
>
<msxsl:script implements-prefix="user"><![CDATA[
]]></msxsl:script>
<xsl:output method="text"/>


<xsl:param name="File1"/>
<xsl:param name="File2"/>
<xsl:param name="ISOFile"/>

<xsl:variable name="iso4217_list">
<root> <xsl:for-each select="document($ISOFile)//ss:Workbook/ss:Worksheet[1]/ss:Table[1]/ss:Row"> <item> <country><xsl:value-of select="translate(ss:Cell[3]/ss:Data/text(),'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')"/></country>
<code><xsl:value-of select="number(ss:Cell[4]/ss:Data/text())"/></code>
</item>
</xsl:for-each>
</root>
</xsl:variable>


<xsl:variable name="gm_exchange_rate_list">
<root>
<xsl:for-each select="document($File1)//ss:Workbook/ss:Worksheet[1]/ss:Table[1]/ss:Row[position() > 1]">
<xsl:variable name="country"><xsl:value-of select="ss:Cell[4]/ss:Data/text()"/></xsl:variable>
<item>
<time_stamp><xsl:value-of select="ss:Cell[1]/ss:Data/text()"/></time_stamp>
<rate_type><xsl:value-of select="ss:Cell[2]/ss:Data/text()"/></rate_type>
<data><xsl:value-of select="ss:Cell[3]/ss:Data/text()"/></data>
<country><xsl:value-of select="$country"/></country>
<country_name><xsl:value-of select="ss:Cell[5]/ss:Data/text()"/></country_name>
<code><xsl:value-of select="msxsl:node-set($iso4217_list)/ss:root/ss:item[ss:country/text()=$country][1]/ss:code/text()"/></code>
<multiplier_rate><xsl:value-of select="ss:Cell[6]/ss:Data/text()"/></multiplier_rate>
</item>
</xsl:for-each>
</root>
</xsl:variable>

<xsl:variable name="eLedger_date"><xsl:value-of select="substring(msxsl:node-set($gm_exchange_rate_list)/ss:root/ss:item[2]/ss:time_stamp/text(), 1, 8)"/></xsl:variable>
<xsl:variable name="eLedger_time"><xsl:value-of select="substring(msxsl:node-set($gm_exchange_rate_list)/ss:root/ss:item[2]/ss:time_stamp/text(), 9, 6)"/></xsl:variable>
<xsl:template match="/">HDR<xsl:text>^</xsl:text>4<xsl:text>^</xsl:text>1<xsl:text>^</xsl:text>R<xsl:text>^</xsl:text>1<xsl:text>^</xsl:text>1<xsl:text>^</xsl:text><xsl:value-of select="$eLedger_date"/><xsl:text>^</xsl:text><xsl:value-of select="$eLedger_time"/><xsl:text>^</xsl:text>General Motors<xsl:text>^</xsl:text>E-Ledger<xsl:text> </xsl:text>
<xsl:for-each select="msxsl:node-set($gm_exchange_rate_list)/ss:root/ss:item">
<xsl:variable name="code"><xsl:value-of select="ss:Cell[1]/ss:Data/text()"/></xsl:variable>
<xsl:value-of select="ss:code/text()"/><xsl:text>^</xsl:text><xsl:value-of select="ss:multiplier_rate/text()"/><xsl:text> </xsl:text>
</xsl:for-each>TRL<xsl:text>^</xsl:text><xsl:value-of select="count(msxsl:node-set($gm_exchange_rate_list)/ss:root/ss:item)"/>
</xsl:template>
</xsl:stylesheet>


Process Steps
1 Save "eFXControllerSettlementsMonthlyBalSheet.xls" as "eFXControllerSettlementsMonthlyBalSheet.xml" ][option: xml spreedsheet (*.xml)] in the same folder
2 Save "ersexrate001.tbl" as "ersexrate001.xml" ][option: xml spreedsheet (*.xml)] in the same folder
3 Open EXCEL_VBA.xls from Microsoft Excel (Do not double click the file, use menu Start->Programs->Microsoft Office->Microsoft Office Excel 2007, and then open the file)
4 After ERMS_VBA.xls file is opened, a dialog box displays. Click "Run" button.
5 Message indicates that a new "tbl" file is created in the same directory.
6 Open the newly created tbl file in Microsoft Excel.

No comments:

Post a Comment