This is a chained version of these seperate codes:
Extract form data of Folder of PDFs (using PDFtk)
viewtopic.php?f=21&t=91586
Convert Folder of Formdata.txt (created with PDFtk) to .xfdf
viewtopic.php?f=21&t=91587
XFDF to CSV-Converter
viewtopic.php?f=21&t=91524
Calc: Replace "\n" with linebreaks
viewtopic.php?f=21&t=91651
At the End a "merged.csv" is created in the chosen Folder and automatically opened in Calc.
Code: Select all
Sub Folder_of_PDFs_to_CSV
If (Not GlobalScope.BasicLibraries.isLibraryLoaded("Tools")) Then GlobalScope.BasicLibraries.LoadLibrary("Tools")
oSFA = createUnoService("com.sun.star.ucb.SimpleFileAccess")
sFolderpath=""
rem---folderpicker
rem to not use the folderpicker set the folder here and uncomment:
rem sFolderpath=converttourl("C:\Users\kai\Desktop")
if sFolderpath="" then
oDialog = CreateUnoService("com.sun.star.ui.dialogs.FolderPicker")
If oDialog.Execute() = 1 Then
sFolderpath = oDialog.getDirectory
else
exit sub
end if
end if
if not oSFA.Exists(sFolderpath) then
msgbox "Folder not found!"
exit sub
end if
rem-----if file is opened in another application
if fileexists(sFolderpath & "/" & "merged.csv") then
On Error goto Fileaccesserror
proceed:
kill(sFolderpath & "/" & "merged.csv")
end if
rem oProgressBar = ThisComponent.CurrentController.getStatusIndicator()
rem oProgressBar.start( "Creating CSV", 4 )
rem oProgressBar.setValue( 1 )
PDFTK_dumpdatafields_of_Folder_with_PDFs_to_txt_files(sFolderpath)
rem oProgressBar.setValue( 2 )
Convert_Folder_of_pdftk_dumpdatafieldsTXTs_to_XFDFs(sFolderpath)
rem oProgressBar.setValue( 3 )
Convert_multipleXFDFs_to_1CSV(sFolderpath)
rem oProgressBar.setValue( 4 )
calc_replaceLinebreak_UsedArea
rem oProgressBar.end
exit sub
Fileaccesserror:
closemessage = """" & convertfromurl(sFolderpath & "/" & "merged.csv") & """ seems to be opened in another application." & chr(10) & chr(10) & "Please close the file!"
msgbox(closemessage,0)
rem resume proceed
End sub
Sub PDFTK_dumpdatafields_of_Folder_with_PDFs_to_txt_files(sFolderpath)
pdftkapp="pdftk"
sFileName=""
sFileName = Dir(sFolderpath & "/", 0)
Do While (sFileName <> "")
if GetFileNameExtension(sFileName)="pdf" then
Fileurl=sFolderpath & "/" & sFileName
inputfile=convertfromurl(Fileurl)
mkdir(sFolderpath & "/" & "PDFtk-DataFields")
rem indirect way because of PDFtk utf8 filename bug
outputfiledummy = sFolderpath & "/" & "PDFtk-DataFields" & "/" & "datafields.txt"
outputfile = sFolderpath & "/" & "PDFtk-DataFields" & "/" & getfilenamewithoutextension(sFileName) & ".txt"
if fileexists(outputfiledummy) then kill(outputfiledummy)
outputfiledummy=convertfromurl(outputfiledummy)
if fileexists(outputfile) then kill(outputfile)
Shell(pdftkapp,0, """" & inputfile & """" & " dump_data_fields_utf8 output " & """" & outputfiledummy & """", true)
rem indirect way because of PDFtk utf8 filename bug
if fileexists(outputfiledummy) then
Name outputfiledummy as outputfile
end if
end if
sFileName = Dir()
loop
rem msgbox "Files saved to: " & convertfromurl(sFolderpath & "/" & "PDFtk-DataFields")
End Sub
Sub Convert_Folder_of_pdftk_dumpdatafieldsTXTs_to_XFDFs(sFolderpath)
oSFA = createUnoService("com.sun.star.ucb.SimpleFileAccess")
sFolderpath2 = sFolderpath & "/" & "PDFtk-DataFields"
sFileName=""
sFileName = Dir(sFolderpath2 & "/", 0)
Do While (sFileName <> "")
if GetFileNameExtension(sFileName)="txt" then
Fileurl=sFolderpath2 & "/" & sFileName
oTextinputStream = CreateUnoService("com.sun.star.io.TextInputStream")
inputfile = converttourl(Fileurl)
oinputStream = oSFA.openFileRead(inputfile)
oTextinputStream.setInputStream(oinputStream)
mkdir(sFolderpath2 & "/" & "XFDF")
outputfile = converttourl(sFolderpath2 & "/" & "XFDF" & "/" & getfilenamewithoutextension(sFileName) & ".xfdf")
if fileexists(outputfile) then kill(outputfile)
oTextoutputStream = CreateUnoService("com.sun.star.io.TextOutputStream")
ooutputStream = oSFA.openFileWrite(outputfile)
oTextoutputStream.setOutputStream(ooutputStream)
oTextoutputStream.writestring("<?xml version=""1.0"" encoding=""UTF-8""?>" & chr(10) & "<xfdf xmlns=""http://ns.adobe.com/xfdf/"" xml:space=""preserve"">" & chr(10))
rem oTextoutputStream.writestring("<f href=""" & convertfromurl(inputfile) & """/>" & chr(10))
oTextoutputStream.writestring("<fields>" & chr(10))
foundFieldName=0
foundFieldValue=0
do while oTextinputStream.isEOF()=0
stringA = oTextinputStream.readLine()
if instr(stringA,"Field")=0 and instr(stringA,": ")=0 and foundFieldValue=1 then
FieldValue = FieldValue & chr(10) & stringA
elseif foundFieldValue=1 then
oTextoutputStream.writestring("<value>" & FieldValue & "</value>" & chr(10))
foundFieldValue=0
end if
if instr(stringA,"FieldName: ")=1 then
FieldName=mid(stringA,len("FieldName: ")+1)
foundFieldName=1
oTextoutputStream.writestring("<field name=""" & FieldName & """>" & chr(10))
end if
if instr(stringA,"FieldValue: ")=1 then
FieldValue=mid(stringA,len("FieldValue: ")+1)
foundFieldValue=1
end if
if instr(stringA,"---")=1 and foundFieldName then
oTextoutputStream.writestring("</field>" & chr(10))
foundFieldName=0
end if
loop
if foundFieldName then oTextoutputStream.writestring("</field>" & chr(10))
oTextoutputStream.writestring("</fields>" & chr(10) & "</xfdf>" )
oTextinputStream.closeInput
oTextoutputStream.closeOutput()
end if
sFileName = Dir()
loop
End Sub
Sub Convert_multipleXFDFs_to_1CSV(sFolderpath)
sFolderpathOrg=sFolderpath
sFolderpath=sFolderpath & "/" & "PDFtk-DataFields" & "/" & "XFDF"
dim allXFDFHeaders(0)
dim allXFDFValues()
sFileName=""
sFileName = Dir(sFolderpath & "/", 0)
Do While (sFileName <> "")
if GetFileNameExtension(sFileName)="xfdf" then
Fileurl=sFolderpath & "/" & sFileName
oSFA = createUnoService("com.sun.star.ucb.SimpleFileAccess")
oInStream = oSFA.openFileRead(Fileurl)
oDOM = createUnoService("com.sun.star.xml.dom.DocumentBuilder")
oXML = oDOM.parse(oInStream)
oInStream.closeInput
nodeListFields = oXML.getElementsByTagName("fields").item(0).getElementsByTagName("field")
for i=0 to nodeListFields.length-1
curFieldNode = nodeListFields.item(i)
If curFieldNode.hasAttributes Then
nodeAttributes = curFieldNode.Attributes
For k=0 To nodeAttributes.Length - 1
additem=1
for j=0 to ubound(allXFDFHeaders)
if allXFDFHeaders(j)=convertXMLstring(nodeAttributes.item(k).Value) then additem=0
next j
if additem=1 then
redim preserve allXFDFHeaders(ubound(allXFDFHeaders)+1)
allXFDFHeaders(ubound(allXFDFHeaders))=convertXMLstring(nodeAttributes.item(k).Value)
end if
Next k
end if
next i
end if
sFileName = Dir()
loop
k=0
i=0
j=0
sFileName = Dir(sFolderpath & "/", 0)
Do While (sFileName <> "")
if GetFileNameExtension(sFileName)="xfdf" then
Fileurl=sFolderpath & "/" & sFileName
oSFA = createUnoService("com.sun.star.ucb.SimpleFileAccess")
oInStream = oSFA.openFileRead(Fileurl)
oDOM = createUnoService("com.sun.star.xml.dom.DocumentBuilder")
oXML = oDOM.parse(oInStream)
oInStream.closeInput
nodeListFields = oXML.getElementsByTagName("fields").item(0).getElementsByTagName("field")
FieldNameResult=""
FieldValueResult=""
for i=0 to nodeListFields.length-1
curFieldNode = nodeListFields.item(i)
If curFieldNode.hasAttributes Then
nodeAttributes = curFieldNode.Attributes
For k=0 To nodeAttributes.Length - 1
FieldNameResult = FieldNameResult & chr(9) & convertXMLstring(nodeAttributes.item(k).Value)
Next k
end if
childcounter=1
sNodeValue=""
sNodeValuetemp=""
If curFieldNode.hasChildNodes Then
curChildNode = curFieldNode.firstChild
Do While NOT isNull(curChildNode)
If curChildNode.LocalName = "value" then
if curChildNode.hasChildNodes then
curChildChildNode=curChildNode.firstChild
sNodeValuetemp= curChildChildNode.NodeValue
sNodeValuetemp= replace(sNodeValuetemp,chr(10), "\n")
sNodeValuetemp= replace(sNodeValuetemp,";", ",")
else
sNodeValuetemp=""
end if
if childcounter=1 then
sNodeValue=sNodeValuetemp
else
sNodeValue = sNodeValue & "\n" & sNodeValuetemp
end if
childcounter=childcounter+1
end if
curChildNode = curChildNode.nextSibling
loop
FieldValueResult = FieldValueResult & chr(9) & sNodeValue
end if
next i
m=0
n=0
XFDFHeaders = split(FieldNameResult,chr(9))
XFDFValues = split(FieldValueResult,chr(9))
redim allXFDFValues(ubound(allXFDFHeaders))
for m=0 to ubound(XFDFHeaders)
for n=0 to ubound(allXFDFHeaders)
if XFDFHeaders(m)=allXFDFHeaders(n) then allXFDFValues(n)=XFDFValues(m)
next n
next m
FieldValueResult2 = FieldValueResult2 & getfilenamewithoutextension(sFileName) & ".pdf" & join(allXFDFValues(),";") & chr(13) & chr(10)
end if
sFileName = Dir()
loop
if ubound(allXFDFHeaders())=0 then
msgbox "No .xfdf files found!"
exit sub
end if
Outstring = "File" & join(allXFDFHeaders(),";") & chr(13) & chr(10) & FieldValueResult2
outputfile = sFolderpathOrg & "/" & "merged.csv"
if fileexists(outputfile) then kill(outputfile)
oTextoutputStream = CreateUnoService("com.sun.star.io.TextOutputStream")
ooutputStream = oSFA.openFileWrite(outputfile)
oTextoutputStream.setOutputStream(ooutputStream)
oTextoutputStream.writestring(Outstring)
oTextoutputStream.closeOutput()
rem msgbox "Saved: " & convertfromurl(outputfile)
OpenCSVinCalc(outputfile)
End Sub
rem-----for opening in Calc
Sub OpenCSVinCalc(Url)
Dim oDocument as Object
fnURL=ConvertToURL(Url)
'Create new document and import data
oDocument = StarDesktop.LoadComponentFromURL( fnURL, "_blank", 0, _
Array(MakePropertyValue( "FilterName", "Text - txt - csv (StarCalc)" ), _
MakePropertyValue( "FilterOptions", "59,34,76,1,1" )
End Sub
Function MakePropertyValue( Optional cName As String, Optional uValue ) As com.sun.star.beans.PropertyValue
oPropertyValue = createUnoStruct( "com.sun.star.beans.PropertyValue" )
If Not IsMissing( cName ) Then
oPropertyValue.Name = cName
EndIf
If Not IsMissing( uValue ) Then
oPropertyValue.Value = uValue
EndIf
MakePropertyValue() = oPropertyValue
End Function
function convertXMLstring(instring)
instring=replace(instring,"#","%")
convertXMLstring = convertfromurl(instring)
end function
sub calc_replaceLinebreak_UsedArea
Doc = ThisComponent
If Doc.SupportsService("com.sun.star.sheet.SpreadsheetDocument") Then
else
exit sub
end if
Sheet = Doc.Sheets.getByIndex(0)
oCursor = Sheet.createCursor()
oCursor.gotoEndofUsedArea(false)
for i=0 to oCursor.RangeAddress.EndColumn
j=0
for j=0 to oCursor.RangeAddress.EndRow
Sheet.getCellByPosition(i,j).string = replace(Sheet.getCellByPosition(i,j).string,"\n",chr(10))
'Sheet.getCellByPosition(i,j).string = replace(Sheet.getCellByPosition(i,j).string,chr(10),"\n")
next j
next i
for k=0 to oCursor.RangeAddress.EndRow
oRow = Sheet.getRows().getByIndex(oCursor.RangeAddress.EndRow-k)
oRow.OptimalHeight()=True
next
for m=0 to oCursor.RangeAddress.EndColumn
oColumn = Sheet.getColumns().getByIndex(oCursor.RangeAddress.EndColumn-m)
oColumn.OptimalWidth()=True
next
end sub