_________________________________________________________________________________
OO's word count doesn't give a dictionary-word count - but counts anything between separators. (LibreOffice has fixed this, so this is just for OO)
There is a function I got from here
viewtopic.php?f=20&t=82678&hilit=+count#p382966
Which I've modified as it gives some results that include just punctuation
Note: THIS WILL BE VERY SLOW WITH A LARGE OR EVEN A MODEST SIZED DOCUMENT
Better ideas/methods for a word count welcome...
Edit: note, just includes words in the main text paragraphs (through an enumeration), nothing else
Edit2: slight mods to allow choice of count of numbers in main sub and commenting out a test line I left in and shouldn't have.
Code: Select all
'uncomment lines with wdlist in for msgbox with list of words
'dim wdlist
sub CountWordsThisComponentMainText
LANGUAGE = ooolang
txt = thiscomponent.text
en = txt.createenumeration
counttype = com.sun.star.i18n.WordType.DICTIONARY_WORD
acceptnos = true
do until en.hasmoreelements =false
p = en.nextelement
on error goto hr
tot = tot +GetStringWordcount(p.string,counttype,LANGUAGE, acceptnos)
hr:
loop
msgbox "Dictionary Word Count =" & tot,,Thiscomponent.currentcontroller.frame.title
'msgbox wdlist
end sub
'modified from https://forum.openoffice.org/en/forum/viewtopic.php?f=20&t=82678&hilit=+count#p382966
'for choice of count type, and language and to exclude puctuation only words
Function GetStringWordcount(aString,counttype,LANGUAGE, acceptnos)
'*******************************************
'Function: Count Words in provided string
'Author: Andrew Brown
'Last updated 18 March 2016
'Last updated by:Daniel Wilson
'*******************************************
' the ultimate, using the same breakiterator as the program
'from the api 'com.sun.star.i18n.WordType.WORD_COUNT
'const short ANY_WORD = 0;
'Any "words" - words in the meaning of same character types, collection of alphanumeric characters, or collection of non-alphanumeric characters.
'const short ANYWORD_IGNOREWHITESPACES = 1;
' Any "words" - words in the meaning of same character types, collection of alphanumeric characters, or collection of non-alphanumeric characters except blanks.
'const short DICTIONARY_WORD = 2;
' "words" - in the meaning of a collection of alphanumeric characters and some punctuations, like dot for abbreviation.
'const short WORD_COUNT = 3;
' The mode for counting words, it will combine punctuations and spaces as word trail.
'
Dim mystartpos As Long
Dim numwords,nw
Dim nextwd As New com.sun.star.i18n.Boundary
Dim aLocale As New com.sun.star.lang.Locale
Dim brk
aLocale.Language=LANGUAGE ' "en"
numwords=0
mystartpos=0
brk=CreateUNOService("com.sun.star.i18n.BreakIterator")
astring = " " & astring 'doesn't count first word
nextwd=brk.nextWord(aString,startpos,aLocale,counttype)
' com.sun.star.i18n.WordType.WORD_COUNT
Do While nextwd.startPos<> nextwd.endPos
wd = mid(aString,nextwd.startpos+1,nextwd.endpos - nextwd.startpos)
if isValidWord(wd,acceptnos) then
' wdlist =wdlist & wd & chr(9)
numwords=numwords+1
end if
nw=nextwd.startpos
nextwd=brk.nextWord(aString,nw,aLocale,counttype)
' com.sun.star.i18n.WordType.WORD_COUNT
Loop
GetStringWordcount=numwords
' msgbox numwords & " " & st
End Function
'Useful Macro Information For OpenOffice By Andrew Pitonyak
'5.7.2. OOo Locale
'Listing 5.11: Obtain the current OpenOffice.org locale.
Function OOoLang() as string
'Author : Laurent Godard
'e-mail : listes.godard@laposte.net
Dim oSet, oConfigProvider
Dim oParm(0) As New com.sun.star.beans.PropertyValue
Dim sProvider$, sAccess$
sProvider = "com.sun.star.configuration.ConfigurationProvider"
sAccess = "com.sun.star.configuration.ConfigurationAccess"
oConfigProvider = createUnoService(sProvider)
oParm(0).Name = "nodepath"
oParm(0).Value = "/org.openoffice.Setup/L10N"
oSet = oConfigProvider.createInstanceWithArguments(sAccess, oParm())
Dim OOLangue as string
OOLangue= oSet.getbyname("ooLocale") 'en-US
OOLang=lcase(Left(trim(OOLangue),2)) 'en
End Function
function isValidWord(wd,acceptnos) 'eliminate words only containing punctuation
'and numbers if want those not counted
punct = ".!?,:;“”‘’()[]{}<>-—–/’…" & chr(34) & chr(39) '34 double quote '39 single quote
nos= "0123456789"
isValidWord =false
for i = 1 to len(wd)
c=(mid(wd,i,1))
if instr(1,nos,c)>0 then
if acceptnos then
isValidWord = true
exit for
end if
elseif instr(1,punct,c)>0 then
else
isValidWord = true
exit for
end if
next
End function