Date: 02-16-2022
Return to Index
created by gbSnippets
'Compilable Example: (Jose Includes)
'Credit to Borje Hagsten
'====================================================================
' qSimilarity.bas - to be compiled with PBWIN10 or PBWIN9
' Public Domain similarity routine by Borje Hagsten, April 2014.
' Free to use and abuse in any way you like.
'--------------------------------------------------------------------
' qSimilarity is a simplified and very fast routine for scanning a
' wordlist array for most similar matches to a word. There are
' better and by far more advanced similarity search algorithms out
' there, measuring distance, pattern and strange soundex, etc, but
' this one is extremely fast and still gives pretty good results.
' Can be useful in for example a spelling checker, where both speed
' and similarity result often goes hand in hand.
'
' Code, compiled exe and 110,208 word dictionary file (English.spl)
' included in zip file. Dictionary file is just a simple text file
' with words separated by line feeds. Plenty of alternatives around
' on the net, if other wordlists/languages are desired.
'
'
'====================================================================
' Declares
'--------------------------------------------------------------------
#COMPILE EXE
#DIM ALL
'--------------------------------------------------------------------
#INCLUDE "WIN32API.INC"
'--------------------------------------------------------------------
%IDC_LABEL1 = 101
%IDC_LABEL2 = 102
%IDC_TEXTFIELD1 = 141
%IDC_LISTBOX1 = 161
$WORDLIST = "English.spl" ' in same folder as exe, please..
'====================================================================
' Program entrance
'--------------------------------------------------------------------
FUNCTION PBMAIN () AS LONG
LOCAL hDlg AS DWORD
DIALOG NEW 0, "Similarity search",,, 185, 125, %WS_CAPTION OR %WS_SYSMENU TO hDlg
'------------------------------------------------------------------
CONTROL ADD LABEL, hDlg, %IDC_LABEL1, "Find:", 5, 8, 20, 10
CONTROL ADD TEXTBOX, hDlg, %IDC_TEXTFIELD1, "esearch", 25, 5, 100, 14
CONTROL ADD LABEL, hDlg, %IDC_LABEL2, "Result", 5, 26, 120, 9
CONTROL ADD LISTBOX, hDlg, %IDC_LISTBOX1, , 4, 35, 120, 85, _
%WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR _
%WS_VSCROLL OR %LBS_NOINTEGRALHEIGHT OR _
%LBS_NOTIFY OR %LBS_USETABSTOPS, %WS_EX_CLIENTEDGE
REDIM lt(0) AS LONG : lt(0) = 60 ' move tab stop a bit to the right
CONTROL SEND hDlg, %IDC_LISTBOX1, %LB_SETTABSTOPS, 1, VARPTR(lt(0))
CONTROL ADD BUTTON, hDlg, %IDOK, "Find", 130, 85, 50, 14
CONTROL ADD BUTTON, hDlg, %IDCANCEL, "Quit", 130, 105, 50, 14
'-------------------------------------------------------------------
DIALOG SHOW MODAL hDlg, CALL DlgProc
END FUNCTION
'======================================================================
' Main Dialog procedure
'--------------------------------------------------------------------
CALLBACK FUNCTION DlgProc() AS LONG
LOCAL c, d, WdLen AS LONG, r AS SINGLE
LOCAL sWord AS STRING
DIM sWordList() AS STATIC STRING
SELECT CASE CB.MSG
CASE %WM_INITDIALOG '<- is received right before dialog is shown
#IF %PB_REVISION < &H1000
REDIM sWordList(0) ' needed for PBWIN9, but not PBWIN10
#ENDIF
c = FileToArray($WORDLIST, sWordList())
CONTROL SET TEXT CB.HNDL, %IDC_LABEL2, _
"Result: " + FORMAT$(d) + _
" / " + FORMAT$(UBOUND(sWordList)+1) + _
" ( Similarity % )"
IF c = 0 THEN
MSGBOX "Wordlist not found or empty!", %MB_ICONERROR, "Error!"
CONTROL DISABLE CB.HNDL, %IDOK
END IF
CASE %WM_COMMAND
SELECT CASE CB.CTL
CASE %IDOK '<- also received when Enter key is pressed
IF CB.CTLMSG = %BN_CLICKED OR CB.CTLMSG = 1 THEN
'------------------------------------------------------
' 1. Get word to search for
'------------------------------------------------------
CONTROL GET TEXT CB.HNDL, %IDC_TEXTFIELD1 TO sWord
sWord = LCASE$(TRIM$(sWord))
WdLen = LEN(sWord)
IF WdLen THEN
'--------------------------------------------------
' 2. run similarity check against wordlist array
'--------------------------------------------------
LISTBOX RESET CB.HNDL, %IDC_LISTBOX1
REDIM SimWord(0) AS STRING
REDIM SimValue(0) AS SINGLE
d = 0
FOR c = 0 TO UBOUND(sWordList)-1
r = qSimilarity(sWordList(c), sWord)
' if 75% similarity or better, store result.
' 100% means word is in wordlist, but here we
' list it anyway, just for fun..
IF r >= 75 THEN
IF d > UBOUND(SimWord) THEN
REDIM PRESERVE SimWord(UBOUND(SimWord) + 40)
REDIM PRESERVE SimValue(UBOUND(SimValue) + 40)
END IF
SimValue(d) = r
SimWord(d) = sWordList(c)
INCR d
END IF
NEXT
'--------------------------------------------------
' 3. sort result on similarity % and add to listbox
'--------------------------------------------------
IF d > 0 THEN
REDIM PRESERVE SimWord(d-1) ' trim arrays and sort them
REDIM PRESERVE SimValue(d-1)
ARRAY SORT SimValue(), TAGARRAY SimWord(), DESCEND
'--------------------------------------------------
FOR c = 0 TO d-1
LISTBOX ADD CB.HNDL, %IDC_LISTBOX1, SimWord(c) + $TAB + _
"( " + FORMAT$(SimValue(c)) + "% )"
NEXT
END IF
CONTROL SET TEXT CB.HNDL, %IDC_LABEL2, "Result: " + FORMAT$(d) + _
" / " + FORMAT$(UBOUND(sWordList)+1) + _
" ( Similarity % )"
END IF
END IF
CASE %IDCANCEL '<- also received when Esc key is pressed
IF CB.CTLMSG = %BN_CLICKED OR CB.CTLMSG = 1 THEN
DIALOG END CB.HNDL, 0
END IF
END SELECT
END SELECT
END FUNCTION
'====================================================================
' Load a text file (wordlist) into given array
'--------------------------------------------------------------------
FUNCTION FileToArray (BYVAL sFile AS STRING, _
sWordList() AS STRING) AS LONG
'--------------------------------------------------------------------
LOCAL ff, lCnt AS LONG, sTemp AS STRING
IF LEN(DIR$(sFile)) = 0 THEN EXIT FUNCTION 'if no file
ff = FREEFILE
OPEN sFile FOR BINARY AS #ff LEN = 8192
IF ERR THEN
MSGBOX ERROR$(ERR)
RESET : ERRCLEAR : EXIT FUNCTION
END IF
GET$ #ff, LOF(ff), sTemp
CLOSE #ff
lCnt = TALLY(sTemp, $CRLF)
REDIM sWordList(lCnt)
PARSE sTemp, sWordList(), $CRLF
FUNCTION = UBOUND(sWordList)
END FUNCTION
'====================================================================
' Compare two strings and return similarity value, 0 to 100% equal.
' Works best when String1 length <= String2, so a swap is made if not.
'--------------------------------------------------------------------
FUNCTION qSimilarity (String1 AS STRING, String2 AS STRING) AS SINGLE
LOCAL c, d, iLen1, iLen2 AS LONG
LOCAL b1, b2 AS BYTE PTR, fSim AS SINGLE
b1 = STRPTR(String1)
b2 = STRPTR(String2)
iLen1 = LEN(String1) - 1
iLen2 = LEN(String2) - 1
IF iLen1 > iLen2 THEN ' if String1 is longer than String2
c = iLen1 : iLen1 = iLen2 : iLen2 = c ' <- a little bit faster than SWAP
c = b1 : b1 = b2 : b2 = c
END IF
FOR c = 0 TO iLen1
IF @b1[c] = @b2[d] THEN ' compare letter to letter
INCR fSim ' if match, add 1.00 to result
ELSEIF d < iLen2 AND @b1[c] = @b2[d+1] THEN ' else compare with next letter
fSim = fSim + 0.99 ' if match, add 0.99 to result
END IF ' (Why 0.99? Because..)
INCR d
NEXT
FUNCTION = ((2 * fSim) / (iLen1 + iLen2 + 2)) * 100 ' return result as 0-100%
END FUNCTION
'gbs_01448
'Date: 10-17-2014
http://www.garybeene.com/sw/gbsnippets.htm