diff options
author | Graeme Geldenhuys <graemeg@gmail.com> | 2009-10-05 00:45:24 +0200 |
---|---|---|
committer | Graeme Geldenhuys <graemeg@gmail.com> | 2009-10-05 00:45:24 +0200 |
commit | 83ae44c83c990278b31124e582804af386c35ca4 (patch) | |
tree | 446076a75f6d6566095ff1cdcfdc94d7a9b020e1 | |
parent | 37ec9092593320812c06716146b6ca46690b5759 (diff) | |
download | fpGUI-83ae44c83c990278b31124e582804af386c35ca4.tar.xz |
New unit that handles searching feature.
Signed-off-by: Graeme Geldenhuys <graemeg@gmail.com>
-rw-r--r-- | src/SearchUnit.pas | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/SearchUnit.pas b/src/SearchUnit.pas new file mode 100644 index 00000000..56600e3d --- /dev/null +++ b/src/SearchUnit.pas @@ -0,0 +1,344 @@ +Unit SearchUnit; + +{$mode objfpc}{$H+} + +// NewView - a new OS/2 Help Viewer +// Copyright 2001 Aaron Lawrence (aaronl at consultant dot com) +// This software is released under the Gnu Public License - see readme.txt + +Interface + +// Contains code to search help files. + +uses + Classes, + HelpFile, TextSearchQuery, DataTypes; + +const + // match weightings + mwFirstTitleWord = 50; + mwTitleWord = 20; + mwFirstIndexWord = 20; + mwIndexWord = 10; + mwTopicTextWord = 1; + + // note on weightings. The title/index weightings + // are multipled by word weightings. + // Topic text matches are equal to word weighting + // times word weighting. + +type + TSearchType = ( stStarts, stContains, stMatches ); + + procedure SearchHelpFile( HelpFile: THelpFile; + Query: TTextSearchQuery; + Results: TList; + HighlightWords: Int32ArrayPointer ); + + +Implementation + +uses + SysUtils, +// ACLUtility, ACLStringUtility, + HelpTopic, CompareWordUnit, nvUtilities; + +// Search the help file dictionary for words that match +// the given search word. Partial matches are considered. +// Results returns the matching word indexes. +// Relevances returns the relevance of the word stored +// at the same position +procedure SearchDictionary( HelpFile: THelpFile; + SearchWord: string; + Results: Int32ArrayPointer ); +var + DictIndex: integer; + DictWord: string; + WordRelevance: longint; +begin + SearchWord:= UpperCase( SearchWord ); + FillInt32Array( Results, HelpFile.DictionaryCount, 0 ); + + for DictIndex:= 0 to HelpFile.DictionaryCount - 1 do + begin + DictWord := HelpFile.DictionaryWords[ DictIndex ]; + WordRelevance := CompareWord( SearchWord, DictWord ); + Results^[ DictIndex ]:= WordRelevance; + end; +end; + +// Search titles of topics for given searchword +procedure SearchTopicTitles( HelpFile: THelpFile; + SearchWord: string; + Results: Int32ArrayPointer ); +var + TopicIndex: longint; + Title: string; + TitleWord: string; + Topic: TTopic; + TitleWordIndex: longint; + WordRelevance: longint; + TitleWordRelevance: longint; +begin + // Search topic titles + for TopicIndex:= 0 to HelpFile.TopicCount - 1 do + begin + Topic:= HelpFile.Topics[ TopicIndex ]; + Title:= Topic.Title; + TitleWordIndex := 0; + while Title <> '' do + begin + TitleWord:= ExtractNextValue( Title, ' ' ); + WordRelevance := CompareWOrd( SearchWord, TitleWord ); + if WordRelevance > 0 then + begin + if TitleWordIndex = 0 then + // matching the first word is best + TitleWordRelevance := mwFirstTitleWord * WordRelevance + else + TitleWordRelevance := mwTitleWord * WordRelevance; + inc( Results^[ TopicIndex ], TitleWordRelevance ); + end; + inc( TitleWordIndex ); + end; + end; +end; + +// Search index entries for given searchword +procedure SearchIndex( HelpFile: THelpFile; + SearchWord: string; + Results: Int32ArrayPointer ); +var + IndexIndex: longint; + IndexEntry: string; + IndexEntryWord: string; + Topic: TTopic; + IndexEntryWordIndex: longint; + WordRelevance: longint; + IndexEntryWordRelevance: longint; +begin + for IndexIndex:= 0 to HelpFile.Index.Count - 1 do + begin + Topic:= HelpFile.Index.Objects[ IndexIndex ] as TTopic; + IndexEntry:= HelpFile.Index[ IndexIndex ]; + IndexEntryWordIndex := 0; + while IndexEntry <> '' do + begin + IndexEntryWord:= ExtractNextValue( IndexEntry, ' ' ); + WordRelevance := CompareWord( SearchWord, IndexEntryWord ); + if WordRelevance > 0 then + begin + if IndexEntryWordIndex = 0 then + // matching the first word is best + IndexEntryWordRelevance := mwFirstIndexWord * WordRelevance + else + IndexEntryWordRelevance := mwIndexWord * WordRelevance; + inc( Results^[ Topic.Index ], IndexEntryWordRelevance ); + end; + inc( IndexEntryWordIndex ); + end; + end; +end; + +// Utility function used in decompression of search table. +// Updates the appropriate entry in Results array. +// The word being matched is given in DictIndex and is +// used to count the actual occurrences of the word +// within the topic +{procedure AddTopicFoundInTopicText( TopicIndex: int16; + Results: Int32ArrayPointer; + DictIndex: longint; + WordRelevance: longint ); +var + Topic: TTopic; + Relevance: longint; +begin + Topic:= _Topics[ TopicIndex ]; + Relevance := mwTopicTextWord + * Topic.CountWord( DictIndex ) + * WordRelevance; + inc( Results^[ TopicIndex ], Relevance ); + +end;} + +// ------------------------------------------------------ + +// Master search function. Given a search query, +// searches topic text, titles, index entries. +// Matching topics are added to TList, with their +// SearchRelevance set appropriately. +procedure SearchHelpFile( HelpFile: THelpFile; + Query: TTextSearchQuery; + Results: TList; + HighlightWords: Int32ArrayPointer ); +var + Topic: TTopic; + TopicIndex: longint; + TermIndex: longint; + Term: TSearchTerm; + TopicMatches: Int32ArrayPointer; + TopicRelevancesForTerm: Int32ArrayPointer; + TopicMatchedTerm: boolean; + + WordRelevance: longint; + DictionaryRelevances: Int32ArrayPointer; + DictIndex: longint; + TopicRelevanceForTerm: longint; + TopicWordCount: longint; +begin + // Reset flags per topic + for TopicIndex := 0 to HelpFile.TopicCount - 1 do + begin + Topic := HelpFile.Topics[ TopicIndex ]; + Topic.FoundInSearch := false; + Topic.ExcludedInSearch := false; + Topic.SearchRelevance := 0; + end; + + if HighlightWords <> nil then + // Clear the highlightwords array + FillInt32Array( HighlightWords, HelpFile.DictionaryCount, 0 ); + + // Get memory for dictionary/topic relevance arrays + GetMem( DictionaryRelevances, HelpFile.DictionaryCount * sizeof( longint ) ); + GetMem( TopicMatches, HelpFile.TopicCount * sizeof( longint ) ); + GetMem( TopicRelevancesForTerm, HelpFile.TopicCount * sizeof( longint ) ); + + for TermIndex := 0 to Query.TermCount - 1 do + begin + Term := Query.Term[ TermIndex ]; + + FillInt32Array( TopicRelevancesForTerm, HelpFile.TopicCount, 0 ); + + // Search the dictionary for matches. + SearchDictionary( HelpFile, Term.Text, DictionaryRelevances ); + + // Update the highlight words array. + // (effectively an OR) + if HighlightWords <> nil then + begin + if Term.CombineMethod in [ cmAnd, cmOr ] then + begin + for DictIndex := 0 to HelpFile.DictionaryCount - 1 do + inc( HighlightWords^[ DictIndex ], DictionaryRelevances^[ DictIndex ] ); + end; + end; + + // For each word in the dictionary that matches + // this search word, search topics/titles/index + for DictIndex := 0 to HelpFile.DictionaryCount - 1 do + begin + WordRelevance := DictionaryRelevances^[ DictIndex ]; + if WordRelevance > 0 then + begin + // Search for occurrences of this word + // within the text of topics + HelpFile.SearchTable.Search( DictIndex, TopicMatches ); + + // Work out total relevance for each topic found: + for TopicIndex := 0 to HelpFile.TopicCount - 1 do + begin + if TopicMatches^[ TopicIndex ] > 0 then + begin + // Search table indicates word occurs in + // this topic, so count number of + // occurrences to get relevance + Topic := HelpFile.Topics[ TopicIndex ]; + + TopicWordCount := Topic.CountWord( DictIndex ); + TopicRelevancesForTerm^[ TopicIndex ] := TopicWordCount * WordRelevance; + end; + end; + end; + end; + + // Search titles and index + SearchTopicTitles( HelpFile, Term.Text, TopicRelevancesForTerm ); + SearchIndex( HelpFile, Term.Text, TopicRelevancesForTerm ); + + // Set match flags for each topic, marking + // as found or excluded depending on combine + // method + for TopicIndex := 0 to HelpFile.TopicCount - 1 do + begin + Topic := HelpFile.Topics[ TopicIndex ]; + TopicRelevanceForTerm := TopicRelevancesForTerm^[ TopicIndex ]; + TopicMatchedTerm := TopicRelevanceForTerm > 0; + case Term.CombineMethod of + cmAnd: + if not TopicMatchedTerm then + Topic.ExcludedInSearch := true + else + Topic.FoundInSearch := true; + + cmNot: + if TopicMatchedTerm then + Topic.ExcludedInSearch := true; + + cmOr: + if TopicMatchedTerm then + Topic.FoundInSearch := true; + end; + if TopicMatchedTerm then + inc( Topic.SearchRelevance, TopicRelevanceForTerm ); + end; + + // loop for next word... + end; + + // Now find topics that DID have a match + // and did NOT have an exclusion match + // ... add the topic to result list + for TopicIndex := 0 to HelpFile.TopicCount - 1 do + begin + Topic := HelpFile.Topics[ TopicIndex ]; + if Topic.FoundInSearch + and ( not Topic.ExcludedInSearch ) then + begin + Results.Add( Topic ); + end; + end; + + FreeMem( TopicRelevancesForTerm, HelpFile.TopicCount * sizeof( longint ) ); + FreeMem( TopicMatches, HelpFile.TopicCount * sizeof( longint ) ); + FreeMem( DictionaryRelevances, HelpFile.DictionaryCount * sizeof( longint ) ); +end; + +function ExtractNextIPFWordPart( var Word: string ): string; +var + CharIndex: longint; +begin + assert( Length( Word ) > 0 ); + CharIndex := 2; + if IsDigit( Word[ 1 ] ) then + begin + // extract string of digits + while CharIndex <= Length( Word ) do + begin + if not IsDigit( Word[ CharIndex ] ) then + break; + inc( CharIndex ); + end; + end + else if IsAlpha( Word[ 1 ] ) then + begin + // extract string of letters + while CharIndex <= Length( Word ) do + begin + if not IsAlpha( Word[ CharIndex ] ) then + break; + inc( CharIndex ); + end; + end + else + begin + // extract single non-alphanumeric symbol + end; + assert( CharIndex > 1 ); + Result := Copy(Word, 0, CharIndex-1); +// Result := StrLeft( Word, CharIndex - 1 ); + Delete( Word, 1, CharIndex - 1 ) +end; + +Initialization +End. |