summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGraeme Geldenhuys <graemeg@gmail.com>2009-10-05 00:45:24 +0200
committerGraeme Geldenhuys <graemeg@gmail.com>2009-10-05 00:45:24 +0200
commit83ae44c83c990278b31124e582804af386c35ca4 (patch)
tree446076a75f6d6566095ff1cdcfdc94d7a9b020e1 /src
parent37ec9092593320812c06716146b6ca46690b5759 (diff)
downloadfpGUI-83ae44c83c990278b31124e582804af386c35ca4.tar.xz
New unit that handles searching feature.
Signed-off-by: Graeme Geldenhuys <graemeg@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/SearchUnit.pas344
1 files changed, 344 insertions, 0 deletions
diff --git a/src/SearchUnit.pas b/src/SearchUnit.pas
new file mode 100644
index 00000000..56600e3d
--- /dev/null
+++ b/src/SearchUnit.pas
@@ -0,0 +1,344 @@
+Unit SearchUnit;
+
+{$mode objfpc}{$H+}
+
+// NewView - a new OS/2 Help Viewer
+// Copyright 2001 Aaron Lawrence (aaronl at consultant dot com)
+// This software is released under the Gnu Public License - see readme.txt
+
+Interface
+
+// Contains code to search help files.
+
+uses
+ Classes,
+ HelpFile, TextSearchQuery, DataTypes;
+
+const
+ // match weightings
+ mwFirstTitleWord = 50;
+ mwTitleWord = 20;
+ mwFirstIndexWord = 20;
+ mwIndexWord = 10;
+ mwTopicTextWord = 1;
+
+ // Note on weightings: the title/index weightings
+ // are multiplied by the word relevance returned by CompareWord.
+ // Topic text matches score the number of occurrences of the
+ // word in the topic multiplied by the word relevance.
+
+type
+ TSearchType = ( stStarts, stContains, stMatches );
+
+ procedure SearchHelpFile( HelpFile: THelpFile;
+ Query: TTextSearchQuery;
+ Results: TList;
+ HighlightWords: Int32ArrayPointer );
+
+
+Implementation
+
+uses
+ SysUtils,
+// ACLUtility, ACLStringUtility,
+ HelpTopic, CompareWordUnit, nvUtilities;
+
+// Scores every word of the help file dictionary against SearchWord.
+//
+// The search word is upper-cased first; how well two words match
+// (including any partial matching) is decided entirely by CompareWord.
+//
+// HelpFile   - supplies DictionaryCount and DictionaryWords.
+// SearchWord - the word being looked for.
+// Results    - caller-supplied array with room for at least
+//              HelpFile.DictionaryCount entries. On return, entry i
+//              holds the CompareWord result for dictionary word i
+//              (callers in this unit treat values > 0 as a match).
+procedure SearchDictionary( HelpFile: THelpFile;
+                            SearchWord: string;
+                            Results: Int32ArrayPointer );
+var
+  WordIndex: integer;
+begin
+  SearchWord := UpperCase( SearchWord );
+
+  // Start from a clean slate before the per-word scores are written.
+  FillInt32Array( Results, HelpFile.DictionaryCount, 0 );
+
+  for WordIndex := 0 to HelpFile.DictionaryCount - 1 do
+    Results^[ WordIndex ] :=
+      CompareWord( SearchWord, HelpFile.DictionaryWords[ WordIndex ] );
+end;
+
+// Adds title-based relevance for SearchWord to Results.
+//
+// Each topic title is broken into space-separated words with
+// ExtractNextValue (which is expected to remove the extracted word
+// from the string it is given). Every word that CompareWord rates
+// above zero contributes
+//   mwFirstTitleWord * rating   if it is the first word of the title,
+//   mwTitleWord * rating        otherwise.
+//
+// Results is indexed by topic index and is only ever incremented, so
+// the caller must have initialised it beforehand.
+procedure SearchTopicTitles( HelpFile: THelpFile;
+                             SearchWord: string;
+                             Results: Int32ArrayPointer );
+var
+  TopicIndex: longint;
+  RemainingTitle: string;
+  WordPosition: longint;
+  Rating: longint;
+  Weight: longint;
+begin
+  for TopicIndex := 0 to HelpFile.TopicCount - 1 do
+  begin
+    RemainingTitle := HelpFile.Topics[ TopicIndex ].Title;
+    WordPosition := 0;
+
+    while RemainingTitle <> '' do
+    begin
+      Rating := CompareWord( SearchWord,
+                             ExtractNextValue( RemainingTitle, ' ' ) );
+      if Rating > 0 then
+      begin
+        // A hit on the leading word of the title counts the most.
+        if WordPosition = 0 then
+          Weight := mwFirstTitleWord
+        else
+          Weight := mwTitleWord;
+        inc( Results^[ TopicIndex ], Weight * Rating );
+      end;
+      inc( WordPosition );
+    end;
+  end;
+end;
+
+// Adds index-based relevance for SearchWord to Results.
+//
+// Walks HelpFile.Index, whose strings are the index entries and whose
+// Objects are the topics those entries point at. Each entry is split
+// into space-separated words with ExtractNextValue (expected to
+// consume the word it returns). Every word that CompareWord rates
+// above zero contributes
+//   mwFirstIndexWord * rating   if it is the first word of the entry,
+//   mwIndexWord * rating        otherwise,
+// to the slot of the entry's topic (Results^[ Topic.Index ]).
+//
+// Results is only ever incremented, so the caller must have
+// initialised it beforehand.
+procedure SearchIndex( HelpFile: THelpFile;
+                       SearchWord: string;
+                       Results: Int32ArrayPointer );
+var
+  EntryIndex: longint;
+  RemainingEntry: string;
+  TargetTopic: TTopic;
+  WordPosition: longint;
+  Rating: longint;
+  Weight: longint;
+begin
+  for EntryIndex := 0 to HelpFile.Index.Count - 1 do
+  begin
+    TargetTopic := HelpFile.Index.Objects[ EntryIndex ] as TTopic;
+    RemainingEntry := HelpFile.Index[ EntryIndex ];
+    WordPosition := 0;
+
+    while RemainingEntry <> '' do
+    begin
+      Rating := CompareWord( SearchWord,
+                             ExtractNextValue( RemainingEntry, ' ' ) );
+      if Rating > 0 then
+      begin
+        // A hit on the leading word of the entry counts the most.
+        if WordPosition = 0 then
+          Weight := mwFirstIndexWord
+        else
+          Weight := mwIndexWord;
+        inc( Results^[ TargetTopic.Index ], Weight * Rating );
+      end;
+      inc( WordPosition );
+    end;
+  end;
+end;
+
+// Utility function used in decompression of search table.
+// Updates the appropriate entry in Results array.
+// The word being matched is given in DictIndex and is
+// used to count the actual occurrences of the word
+// within the topic
+{procedure AddTopicFoundInTopicText( TopicIndex: int16;
+ Results: Int32ArrayPointer;
+ DictIndex: longint;
+ WordRelevance: longint );
+var
+ Topic: TTopic;
+ Relevance: longint;
+begin
+ Topic:= _Topics[ TopicIndex ];
+ Relevance := mwTopicTextWord
+ * Topic.CountWord( DictIndex )
+ * WordRelevance;
+ inc( Results^[ TopicIndex ], Relevance );
+
+end;}
+
+// ------------------------------------------------------
+
+// Master search function. Given a search query, searches topic text,
+// topic titles and index entries of one help file.
+//
+// HelpFile       - the help file to search. The FoundInSearch,
+//                  ExcludedInSearch and SearchRelevance members of
+//                  every topic are reset and then updated.
+// Query          - the parsed query; each of its terms is processed
+//                  in order, according to the term's CombineMethod.
+// Results        - topics that matched and were not excluded are
+//                  appended to this list (it is not cleared here).
+// HighlightWords - optional (may be nil). When supplied it must have
+//                  room for HelpFile.DictionaryCount entries; it is
+//                  zeroed and then, for every cmAnd/cmOr term, the
+//                  dictionary relevances of that term are added in.
+//
+// The temporary work arrays are released in a finally block, so they
+// are not leaked if any of the called routines raises an exception.
+procedure SearchHelpFile( HelpFile: THelpFile;
+                          Query: TTextSearchQuery;
+                          Results: TList;
+                          HighlightWords: Int32ArrayPointer );
+var
+  Topic: TTopic;
+  TopicIndex: longint;
+  TermIndex: longint;
+  Term: TSearchTerm;
+  TopicMatches: Int32ArrayPointer;
+  TopicRelevancesForTerm: Int32ArrayPointer;
+  TopicMatchedTerm: boolean;
+
+  WordRelevance: longint;
+  DictionaryRelevances: Int32ArrayPointer;
+  DictIndex: longint;
+  TopicRelevanceForTerm: longint;
+  TopicWordCount: longint;
+begin
+  // Reset flags per topic
+  for TopicIndex := 0 to HelpFile.TopicCount - 1 do
+  begin
+    Topic := HelpFile.Topics[ TopicIndex ];
+    Topic.FoundInSearch := false;
+    Topic.ExcludedInSearch := false;
+    Topic.SearchRelevance := 0;
+  end;
+
+  if HighlightWords <> nil then
+    // Clear the highlightwords array
+    FillInt32Array( HighlightWords, HelpFile.DictionaryCount, 0 );
+
+  // Start from nil so the finally block knows which buffers
+  // were actually allocated.
+  DictionaryRelevances := nil;
+  TopicMatches := nil;
+  TopicRelevancesForTerm := nil;
+  try
+    // Get memory for dictionary/topic relevance arrays
+    GetMem( DictionaryRelevances, HelpFile.DictionaryCount * sizeof( longint ) );
+    GetMem( TopicMatches, HelpFile.TopicCount * sizeof( longint ) );
+    GetMem( TopicRelevancesForTerm, HelpFile.TopicCount * sizeof( longint ) );
+
+    for TermIndex := 0 to Query.TermCount - 1 do
+    begin
+      Term := Query.Term[ TermIndex ];
+
+      FillInt32Array( TopicRelevancesForTerm, HelpFile.TopicCount, 0 );
+
+      // Search the dictionary for matches.
+      SearchDictionary( HelpFile, Term.Text, DictionaryRelevances );
+
+      // Update the highlight words array.
+      // (effectively an OR)
+      if HighlightWords <> nil then
+      begin
+        if Term.CombineMethod in [ cmAnd, cmOr ] then
+        begin
+          for DictIndex := 0 to HelpFile.DictionaryCount - 1 do
+            inc( HighlightWords^[ DictIndex ], DictionaryRelevances^[ DictIndex ] );
+        end;
+      end;
+
+      // For each word in the dictionary that matches
+      // this search word, search the topic texts
+      for DictIndex := 0 to HelpFile.DictionaryCount - 1 do
+      begin
+        WordRelevance := DictionaryRelevances^[ DictIndex ];
+        if WordRelevance > 0 then
+        begin
+          // Search for occurrences of this word
+          // within the text of topics
+          HelpFile.SearchTable.Search( DictIndex, TopicMatches );
+
+          // Work out total relevance for each topic found:
+          for TopicIndex := 0 to HelpFile.TopicCount - 1 do
+          begin
+            if TopicMatches^[ TopicIndex ] > 0 then
+            begin
+              // Search table indicates word occurs in
+              // this topic, so count number of
+              // occurrences to get relevance
+              Topic := HelpFile.Topics[ TopicIndex ];
+
+              TopicWordCount := Topic.CountWord( DictIndex );
+
+              // Accumulate rather than assign: several dictionary
+              // words can match the same term, and each of them
+              // must add to the topic's score instead of replacing
+              // what an earlier word contributed.
+              inc( TopicRelevancesForTerm^[ TopicIndex ],
+                   mwTopicTextWord * TopicWordCount * WordRelevance );
+            end;
+          end;
+        end;
+      end;
+
+      // Search titles and index
+      SearchTopicTitles( HelpFile, Term.Text, TopicRelevancesForTerm );
+      SearchIndex( HelpFile, Term.Text, TopicRelevancesForTerm );
+
+      // Set match flags for each topic, marking
+      // as found or excluded depending on combine
+      // method
+      for TopicIndex := 0 to HelpFile.TopicCount - 1 do
+      begin
+        Topic := HelpFile.Topics[ TopicIndex ];
+        TopicRelevanceForTerm := TopicRelevancesForTerm^[ TopicIndex ];
+        TopicMatchedTerm := TopicRelevanceForTerm > 0;
+        case Term.CombineMethod of
+          cmAnd:
+            // required term: a miss excludes the topic
+            if not TopicMatchedTerm then
+              Topic.ExcludedInSearch := true
+            else
+              Topic.FoundInSearch := true;
+
+          cmNot:
+            // forbidden term: a hit excludes the topic
+            if TopicMatchedTerm then
+              Topic.ExcludedInSearch := true;
+
+          cmOr:
+            // optional term: a hit is enough to find the topic
+            if TopicMatchedTerm then
+              Topic.FoundInSearch := true;
+        end;
+        if TopicMatchedTerm then
+          inc( Topic.SearchRelevance, TopicRelevanceForTerm );
+      end;
+
+      // loop for next term...
+    end;
+
+    // Now find topics that DID have a match
+    // and did NOT have an exclusion match
+    // ... add the topic to result list
+    for TopicIndex := 0 to HelpFile.TopicCount - 1 do
+    begin
+      Topic := HelpFile.Topics[ TopicIndex ];
+      if Topic.FoundInSearch
+         and ( not Topic.ExcludedInSearch ) then
+      begin
+        Results.Add( Topic );
+      end;
+    end;
+  finally
+    // Release in reverse order of allocation; skip anything that
+    // was never allocated because an earlier step failed.
+    if TopicRelevancesForTerm <> nil then
+      FreeMem( TopicRelevancesForTerm, HelpFile.TopicCount * sizeof( longint ) );
+    if TopicMatches <> nil then
+      FreeMem( TopicMatches, HelpFile.TopicCount * sizeof( longint ) );
+    if DictionaryRelevances <> nil then
+      FreeMem( DictionaryRelevances, HelpFile.DictionaryCount * sizeof( longint ) );
+  end;
+end;
+
+// Splits the leading "part" off Word and returns it.
+//
+// A part is, depending on the first character of Word:
+//   - a run of characters for which IsDigit is true,
+//   - a run of characters for which IsAlpha is true, or
+//   - that single first character when it is neither.
+//
+// Word must not be empty (asserted). On return the extracted part
+// has been deleted from the front of Word.
+function ExtractNextIPFWordPart( var Word: string ): string;
+var
+  CharIndex: longint;
+begin
+  assert( Length( Word ) > 0 );
+
+  // CharIndex ends up as the position of the first character that
+  // is NOT part of the result. Starting at 2 means a lone symbol
+  // needs no further scanning.
+  CharIndex := 2;
+  if IsDigit( Word[ 1 ] ) then
+  begin
+    // extract string of digits
+    while CharIndex <= Length( Word ) do
+    begin
+      if not IsDigit( Word[ CharIndex ] ) then
+        break;
+      inc( CharIndex );
+    end;
+  end
+  else if IsAlpha( Word[ 1 ] ) then
+  begin
+    // extract string of letters
+    while CharIndex <= Length( Word ) do
+    begin
+      if not IsAlpha( Word[ CharIndex ] ) then
+        break;
+      inc( CharIndex );
+    end;
+  end;
+  // otherwise: a single non-alphanumeric symbol, CharIndex stays 2
+
+  assert( CharIndex > 1 );
+
+  // Strings are 1-based: copy from position 1, matching the Delete
+  // below, instead of relying on the RTL clamping a start index of 0.
+  Result := Copy( Word, 1, CharIndex - 1 );
+  Delete( Word, 1, CharIndex - 1 );
+end;
+
+Initialization
+End.