1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
|
Unit SearchUnit;
{$mode objfpc}{$H+}
// NewView - a new OS/2 Help Viewer
// Copyright 2001 Aaron Lawrence (aaronl at consultant dot com)
// This software is released under the Gnu Public License - see readme.txt
Interface
// Contains code to search help files.
uses
Classes,
HelpFile, TextSearchQuery, DataTypes;
const
// match weightings: base scores applied according to where a search word
// was found (see SearchTopicTitles, SearchIndex and SearchHelpFile)
mwFirstTitleWord = 50; // hit on the first word of a topic title
mwTitleWord = 20; // hit on any later word of a topic title
mwFirstIndexWord = 20; // hit on the first word of an index entry
mwIndexWord = 10; // hit on any later word of an index entry
mwTopicTextWord = 1; // weight of one occurrence of a word in topic text
// Note on weightings. The title/index weightings
// are multiplied by the word relevance returned by CompareWord.
// Topic text matches score the number of occurrences of the
// word in the topic times that word relevance.
type
// Kinds of text match. No routine in this unit references this type.
// NOTE(review): presumably "starts with" / "contains" / "exact match",
// consumed by callers of the unit - confirm against its users.
TSearchType = ( stStarts, stContains, stMatches );
// Runs Query against HelpFile and appends every topic that matched
// (and was not excluded) to Results; each topic's SearchRelevance,
// FoundInSearch and ExcludedInSearch are reset and recomputed.
// HighlightWords may be nil. When it is not nil it must hold
// HelpFile.DictionaryCount entries: it is cleared and then receives,
// per dictionary word, the summed relevance over the AND/OR terms.
procedure SearchHelpFile( HelpFile: THelpFile;
Query: TTextSearchQuery;
Results: TList;
HighlightWords: Int32ArrayPointer );
Implementation
uses
SysUtils,
// ACLUtility, ACLStringUtility,
HelpTopic, CompareWordUnit, nvUtilities;
// Scores every word in the help file dictionary against SearchWord.
// The search word is upper-cased first and each dictionary word is
// then rated by CompareWord, so whatever partial matches CompareWord
// recognises are reported.
// Results must have room for HelpFile.DictionaryCount entries. On
// return Results^[ i ] holds the CompareWord score of dictionary
// word i (SearchHelpFile treats scores above zero as matches).
procedure SearchDictionary( HelpFile: THelpFile;
                            SearchWord: string;
                            Results: Int32ArrayPointer );
var
  WordCount: longint;
  WordNum: longint;
  Candidate: string;
  UpperSearchWord: string;
begin
  UpperSearchWord := UpperCase( SearchWord );
  WordCount := HelpFile.DictionaryCount;

  // start from a clean slate before scoring
  FillInt32Array( Results, WordCount, 0 );

  for WordNum := 0 to WordCount - 1 do
  begin
    Candidate := HelpFile.DictionaryWords[ WordNum ];
    Results^[ WordNum ] := CompareWord( UpperSearchWord, Candidate );
  end;
end;
// Scores the titles of all topics against SearchWord.
// Each title is split on spaces with ExtractNextValue and every word
// is rated by CompareWord. A hit on the first word of a title adds
// mwFirstTitleWord times the word relevance, a hit on any later word
// adds mwTitleWord times the word relevance.
// Scores are added to Results^[ topic number ], so Results must be
// initialised by the caller and hold HelpFile.TopicCount entries.
procedure SearchTopicTitles( HelpFile: THelpFile;
                             SearchWord: string;
                             Results: Int32ArrayPointer );
var
  TopicNum: longint;
  RemainingTitle: string;
  CurrentWord: string;
  AtFirstWord: boolean;
  Relevance: longint;
  Weight: longint;
begin
  for TopicNum := 0 to HelpFile.TopicCount - 1 do
  begin
    RemainingTitle := HelpFile.Topics[ TopicNum ].Title;
    AtFirstWord := true;

    // the loop relies on ExtractNextValue consuming the extracted
    // word from RemainingTitle
    while RemainingTitle <> '' do
    begin
      CurrentWord := ExtractNextValue( RemainingTitle, ' ' );
      Relevance := CompareWord( SearchWord, CurrentWord );

      if Relevance > 0 then
      begin
        // matching the first word is best
        if AtFirstWord then
          Weight := mwFirstTitleWord
        else
          Weight := mwTitleWord;
        inc( Results^[ TopicNum ], Weight * Relevance );
      end;

      AtFirstWord := false;
    end;
  end;
end;
// Scores the index entries of the help file against SearchWord.
// Each entry is split on spaces with ExtractNextValue and every word
// is rated by CompareWord. A hit on the first word of an entry adds
// mwFirstIndexWord times the word relevance, a hit on any later word
// adds mwIndexWord times the word relevance.
// The score is credited to the topic attached to the index entry,
// i.e. added to Results^[ Topic.Index ]; Results must be initialised
// by the caller.
procedure SearchIndex( HelpFile: THelpFile;
                       SearchWord: string;
                       Results: Int32ArrayPointer );
var
  EntryNum: longint;
  RemainingEntry: string;
  CurrentWord: string;
  Topic: TTopic;
  AtFirstWord: boolean;
  Relevance: longint;
  Weight: longint;
begin
  for EntryNum := 0 to HelpFile.Index.Count - 1 do
  begin
    Topic := HelpFile.Index.Objects[ EntryNum ] as TTopic;
    RemainingEntry := HelpFile.Index[ EntryNum ];
    AtFirstWord := true;

    // the loop relies on ExtractNextValue consuming the extracted
    // word from RemainingEntry
    while RemainingEntry <> '' do
    begin
      CurrentWord := ExtractNextValue( RemainingEntry, ' ' );
      Relevance := CompareWord( SearchWord, CurrentWord );

      if Relevance > 0 then
      begin
        // matching the first word is best
        if AtFirstWord then
          Weight := mwFirstIndexWord
        else
          Weight := mwIndexWord;
        inc( Results^[ Topic.Index ], Weight * Relevance );
      end;

      AtFirstWord := false;
    end;
  end;
end;
// Utility function used in decompression of search table.
// Updates the appropriate entry in Results array.
// The word being matched is given in DictIndex and is
// used to count the actual occurrences of the word
// within the topic
{procedure AddTopicFoundInTopicText( TopicIndex: int16;
Results: Int32ArrayPointer;
DictIndex: longint;
WordRelevance: longint );
var
Topic: TTopic;
Relevance: longint;
begin
Topic:= _Topics[ TopicIndex ];
Relevance := mwTopicTextWord
* Topic.CountWord( DictIndex )
* WordRelevance;
inc( Results^[ TopicIndex ], Relevance );
end;}
// ------------------------------------------------------
// Master search function. Given a search query,
// searches topic text, titles and index entries.
//
// HelpFile       - the help file to search.
// Query          - the terms to look for; each term carries a
//                  CombineMethod (cmAnd, cmOr or cmNot).
// Results        - matching topics are appended to this list, with
//                  their SearchRelevance set to the summed relevance
//                  of all terms they matched.
// HighlightWords - may be nil. Otherwise it must hold
//                  HelpFile.DictionaryCount entries; it is cleared and
//                  then receives, per dictionary word, the summed
//                  relevance over all cmAnd/cmOr terms.
//
// A topic is returned when at least one cmAnd/cmOr term matched it and
// no term excluded it (a missed cmAnd term or a matched cmNot term).
// The working buffers are released even if the search raises.
procedure SearchHelpFile( HelpFile: THelpFile;
                          Query: TTextSearchQuery;
                          Results: TList;
                          HighlightWords: Int32ArrayPointer );
var
  Topic: TTopic;
  TopicIndex: longint;
  TermIndex: longint;
  Term: TSearchTerm;
  TopicMatches: Int32ArrayPointer;
  TopicRelevancesForTerm: Int32ArrayPointer;
  TopicMatchedTerm: boolean;
  WordRelevance: longint;
  DictionaryRelevances: Int32ArrayPointer;
  DictIndex: longint;
  TopicRelevanceForTerm: longint;
  TopicWordCount: longint;
  DictionaryBytes: longint;
  TopicBytes: longint;
begin
  // Reset flags per topic
  for TopicIndex := 0 to HelpFile.TopicCount - 1 do
  begin
    Topic := HelpFile.Topics[ TopicIndex ];
    Topic.FoundInSearch := false;
    Topic.ExcludedInSearch := false;
    Topic.SearchRelevance := 0;
  end;

  if HighlightWords <> nil then
    // Clear the highlightwords array
    FillInt32Array( HighlightWords, HelpFile.DictionaryCount, 0 );

  DictionaryBytes := HelpFile.DictionaryCount * sizeof( longint );
  TopicBytes := HelpFile.TopicCount * sizeof( longint );

  // Start from nil so the finally section only frees what was
  // actually allocated.
  DictionaryRelevances := nil;
  TopicMatches := nil;
  TopicRelevancesForTerm := nil;

  try
    // Get memory for dictionary/topic relevance arrays
    GetMem( DictionaryRelevances, DictionaryBytes );
    GetMem( TopicMatches, TopicBytes );
    GetMem( TopicRelevancesForTerm, TopicBytes );

    for TermIndex := 0 to Query.TermCount - 1 do
    begin
      Term := Query.Term[ TermIndex ];

      FillInt32Array( TopicRelevancesForTerm, HelpFile.TopicCount, 0 );

      // Search the dictionary for matches.
      SearchDictionary( HelpFile, Term.Text, DictionaryRelevances );

      // Update the highlight words array.
      // (effectively an OR)
      if HighlightWords <> nil then
      begin
        if Term.CombineMethod in [ cmAnd, cmOr ] then
        begin
          for DictIndex := 0 to HelpFile.DictionaryCount - 1 do
            inc( HighlightWords^[ DictIndex ],
                 DictionaryRelevances^[ DictIndex ] );
        end;
      end;

      // For each word in the dictionary that matches
      // this search word, search topics/titles/index
      for DictIndex := 0 to HelpFile.DictionaryCount - 1 do
      begin
        WordRelevance := DictionaryRelevances^[ DictIndex ];
        if WordRelevance > 0 then
        begin
          // Search for occurrences of this word
          // within the text of topics
          HelpFile.SearchTable.Search( DictIndex, TopicMatches );

          // Work out total relevance for each topic found:
          for TopicIndex := 0 to HelpFile.TopicCount - 1 do
          begin
            if TopicMatches^[ TopicIndex ] > 0 then
            begin
              // Search table indicates word occurs in
              // this topic, so count number of
              // occurrences to get relevance
              Topic := HelpFile.Topics[ TopicIndex ];
              TopicWordCount := Topic.CountWord( DictIndex );

              // Accumulate rather than assign: one term can match
              // several dictionary words and every one of them must
              // contribute to the topic's score for this term.
              inc( TopicRelevancesForTerm^[ TopicIndex ],
                   mwTopicTextWord * TopicWordCount * WordRelevance );
            end;
          end;
        end;
      end;

      // Search titles and index
      SearchTopicTitles( HelpFile, Term.Text, TopicRelevancesForTerm );
      SearchIndex( HelpFile, Term.Text, TopicRelevancesForTerm );

      // Set match flags for each topic, marking
      // as found or excluded depending on combine
      // method
      for TopicIndex := 0 to HelpFile.TopicCount - 1 do
      begin
        Topic := HelpFile.Topics[ TopicIndex ];
        TopicRelevanceForTerm := TopicRelevancesForTerm^[ TopicIndex ];
        TopicMatchedTerm := TopicRelevanceForTerm > 0;

        case Term.CombineMethod of
          cmAnd:
            if not TopicMatchedTerm then
              Topic.ExcludedInSearch := true
            else
              Topic.FoundInSearch := true;

          cmNot:
            if TopicMatchedTerm then
              Topic.ExcludedInSearch := true;

          cmOr:
            if TopicMatchedTerm then
              Topic.FoundInSearch := true;
        end;

        if TopicMatchedTerm then
          inc( Topic.SearchRelevance, TopicRelevanceForTerm );
      end;

      // loop for next word...
    end;

    // Now find topics that DID have a match
    // and did NOT have an exclusion match
    // ... add the topic to result list
    for TopicIndex := 0 to HelpFile.TopicCount - 1 do
    begin
      Topic := HelpFile.Topics[ TopicIndex ];
      if Topic.FoundInSearch
         and ( not Topic.ExcludedInSearch ) then
      begin
        Results.Add( Topic );
      end;
    end;
  finally
    // Release the working buffers whether or not the search completed
    if TopicRelevancesForTerm <> nil then
      FreeMem( TopicRelevancesForTerm, TopicBytes );
    if TopicMatches <> nil then
      FreeMem( TopicMatches, TopicBytes );
    if DictionaryRelevances <> nil then
      FreeMem( DictionaryRelevances, DictionaryBytes );
  end;
end;
// Removes the leading "part" from Word and returns it.
// The part is chosen by the first character of Word:
//  - if IsDigit accepts it: that character plus all immediately
//    following characters accepted by IsDigit;
//  - else if IsAlpha accepts it: that character plus all immediately
//    following characters accepted by IsAlpha;
//  - otherwise: that single character.
// Word is a var parameter and is left holding the remainder.
// Word is expected to be non-empty (asserted). If assertions are
// compiled out, an empty Word yields '' and is left unchanged instead
// of being indexed out of range.
function ExtractNextIPFWordPart( var Word: string ): string;
var
  CharIndex: longint;
begin
  assert( Length( Word ) > 0 );

  Result := '';
  if Length( Word ) = 0 then
    exit;

  // CharIndex is the position of the first character NOT in the part
  CharIndex := 2;

  if IsDigit( Word[ 1 ] ) then
  begin
    // extract string of digits
    while CharIndex <= Length( Word ) do
    begin
      if not IsDigit( Word[ CharIndex ] ) then
        break;
      inc( CharIndex );
    end;
  end
  else if IsAlpha( Word[ 1 ] ) then
  begin
    // extract string of letters
    while CharIndex <= Length( Word ) do
    begin
      if not IsAlpha( Word[ CharIndex ] ) then
        break;
      inc( CharIndex );
    end;
  end;
  // otherwise: a single non-alphanumeric symbol, CharIndex stays at 2

  assert( CharIndex > 1 );

  // Strings are 1-based: take the first CharIndex - 1 characters
  Result := Copy( Word, 1, CharIndex - 1 );
  Delete( Word, 1, CharIndex - 1 );
end;
Initialization
End.
|