1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
program markov;
{$mode objfpc}{$H+}
{ Generates random words with a character-level Markov chain.

  Usage: markov word-file chain-depth count

  Training: the word file is read line by line. Every maximal run of
  alphabet characters (see i2c) is a word; any other character ends it.
  For each position the program counts which symbol follows the current
  context. The context and the following symbol together form an index of
  `depth` digits in base radix = alphabet size + 1, where digit 0 means
  "no character / end of word" and the most recent symbol is the lowest
  digit.

  Generation: starting from the empty context, symbols are drawn from the
  normalised counts until the end-of-word symbol is drawn. A word is printed
  to standard output (and counted) only if f.hatZeile(word,false) returns
  false - presumably "the word file does not already contain this line";
  the exact semantics live in mystringlistunit. }
uses
  {$IFDEF UNIX}{$IFDEF UseCThreads}
  cthreads,
  {$ENDIF}{$ENDIF}
  Classes
  { you can add units after this },
  sysutils, math, mystringlistunit, lowlevelunit;
var
  wordFileName,s: string;
  count: longint;
  depth,i,j,current: int64;
  radix: int64;       // alphabet size + 1 (digit 0 = end of word / no character)
  groupStart: int64;  // index of the first entry of the group that is sampled
  pick: int64;        // symbol chosen inside the group, 0 = end of word
  inWord: boolean;    // true while the training scanner is inside a word
  c: char;
  f: tMyStringList;
  probabilities: array of extended;
  total: extended;
  c2i: array[char] of longint;
  i2c: string;
begin
  __ausgabenMaske:=3; // no logs
  randomize;

  // --- command line ---------------------------------------------------------
  if paramCount<>3 then begin
    writeln(stderr,'usage: markov word-file chain-depth count');
    halt(1);
  end;
  wordFileName:=paramStr(1);
  if not fileExists(wordFileName) then begin
    writeln(stderr,'file '''+wordFileName+''' does not exist');
    halt(1);
  end;
  try
    depth:=strToInt(paramstr(2));
  except
    writeln(stderr,''''+paramStr(2)+''' is not a valid integer');
    halt(1);
  end;
  if depth<=0 then begin
    writeln(stderr,intToStr(depth)+' is not positive');
    halt(1);
  end;
  try
    count:=strToInt(paramstr(3));
  except
    writeln(stderr,''''+paramStr(3)+''' is not a valid integer');
    halt(1);
  end;
  if count<=0 then begin
    writeln(stderr,intToStr(count)+' is not positive');
    halt(1);
  end;

  // --- alphabet -------------------------------------------------------------
  i2c:='abcdefghijklmnopqrstuvwxyzäöüßABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ';
  // Drop repeated chars so that every char maps to exactly one index.
  // (Repeats can occur e.g. as shared lead bytes if this source file is
  // stored in a multi-byte encoding - TODO confirm the intended encoding.)
  for i:=length(i2c)-1 downto 1 do
    for j:=length(i2c) downto i+1 do
      if i2c[i]=i2c[j] then
        delete(i2c,j,1);
  // c2i is the inverse of i2c; -1 marks chars outside the alphabet
  for c:=#0 to #255 do
    c2i[c]:=-1;
  for i:=1 to length(i2c) do
    c2i[i2c[i]]:=i;

  // --- transition table: radix^depth entries, all zero ----------------------
  setLength(probabilities,round(power(length(i2c)+1,depth)));
  radix:=length(i2c)+1;
  for i:=0 to length(probabilities)-1 do
    probabilities[i]:=0;

  f:=tMyStringList.create;
  try
    // --- training: count (context, next symbol) occurrences ----------------
    f.loadFromFile(wordFileName);
    while f.readln(s) do begin
      s:=s+' '; // lf ends a word, too
      current:=0; // reset current index
      inWord:=false;
      for i:=1 to length(s) do begin
        // shift the context by one digit, dropping the oldest symbol
        current:=current*radix mod length(probabilities);
        if c2i[s[i]]<0 then begin
          // Non-alphabet char: terminate the word, unless it is a '.'.
          // inWord replaces the former "current<>0" test: both agree for
          // depth >= 2, but for depth = 1 the shifted context is always 0, so
          // word endings were never recorded and generation never stopped.
          if inWord and (s[i]<>'.') then
            probabilities[current]:=probabilities[current]+1;
          current:=0;
          inWord:=false;
          continue;
        end;
        current:=current + c2i[s[i]];
        probabilities[current]:=probabilities[current]+1;
        inWord:=true;
      end;
    end;

    // The group of the empty context holds the counts of first characters;
    // if it is empty there is nothing to generate from.
    total:=0;
    for j:=0 to radix-1 do
      total:=total + probabilities[j];
    if total=0 then begin
      writeln(stderr,'file '''+wordFileName+''' contains no usable words');
      halt(1);
    end;

    // --- normalise every group of radix entries to sum 1 -------------------
    for i:=0 to length(probabilities) div radix-1 do begin
      total:=0;
      for j:=0 to radix-1 do
        total:=total + probabilities[j + i * radix];
      if total=0 then
        continue; // context never seen (or never continued): leave zeros
      total:=1/total;
      for j:=0 to radix-1 do
        probabilities[j + i * radix]:= probabilities[j + i * radix] * total;
    end;

    (*
    // debug output
    for current:=0 to length(probabilities)-1 do begin
    j:=current;
    for i:=1 to depth do begin
    if (j mod (length(i2c)+1)) = 0 then
    write('_')
    else
    write(i2c[j mod (length(i2c)+1)]);
    j:=j div (length(i2c)+1);
    end;
    writeln(' ',probabilities[current]);
    end; *)

    // --- generation --------------------------------------------------------
    while count>0 do begin
      current:=0;
      s:='';
      repeat
        groupStart:=current*radix mod length(probabilities);
        total:=random;
        // Draw one symbol strictly inside the current group. Zero entries are
        // skipped; pick remembers the last non-zero entry so that
        //  * random = 0 selects the first possible symbol,
        //  * a sum slightly below 1 (rounding) selects the last possible one,
        //  * a group without any recorded continuation ends the word (pick=0)
        // instead of walking into neighbouring groups or past the array end.
        pick:=0;
        for j:=0 to radix-1 do begin
          if probabilities[groupStart+j]<=0 then
            continue;
          pick:=j;
          total:=total - probabilities[groupStart+j];
          if total<0 then
            break;
        end;
        if pick=0 then
          break; // end-of-word symbol
        current:=groupStart+pick;
        s:=s+i2c[pick];
      until false;
      // empty words can only be drawn at depth 1; never print or count them
      if (s<>'') and not f.hatZeile(s,false) then begin
        writeln(s);
        dec(count);
      end;
    end;
  finally
    f.free;
  end;
  setLength(probabilities,0);
end.
|