1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
program markov;
{$mode objfpc}{$H+}
{
  Character-level Markov chain word generator.

  Usage: markov word-file chain-depth

  Reads the text file word-file, counts how often each alphabet character (or
  a word end) follows every context of chain-depth - 1 preceding characters,
  converts the counts into per-context probabilities and writes one randomly
  generated word to standard output.

  Table layout: with base = length(i2c) + 1, an index into the probability
  table is a number of chain-depth digits in that base. Digit 0 stands for a
  word boundary, digit k stands for i2c[k]. The lowest digit is the newest
  character, so all entries sharing the same "index div base" form one context
  row of base entries.

  Exit code 1 is returned for usage errors, a missing file, an invalid or
  non-positive depth, or a depth whose table would not fit.
}
uses
  {$IFDEF UNIX}{$IFDEF UseCThreads}
  cthreads,
  {$ENDIF}{$ENDIF}
  Classes
  { you can add units after this },
  sysutils, math;

var
  wordFileName, s: string;
  depth, i, j, current: int64;
  base: int64;        // number of symbols per row: alphabet plus the boundary
  tableSize: int64;   // base raised to the power depth
  rowCount: int64;    // tableSize div base, i.e. the number of contexts
  rowStart: int64;    // index of the first entry of the current context row
  maxEntries: int64;  // largest entry count setLength can be asked for
  pick: int64;        // symbol chosen while sampling, -1 if the row is empty
  inWord: boolean;    // true while at least one alphabet character was read
  c: char;
  f: textFile;
  probabilities: array of extended;
  total: extended;
  c2i: array[char] of longint;
  i2c: string;

begin
  randomize;

  // ---- command line -------------------------------------------------------
  if paramCount<>2 then begin
    writeln(stderr,'usage: markov word-file chain-depth');
    halt(1);
  end;
  wordFileName:=paramStr(1);
  if not fileExists(wordFileName) then begin
    writeln(stderr,'file '''+wordFileName+''' does not exist');
    halt(1);
  end;
  try
    depth:=strToInt(paramStr(2));
  except
    // only a conversion failure means "not an integer"
    on EConvertError do begin
      writeln(stderr,''''+paramStr(2)+''' is not a valid integer');
      halt(1);
    end;
  end;
  if depth<=0 then begin
    writeln(stderr,intToStr(depth)+' is not positive');
    halt(1);
  end;

  // ---- alphabet -----------------------------------------------------------
  i2c:='abcdefghijklmnopqrstuvwxyzäöüßABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ';
  // Drop repeated chars so that every char maps to exactly one index.
  // NOTE(review): the literal shows no repeated letters, so duplicates
  // presumably only arise when the umlauts are stored as multi-byte
  // sequences that share bytes (e.g. UTF-8) - confirm the source encoding.
  for i:=length(i2c)-1 downto 1 do
    for j:=length(i2c) downto i+1 do
      if i2c[i]=i2c[j] then
        delete(i2c,j,1);
  // c2i is the inverse of i2c; -1 marks chars outside the alphabet.
  for c:=#0 to #255 do
    c2i[c]:=-1;
  for i:=1 to length(i2c) do
    c2i[i2c[i]]:=i;

  // ---- table allocation ---------------------------------------------------
  base:=length(i2c)+1;
  // Compute base^depth with integers and refuse depths whose table could
  // not even be requested, instead of failing inside a float conversion.
  maxEntries:=high(SizeInt) div sizeOf(extended);
  tableSize:=1;
  for i:=1 to depth do begin
    if tableSize>maxEntries div base then begin
      writeln(stderr,'chain depth '+intToStr(depth)+' is too large');
      halt(1);
    end;
    tableSize:=tableSize*base;
  end;
  rowCount:=tableSize div base;
  setLength(probabilities,tableSize);
  for i:=0 to tableSize-1 do
    probabilities[i]:=0;

  // ---- training: count transitions ----------------------------------------
  assignFile(f,wordFileName);
  reset(f);
  try
    while not eof(f) do begin
      readln(f,s);
      current:=0; // every line starts in the all-boundary context
      inWord:=false;
      for i:=1 to length(s) do begin
        // Forget the oldest character and make room for the next one.
        // Equal to current*base mod tableSize, but cannot overflow.
        current:=(current mod rowCount)*base;
        if c2i[s[i]]<0 then begin
          // A char outside the alphabet ends the current word, if any.
          if inWord then
            probabilities[current]:=probabilities[current]+1;
          current:=0;
          inWord:=false;
          continue;
        end;
        current:=current+c2i[s[i]];
        probabilities[current]:=probabilities[current]+1;
        inWord:=true;
      end;
      // The end of the line ends the word as well; otherwise a file with
      // one word per line would never record a word end.
      if inWord then begin
        current:=(current mod rowCount)*base;
        probabilities[current]:=probabilities[current]+1;
      end;
    end;
  finally
    closeFile(f);
  end;

  // ---- normalisation: counts to probabilities per context row -------------
  for i:=0 to rowCount-1 do begin
    rowStart:=i*base;
    total:=0;
    for j:=0 to base-1 do
      total:=total+probabilities[rowStart+j];
    if total=0 then
      continue; // context never seen, leave the row at zero
    total:=1/total;
    for j:=0 to base-1 do
      probabilities[rowStart+j]:=probabilities[rowStart+j]*total;
  end;

  (*
  // debug output
  for current:=0 to tableSize-1 do begin
    j:=current;
    for i:=1 to depth do begin
      if (j mod base) = 0 then
        write('_')
      else
        write(i2c[j mod base]);
      j:=j div base;
    end;
    writeln(' ',probabilities[current]);
  end; *)

  // ---- generation ---------------------------------------------------------
  current:=0;
  repeat
    rowStart:=(current mod rowCount)*base;
    total:=random;
    // Walk the cumulative distribution of this row only. Entries with zero
    // probability are never chosen, and the last positive entry is kept if
    // rounding leaves the row sum slightly below the drawn value.
    pick:=-1;
    for j:=0 to base-1 do
      if probabilities[rowStart+j]>0 then begin
        pick:=j;
        total:=total-probabilities[rowStart+j];
        if total<0 then
          break;
      end;
    // 0 is the word boundary; -1 means the context was never seen.
    if pick<=0 then
      break;
    write(i2c[pick]);
    current:=rowStart+pick;
  until false;
  writeln;
  setLength(probabilities,0);
end.
|