summaryrefslogtreecommitdiff
path: root/markov.lpr
blob: a9e7507f025fd58119e78c9dc5dc241f17d7e4d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
program markov;

{$mode objfpc}{$H+}

uses
  {$IFDEF UNIX}{$IFDEF UseCThreads}
  cthreads,
  {$ENDIF}{$ENDIF}
  Classes
  { you can add units after this },
  sysutils, math;

var
  wordFileName,s:    string;
  depth,i,j,current: int64;
  c:                 char;
  f:                 textFile;
  probabilities:     array of extended; // transition table, base^depth entries
  total:             extended;
  c2i:               array[char] of longint; // byte -> symbol index, -1 = not in alphabet
  i2c:               string;                 // symbol index -> byte (1-based)
  base:              int64;   // number of symbols: length(i2c) plus the terminator 0
  tableSize:         int64;   // base^depth = length(probabilities)
  maxTableSize:      int64;   // largest table size that is safe to allocate/index
  symbol,fallback:   int64;   // sampled symbol / last symbol with non-zero probability
  inWord:            boolean; // true while at least one alphabet byte was read since the last separator

{ Markov-chain word generator.

  Usage: markov word-file chain-depth

  Reads the word file, counts which symbol follows each context of
  (chain-depth - 1) preceding symbols, normalises the counts per context
  and then prints one randomly generated word to stdout.

  The table index is a number in base `base`: the low digit is the next
  symbol, the higher digits are the preceding symbols. Symbol 0 means
  "word boundary"; symbols 1..length(i2c) are the bytes of i2c. }
begin
  randomize;
  if paramCount<>2 then begin
    writeln(stderr,'usage: markov word-file chain-depth');
    halt(1);
  end;
  wordFileName:=paramStr(1);
  if not fileExists(wordFileName) then begin
    writeln(stderr,'file '''+wordFileName+''' does not exist');
    halt(1);
  end;
  try
    depth:=strToInt(paramstr(2));
  except
    on EConvertError do begin
      writeln(stderr,''''+paramStr(2)+''' is not a valid integer');
      halt(1);
    end;
  end;
  if depth<=0 then begin
    writeln(stderr,intToStr(depth)+' is not positive');
    halt(1);
  end;

  // Build the alphabet. The string is processed byte by byte, so any byte
  // that occurs more than once (presumably the shared lead byte of the
  // umlauts when this source is stored as UTF-8 -- TODO confirm) is
  // removed again to keep the byte <-> index mapping unambiguous.
  i2c:='abcdefghijklmnopqrstuvwxyzäöüßABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ';
  for i:=length(i2c)-1 downto 1 do
    for j:=length(i2c) downto i+1 do
      if i2c[i]=i2c[j] then
        delete(i2c,j,1);
  for c:=#0 to #255 do
    c2i[c]:=-1;
  for i:=1 to length(i2c) do
    c2i[i2c[i]]:=i;
  base:=length(i2c)+1;

  // Compute base^depth with integer arithmetic and refuse depths whose
  // table could not be allocated or whose index arithmetic
  // (current*base) would overflow int64.
  maxTableSize:=high(sizeInt) div sizeOf(extended);
  if maxTableSize>high(int64) div base then
    maxTableSize:=high(int64) div base;
  tableSize:=1;
  for i:=1 to depth do begin
    if tableSize>maxTableSize div base then begin
      writeln(stderr,'chain depth '+intToStr(depth)+' is too large');
      halt(1);
    end;
    tableSize:=tableSize*base;
  end;

  setLength(probabilities,tableSize);
  for i:=0 to tableSize-1 do
    probabilities[i]:=0;

  // Training: count every observed (context, next symbol) pair.
  assignFile(f,wordFileName);
  reset(f);
  try
    while not eof(f) do begin
      readln(f,s);
      current:=0; // every line starts at a word boundary
      inWord:=false;
      for i:=1 to length(s) do begin
        // shift the context one digit to the left, dropping the oldest symbol
        current:=current*base mod tableSize;
        if c2i[s[i]]<0 then begin
          // byte outside the alphabet: acts as a separator
          if inWord then // terminate word (but do not count empty words)
            probabilities[current]:=probabilities[current]+1;
          current:=0;
          inWord:=false;
          continue;
        end;
        current:=current + c2i[s[i]];
        probabilities[current]:=probabilities[current]+1;
        inWord:=true;
      end;
      // The end of the line also ends a word; without this count the
      // contexts at the end of a line would have no successor at all.
      if inWord then begin
        current:=current*base mod tableSize;
        probabilities[current]:=probabilities[current]+1;
      end;
    end;
  finally
    closeFile(f);
  end;

  // Normalise each row (one context, all possible next symbols) so that
  // its entries sum to 1. Rows that were never observed stay all zero.
  for i:=0 to tableSize div base-1 do begin
    total:=0;
    for j:=0 to base-1 do
      total:=total + probabilities[j + i * base];
    if total=0 then
      continue;
    total:=1/total;
    for j:=0 to base-1 do
      probabilities[j + i * base]:= probabilities[j + i * base] * total;
  end;

(*
  // debug output
  for current:=0 to tableSize-1 do begin
    j:=current;
    for i:=1 to depth do begin
      if (j mod base) = 0 then
        write('_')
      else
        write(i2c[j mod base]);
      j:=j div base;
    end;
    writeln(' ',probabilities[current]);
  end; *)

  // Generation: start at a word boundary and sample symbols until the
  // terminator is drawn.
  current:=0;
  repeat
    current:=current * base mod tableSize; // first index of the current row
    total:=random;                         // uniform in [0,1)
    symbol:=-1;
    fallback:=-1;
    // Walk only over this row; pick the first symbol whose cumulative
    // probability exceeds the random number.
    for j:=0 to base-1 do begin
      if probabilities[current+j]<=0 then
        continue;
      fallback:=j;
      total:=total - probabilities[current+j];
      if total<0 then begin
        symbol:=j;
        break;
      end;
    end;
    // Rounding can leave the row sum slightly below the random number;
    // in that case use the last symbol that has any probability.
    if symbol<0 then
      symbol:=fallback;
    // 0 = terminator drawn, -1 = row without any data (e.g. empty input)
    if symbol<=0 then
      break;
    write(i2c[symbol]);
    current:=current + symbol;
  until false;
  writeln;

  setLength(probabilities,0);
end.