summary | refs | log | tree | commit | diff
path: root/markov.lpr
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2019-01-09 16:39:20 +0100
committer Erich Eckner <git@eckner.net> 2019-01-09 16:39:20 +0100
commit 747d6044dfcad03f2899f8b68cd37925a63ebbf7 (patch)
tree a4d60a820e8bea3010e2c22aa9b1aa111d348e95 /markov.lpr
download markov-747d6044dfcad03f2899f8b68cd37925a63ebbf7.tar.xz
Initial commit
Diffstat (limited to 'markov.lpr')
-rw-r--r-- markov.lpr 122
1 files changed, 122 insertions, 0 deletions
diff --git a/markov.lpr b/markov.lpr
new file mode 100644
index 0000000..a9e7507
--- /dev/null
+++ b/markov.lpr
@@ -0,0 +1,122 @@
+program markov;
+
+{ Character-level Markov chain word generator.
+
+  usage: markov word-file chain-depth
+
+  The program reads word-file, counts which symbol follows which history of
+  symbols, normalises the counts into probabilities and then prints one
+  randomly generated word to stdout.
+
+  Symbols are the single characters listed in i2c (indices 1..length(i2c));
+  index 0 is the "end of word" symbol.  Every other character in the input
+  is treated as a word separator.
+
+  The table "probabilities" is indexed by the last chain-depth symbols read
+  as digits of a base-(length(i2c)+1) number, the most recent symbol being
+  the lowest digit.  base consecutive entries therefore form one "row":
+  the distribution of the next symbol for one fixed history of
+  chain-depth-1 symbols. }
+
+{$mode objfpc}{$H+}
+
+uses
+  {$IFDEF UNIX}{$IFDEF UseCThreads}
+  cthreads,
+  {$ENDIF}{$ENDIF}
+  Classes
+  { you can add units after this },
+  sysutils, math;
+
+var
+  wordFileName,s: string;
+  depth,i,j,current: int64;
+  base: int64;       // number of symbols including the terminator
+  tableSize: int64;  // base^depth = length(probabilities)
+  rowCount: int64;   // base^(depth-1) = number of rows in the table
+  rowStart: int64;   // index of the first entry of the row being processed
+  choice: int64;     // symbol drawn during generation, -1 if none
+  inWord: boolean;   // true while the previous character was a known symbol
+  c: char;
+  f: textFile;
+  probabilities: array of extended;
+  total: extended;
+  c2i: array[char] of longint;
+  i2c: string;
+
+begin
+  randomize;
+  if paramCount<>2 then begin
+    writeln(stderr,'usage: markov word-file chain-depth');
+    halt(1);
+  end;
+  wordFileName:=paramStr(1);
+  if not fileExists(wordFileName) then begin
+    writeln(stderr,'file '''+wordFileName+''' does not exist');
+    halt(1);
+  end;
+  if not tryStrToInt64(paramStr(2),depth) then begin
+    writeln(stderr,''''+paramStr(2)+''' is not a valid integer');
+    halt(1);
+  end;
+  if depth<=0 then begin
+    writeln(stderr,intToStr(depth)+' is not positive');
+    halt(1);
+  end;
+
+  // Alphabet.  Each element of the string is one symbol.
+  // NOTE(review): if this source file is stored as UTF-8, the umlauts are
+  // multi-byte sequences sharing bytes; the loop below drops every repeated
+  // character so that each character value maps to exactly one index.
+  i2c:='abcdefghijklmnopqrstuvwxyzäöüßABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ';
+  for i:=length(i2c)-1 downto 1 do
+    for j:=length(i2c) downto i+1 do
+      if i2c[i]=i2c[j] then
+        delete(i2c,j,1);
+  // Reverse lookup: character -> symbol index, -1 for "not part of a word".
+  for c:=#0 to #255 do
+    c2i[c]:=-1;
+  for i:=1 to length(i2c) do
+    c2i[i2c[i]]:=i;
+
+  // tableSize = base^depth, computed with integers and checked so that the
+  // byte size of the table still fits into SizeInt (no silent overflow).
+  base:=length(i2c)+1;
+  tableSize:=1;
+  for i:=1 to depth do begin
+    if tableSize>(high(SizeInt) div sizeOf(extended)) div base then begin
+      writeln(stderr,'chain-depth '+intToStr(depth)+' is too large');
+      halt(1);
+    end;
+    tableSize:=tableSize*base;
+  end;
+  rowCount:=tableSize div base;
+
+  setLength(probabilities,tableSize);
+  for i:=0 to tableSize-1 do
+    probabilities[i]:=0;
+
+  // training: count symbol transitions
+  assignFile(f,wordFileName);
+  reset(f);
+  try
+    while not eof(f) do begin
+      readln(f,s);
+      current:=0; // every line starts with an empty history
+      inWord:=false;
+      for i:=1 to length(s) do begin
+        // Drop the oldest symbol and make room for the next one.  Equal to
+        // current*base mod tableSize, but cannot overflow.
+        current:=(current mod rowCount)*base;
+        if c2i[s[i]]<0 then begin
+          // Separator: count an end-of-word (low digit 0), but only if a
+          // word was actually in progress.
+          if inWord then
+            probabilities[current]:=probabilities[current]+1;
+          current:=0;
+          inWord:=false;
+          continue;
+        end;
+        current:=current+c2i[s[i]];
+        probabilities[current]:=probabilities[current]+1;
+        inWord:=true;
+      end;
+      // readln strips the line break, so a word reaching the end of the
+      // line has to be terminated explicitly.
+      if inWord then begin
+        current:=(current mod rowCount)*base;
+        probabilities[current]:=probabilities[current]+1;
+      end;
+    end;
+  finally
+    closeFile(f);
+  end;
+
+  // Normalise every row to sum 1; rows never seen in training stay all zero.
+  for i:=0 to rowCount-1 do begin
+    rowStart:=i*base;
+    total:=0;
+    for j:=0 to base-1 do
+      total:=total+probabilities[rowStart+j];
+    if total=0 then
+      continue;
+    for j:=0 to base-1 do
+      probabilities[rowStart+j]:=probabilities[rowStart+j]/total;
+  end;
+
+(*
+  // debug output (history is printed most recent symbol first)
+  for current:=0 to length(probabilities)-1 do begin
+    j:=current;
+    for i:=1 to depth do begin
+      if (j mod (length(i2c)+1)) = 0 then
+        write('_')
+      else
+        write(i2c[j mod (length(i2c)+1)]);
+      j:=j div (length(i2c)+1);
+    end;
+    writeln(' ',probabilities[current]);
+  end; *)
+
+  // generation: walk the chain until the end-of-word symbol is drawn
+  current:=0;
+  repeat
+    rowStart:=(current mod rowCount)*base;
+    total:=random;
+    choice:=-1;
+    // Inverse-CDF sampling restricted to the current row.  Symbols with
+    // probability zero are never chosen; the last viable symbol doubles as
+    // fallback when rounding leaves the row sum slightly below the draw.
+    for j:=0 to base-1 do begin
+      if probabilities[rowStart+j]<=0 then
+        continue;
+      choice:=j;
+      total:=total-probabilities[rowStart+j];
+      if total<0 then
+        break;
+    end;
+    // 0 = end of word drawn, -1 = history without any training data
+    if choice<=0 then
+      break;
+    write(i2c[choice]);
+    current:=rowStart+choice;
+  until false;
+  writeln;
+
+  setLength(probabilities,0);
+end.
+