{
  markov - print one random word produced by a character-level Markov chain.

  Provenance: file markov.lpr, commit 747d6044dfcad03f2899f8b68cd37925a63ebbf7
  ("Initial commit", Erich Eckner, Wed, 9 Jan 2019 16:39:20 +0100), recovered
  from a cgit patch view (cgit v1.2.3-70-g09d2).

  Usage: markov word-file chain-depth

  The word file is read line by line.  Every maximal run of alphabet
  characters (see i2c below) is treated as one word; any other character and
  the end of a line terminate the current word.

  Model layout: with base = (alphabet size + 1), a table index is a number
  with `depth` digits in that base.  Digit value 0 means "no symbol" (before
  the start of a word) in the context positions and "end of word" in the last
  position; digit values 1..length(i2c) are the alphabet symbols.  The last
  digit is the symbol being predicted, the leading depth-1 digits are the
  preceding symbols.  Each group of `base` consecutive entries (one context)
  is normalised to sum to 1.

  Note: with depth = 1 there is no context at all, so the generated word may
  be empty (the terminator can be drawn first).
}
program markov;

{$mode objfpc}{$H+}

uses
  {$IFDEF UNIX}{$IFDEF UseCThreads}
  cthreads,
  {$ENDIF}{$ENDIF}
  Classes
  { you can add units after this },
  sysutils, math;

var
  wordFileName,s: string;
  depth,i,j,current: int64;
  base,tableSize,maxEntries,rowStart,chosen: int64;
  c: char;
  f: textFile;
  probabilities: array of extended;
  total: extended;
  c2i: array[char] of longint;
  i2c: string;
  inWord,sawWord: boolean;

begin
  randomize;

  // ---- command line -------------------------------------------------------
  if paramCount<>2 then begin
    writeln(stderr,'usage: markov word-file chain-depth');
    halt(1);
  end;
  wordFileName:=paramStr(1);
  if not fileExists(wordFileName) then begin
    writeln(stderr,'file '''+wordFileName+''' does not exist');
    halt(1);
  end;
  // tryStrToInt64 reports bad input without a blanket exception handler
  if not tryStrToInt64(paramStr(2),depth) then begin
    writeln(stderr,''''+paramStr(2)+''' is not a valid integer');
    halt(1);
  end;
  if depth<=0 then begin
    writeln(stderr,intToStr(depth)+' is not positive');
    halt(1);
  end;

  // ---- alphabet -----------------------------------------------------------
  // i2c maps symbol number -> character, c2i maps character -> symbol number
  // (-1 for characters outside the alphabet).
  // The duplicate removal works on single chars of the string.  NOTE(review):
  // if this source file is stored as UTF-8, the umlauts are multi-byte and
  // share a lead byte, so the model then operates on bytes - confirm that
  // this is intended.
  i2c:='abcdefghijklmnopqrstuvwxyzäöüßABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ';
  for i:=length(i2c)-1 downto 1 do
    for j:=length(i2c) downto i+1 do
      if i2c[i]=i2c[j] then
        delete(i2c,j,1);
  for c:=#0 to #255 do
    c2i[c]:=-1;
  for i:=1 to length(i2c) do
    c2i[i2c[i]]:=i;

  // ---- table allocation ---------------------------------------------------
  // tableSize = base^depth, computed with checked integer arithmetic so that
  // an oversized depth yields an error message instead of an overflow.
  base:=length(i2c)+1;
  maxEntries:=high(sizeInt) div sizeOf(extended);
  tableSize:=1;
  for i:=1 to depth do begin
    if tableSize>maxEntries div base then begin
      writeln(stderr,'chain depth '+intToStr(depth)+' is too large');
      halt(1);
    end;
    tableSize:=tableSize*base;
  end;
  setLength(probabilities,tableSize);
  for i:=0 to tableSize-1 do
    probabilities[i]:=0;

  // ---- training: count every (context, next symbol) occurrence -------------
  sawWord:=false;
  assignFile(f,wordFileName);
  reset(f);
  try
    while not eof(f) do begin
      readln(f,s);
      current:=0; // reset current index
      inWord:=false;
      for i:=1 to length(s) do begin
        // shift the context by one digit, dropping the oldest symbol
        current:=current*base mod tableSize;
        if c2i[s[i]]<0 then begin
          // separator: count "end of word" only if a word is in progress
          if inWord then
            probabilities[current]:=probabilities[current]+1;
          current:=0;
          inWord:=false;
          continue;
        end;
        current:=current + c2i[s[i]];
        probabilities[current]:=probabilities[current]+1;
        inWord:=true;
        sawWord:=true;
      end;
      // readln strips the line break, so the end of the line has to be
      // counted as "end of word" explicitly
      if inWord then begin
        current:=current*base mod tableSize;
        probabilities[current]:=probabilities[current]+1;
      end;
    end;
  finally
    closeFile(f);
  end;

  if not sawWord then begin
    writeln(stderr,'file '''+wordFileName+''' contains no usable words');
    halt(1);
  end;

  // ---- normalise each context row to a probability distribution ------------
  for i:=0 to tableSize div base-1 do begin
    rowStart:=i*base;
    total:=0;
    for j:=0 to base-1 do
      total:=total + probabilities[rowStart+j];
    if total=0 then
      continue; // context never seen
    for j:=0 to base-1 do
      probabilities[rowStart+j]:=probabilities[rowStart+j]/total;
  end;

(*
  // debug output
  for current:=0 to length(probabilities)-1 do begin
    j:=current;
    for i:=1 to depth do begin
      if (j mod (length(i2c)+1)) = 0 then
        write('_')
      else
        write(i2c[j mod (length(i2c)+1)]);
      j:=j div (length(i2c)+1);
    end;
    writeln(' ',probabilities[current]);
  end; *)

  // ---- generation ----------------------------------------------------------
  current:=0;
  repeat
    rowStart:=current*base mod tableSize;
    total:=random; // 0 <= total < 1
    chosen:=-1;
    // Walk only the current row.  Entries with probability 0 are skipped, and
    // the last possible entry is kept as fallback in case rounding leaves the
    // row sum slightly below the drawn number.
    for j:=0 to base-1 do
      if probabilities[rowStart+j]>0 then begin
        chosen:=j;
        total:=total - probabilities[rowStart+j];
        if total<0 then
          break;
      end;
    // chosen = 0: end of word; chosen = -1: context without any data
    if chosen<=0 then
      break;
    write(i2c[chosen]);
    current:=rowStart+chosen;
  until false;
  writeln;

  setLength(probabilities,0);
end.