Parsing

Old:	1 \| 2 \| 3 \| 4 \| 5
New:	DCGnums \| Parsing \| Meta-prolog \| Faster1 \| Faster2 \| Rand-nums
Not done:	Abduction \| Stochastic abs

g1=

digraph G { graph [rankdir = LR ]; something we don't care about; graph [lp = "229, 330"]; }

parse.out=

--| chars |------ 100 105 103 114 97 112 104 32 71 32 123 10 32 32 32 32 32 103 114 etc., --| tokens |------ graph $sopen rankdir $equal LR $sclose $end something we don't care about $end graph $sopen lp $equal $quote 229 $comma 330 $quote $sclose $end --| statements |------ rankdir=LR lp=[229, 330]

parse.pl=

% parse.pl % finding statements of the form X=Y % within a complex string :- [format]. demo :- tell('parse.out'), ignore(demo1), told. demo1 :- chars(g1,Chars), % seperate file I/O... tokens(Tokens,Chars,[]), % .. from tokenisation parse(Statements,Tokens,[]), % .. from parsing !, % <--- block backtracking- otherwise DCGs % can go awol. format('\n--| chars |--\n ~20L\tetc.,\n\n',[Chars]), format('\n--| tokens |--\n ~L', [Tokens]), format('\n--| statements |--\n ~L', [Statements]). % note strange convention of DCGs: % inputs string is second last arg % outputs are the other args % file i/o chars(F,List) :- see(F), chars1(List), !, seen. chars(_,[]) :- seen. % catch predicate if chars1 fails. % never leave files open! chars1(L) :- get0(X), % <-- primitive character reader chars2(X,L). chars2(-1,[]) :- !. chars2(H, [H|T]) :- get0(Next), chars2(Next,T). /* understanding text strings is really three problems: 1) tokenization - 2) parsing 3) interpretation Tokenization: The individual atomic expressions of a language are (usually) written down as sequences of simpler characters. The text is ultimately then a character stream. The task of a tokenizer is at least to segment this character sequence turning it into a token stream. The tokens (the fancy word in this context for 'word' or 'atomic expression') are often also assigned a category. We will support two categories: $X will denote special words and X (without a "$") will denote everything else. */ % note that we can "eat" more that one character in a leaf dcg special(digraph) --> "digraph". special(equal) --> "=". special(comma) --> ",". special(end) --> ";". special(quote) --> [34]. % 34= " % curly brackets special(copen) --> "{". special(cclose) --> "}". % round brackets special(ropen) --> "(". special(rclose) --> ")". % square brackets special(sopen) --> "[". special(sclose) --> "]". /* Once tokenized, the input stream is smaller, more abstract, makes writing and debugging a parser easier. */ %------------------------------------- % parser- assuming file written parse(S) --> statements(S). % zero or more statements, % and sometimes we collect information % from them statements([]) --> []. statements([One|Rest]) --> [graph, $sopen], % statements beginning % with keyword "graph", % we collect assignment(One), [$sclose,$end], statements(Rest). statements(Rest) --> % all other statements % are dull and we will % ignore them dull, [$end], statements(Rest). assignment(X=Y) --> [X,$equal], values(Y). % values are either quotes lists... values(Y) --> [$quote], items(Y), [$quote]. % ... or single items values(Y) --> [Y]. % items are connect to other items via commas items([One|Rest]) --> [One,$comma],items(Rest). items([One]) --> [One]. % dull stuff- use carefully. dull --> []. dull --> [_], dull. %------------------------------------- % here's a tokenizer for graphviz files. tokens(T) --> header, tokens1(T), % ignore header and footer footer. header --> whites, % skip over whitespace "digraph", % read, and ignore, a keyword whites, "G", whites, "{". footer --> whites, "}", whites. % program body holds zero or more tokens tokens1([]) --> []. tokens1([One|Rest]) --> token(One), tokens1(Rest). % tokens may have leading white space token(X) --> whites, token1(X). % two token types: % type 1: specials (denoted with a leading "$" token1($X) --> special(X). % type 2: everything else % note the use of predicate ordering to control % the parse- hence, no backtracking please outside % the call to a DCG parser. token1(X)--> blacks(X). % reads all non-white, % non-special characaters % spin down the input string to the first % non-white space thing. % note: good black to write a "comment skipper" whites([],[]) :- !. whites([H|T],Out) :- white(H), !, whites(T,Out). whites(L,L). white(H) :- space([H],[]). space --> " " | tabb| newline. tabb --> [9]. newline --> [10]. % spin down the input string to the first % non-white space or non-special thing. blacks(X,L0,L) :- blacks1(Y,L0,L), name(X,Y). %<-- insert "num" here blacks1([],[],[]) :- !. blacks1([],L,L) :- % tricky stuff- if the NEXT thing % is a special, stop here and % return the list, unchanged special(_,L,_),!. blacks1([H|Blacks],[H|T],Rest) :- \+ white(H), !, blacks1(Blacks,T,Rest). blacks1([],L,L). % phew- thank heavens that we don't need to do % this everytime we write a DSL- for languages % we can define with infix,prefix, postfix operators, % prolog's "read" predicate does all this for us.

format.pl=

% format.pl %-------------------------------------- % stuff to simplify printing clauses. % swi-prolog lets a programmer customize % the format statement. % FIRST, a predicate of arity 2 is registerred % next to some letter :- format_predicate('P',p(_,_)). % SECOND, write the predicate. p(default,X) :- !, p(0,X). p(_,(X :- true)) :- !, format('~p.\n',X). p(_,(X :- Y )) :- !, portray_clause((X :- Y)). p(N,[H|T] ) :- !, not(not((numbervars([H|T],N,_), format('~p',[[H|T]])))). p(N,X ) :- not(not((numbervars(X,N,_), format('~p',X)))). %-------------------------------------- % stuff to simplify right justifying text % FIRST: :- format_predicate('>',padChars(_,_)). % SECOND: padChars(default,A) :- padChars(5,A). % the first arg "S" is the optional argument % someone may have given with the "~" command padChars(S,A) :- writeThing(A,Thing,N), Pad is S - N, % standard trick to emulate % for(i=1;i<=N;i++) { doThis } forall(between(1,Pad,_),put(32)), write(Thing). writeThing(X,S,L) :- % sformat returns the string in % the first arg sformat(S,'~w',[X]), string_length(S,L). %-------------------------------------- % stuff to simplify left justifying text % FIRST: :- format_predicate('<',charsPad(_,_)). % SECOND: charsPad(default,A) :- charsPad(5,A). charsPad(S,A) :- writeThing(A,Thing,N), atom_length(A,N), Pad is S - N, write(Thing), forall(between(1,Pad,_),put(32)). %-------------------------------------- % stuff to simplify printing N twiddles, % scaled to some factor % FIRST: :- format_predicate('S',twiddle(_,_)). % SECOND: twiddle(default,A) :- twiddle(25,A). twiddle(W,N) :- N1 is round(N/W), forall(between(1,N1,_),put(126)). %-------------------------------------- % stuff to simplify printing lists % scaled to some factor % FIRST: :- format_predicate('L',printL(_,_)). % SECOND: printL(default,List) :- printL(10^6,List). printL(TooLong,List) :- forall((nth1(Pos,List,Item), Pos < TooLong), format('\t~w\n',Item)).

Not © Tim Menzies, 2001
Share and enjoy- information wants to be free.
But if you take anything from this site,
please credit tim@menzies.com.