The following is a memo for myself. Please allow me to leave the article unreadable for a while.
Great past articles and [Paper](http://research.microsoft.com/en-us/um/people/daan/download/ Create a parser combinator quickly by referring to papers / parsec-paper.pdf). Within about 200 lines. I don't care about monads. I hope it works. This makes my simple language.
ParserCombinator.java
public class ParserCombinator {
public static class EndOfInputException extends IOException {private static final long serialVersionUID = 9014321537291324114L;}
public static interface Input<I> {
public I current(); //EOF is expressed as null
public Input<I> next() throws EndOfInputException;
public String positionDescription();
}
public static class ParseException extends Exception { //The "use less exceptional exceptions" pattern. Mainly causedBy,Acts as a vehicle carrying causedAt.
private static final long serialVersionUID = -3808983810373258044L;
public static final ParseException singleton = new ParseException(); //Basically use only this singleton
private String causedBy = "", causedAt = "";
public ParseException setCaused(String causedBy_, String causedAt_) {causedBy = causedBy_; causedAt = causedAt_; return this;}
public String getCausedBy() {return causedBy;}
public String getCausedAt() {return causedAt;}
}
public static interface Parser<I, A> {public Tuple2<A, Input<I>> parse(Input<I> input) throws ParseException;}
public static class ParserMemoizer<I, A> implements Parser<I, A> { //A wrapper used to memoize Parser. It also serves as a place holder when describing recursion.
private Supplier<Parser<I, A>> supplier = null;
public Parser<I, A> defun(Supplier<Parser<I, A>> supplier_) {supplier = supplier_; return this;}
private Parser<I, A> parser = null;
private Map<Input<I>, Tuple2<Tuple2<A, Input<I>>, Tuple2<String, String>>> memo = new HashMap<>();
@Override public Tuple2<A, Input<I>> parse(Input<I> input) throws ParseException {
if (parser == null) parser = supplier.get();
if (! memo.containsKey(input)) {
Tuple2<A, Input<I>> result = null; Tuple2<String, String> error = null;
try {result = parser.parse(input);} catch (ParseException e) {error = Tuple.of(e.getCausedBy(), e.getCausedAt());}
memo.put(input, Tuple.of(result, error));
}
Tuple2<Tuple2<A, Input<I>>, Tuple2<String, String>> result = memo.get(input);
if (result.cdr.car == null) return result.car; else throw ParseException.singleton.setCaused(result.cdr.car.car, result.cdr.car.cdr.car);
}
}
public static <I, A> Parser<I, A> lookahead(Parser<I, A> p) {
return new ParserMemoizer<I, A>().defun(() -> {
return i -> Tuple.of(p.parse(i).car, i); //Returns the pre-read result, but does not proceed with the input itself
});
}
public static <I> Parser<I, I> satisfy(Predicate<I> predicate) {
return new ParserMemoizer<I, I>().defun(
() -> i -> {
try { // i.current()If is null, it is eof, so an exception is raised there. Either way, usually i.I should get an exception at next.
if (i.current() != null && predicate.test(i.current())) return Tuple.of(i.current(), i.next());
} catch (EndOfInputException e) {}
throw ParseException.singleton.setCaused(i.current() == null ? "end of input" : "unsatisfying input " + i.current(), i.positionDescription());
}
);
}
public static <I, A, B> Parser<I, B> apply(Parser<I, A> p, Function<A, B> func) {
return new ParserMemoizer<I, B>().defun(
() -> i -> {
Tuple2<A, Input<I>> result = p.parse(i);
return Tuple.of(func.apply(result.car), result.cdr.car);
}
);
}
public static <I, A, B> Parser<I, B> bind(Parser<I, A> first, Function<A, Parser<I, B>> next) {
return new ParserMemoizer<I, B>().defun(
() -> i -> {
Tuple2<A, Input<I>> result = first.parse(i);
return next.apply(result.car).parse(result.cdr.car);
}
);
}
@SafeVarargs
public static <I, A> Parser<I, A> or(Parser<I, A>... p) {
return new ParserMemoizer<I, A>().defun(
() -> i -> {
for (int idx = 0; idx < p.length; idx ++) try {return p[idx].parse(i);} catch (ParseException e) {}
throw ParseException.singleton.setCaused("no rule matched", i.positionDescription());
}
);
}
public static <I, A, B> Parser<I, B> invert(Parser<I, A> p, Function<Tuple2<A, Input<I>>, ParseException> f0, Function<ParseException, B> f1) { //Returns an exception if p succeeds, throws an exception and returns some result
return new ParserMemoizer<I, B>().defun(
() -> i -> {
B result = null; ParseException ex = null;
try {ex = f0.apply(p.parse(i));} catch (ParseException e) {result = f1.apply(e);}
if (ex == null) return Tuple.of(result, i /*Input cannot proceed*/); else throw ex;
}
);
}
public static <I, A> Parser<I, Boolean> not(Parser<I, A> p, Function<A, String> errorMessageGenerator) {return invert(p, r -> ParseException.singleton.setCaused(errorMessageGenerator.apply(r.car), r.cdr.car.positionDescription()), e -> true);}
public static <I, A, B> Parser<I, B> reduce(Parser<I, List<A>> src, Supplier<B> identityGenerator, BiFunction<B, A, B> accumulator) {
return new ParserMemoizer<I, B>().defun(
() -> i -> {
Tuple2<List<A>, Input<I>> result = src.parse(i);
B reduced = identityGenerator.get();
for (A item : result.car) reduced = accumulator.apply(reduced, item);
return Tuple.of(reduced, result.cdr.car);
}
);
}
public static <I, A> Parser<I, List<A>> lst(List<Parser<I, A>> p) {
return new ParserMemoizer<I, List<A>>().defun(
() -> i -> {
List<A> result = new ArrayList<>();
Input<I> rest = null;
Tuple2<A, Input<I>> cursor = Tuple.of(null, i);
for (int idx = 0; idx < p.size(); idx ++) {
cursor = p.get(idx).parse(cursor.cdr.car);
result.add(cursor.car);
rest = cursor.cdr.car;
}
return Tuple.of(result, rest); //Do not ignore parse exception
}
);
}
@SafeVarargs public static <I, A> Parser<I, List<A>> lst(Parser<I, A>...p) {return lst(Arrays.asList(p));}
public static <I, A, B> Parser<I, List<A>> rep(Supplier<Iterator<B>> iteratorMaker, Parser<I, A> p) {
return new ParserMemoizer<I, List<A>>().defun(
() -> i -> {
Iterator<B> iterator = iteratorMaker.get(); //create a new iterator every time(Otherwise it will be strange when re-executed)
List<A> result = new ArrayList<>();
Input<I> rest = null;
try {
for (Tuple2<A, Input<I>> cursor = Tuple.of(null, i); iterator.hasNext();) {
iterator.next(); //I'm just using iterator as a trick for loops so skip it
cursor = p.parse(cursor.cdr.car);
result.add(cursor.car);
rest = cursor.cdr.car;
}
} catch (ParseException e) {}
return Tuple.of(result, result.size() == 0 ? i : rest); //Returns the original input if it matches only 0 times
}
);
}
private static class LoopIterator implements Iterator<Long> {
private long cursor, until, step;
public LoopIterator(long start_, long until_, long step_) {cursor = start_; until = until_; step = step_;}
public boolean hasNext() {return cursor + step <= until;}
public Long next() {try {return cursor;} finally {cursor += step;}} //Sorry for using the trick to reduce the number of lines. cursor auto boxing and return "later" about the contents cursor+=Execute step.
}
public static <I, A, B> Parser<I, List<A>> rep(long cnt, Parser<I, A> p) {return rep(() -> new LoopIterator(0, cnt, 1), p);}
public static <I, A> Parser<I, List<A>> many(Parser<I, A> p) {return rep(() -> new LoopIterator(0, 1, 0), p);} //A trick to achieve an infinite loop by passing an iterator that doesn't advance
public static <I, A> Parser<I, List<A>> many1(Parser<I, A> p) {return apply(seq(p, many(p)), tpl2 -> {tpl2.cdr.car.add(0, tpl2.car); return tpl2.cdr.car;});}
public static <Z, A, B> Parser<Z, Tuple2<A, B>> seq(Parser<Z, A> a, Parser<Z, B> b) {
return new ParserMemoizer<Z, Tuple2<A, B>>().defun(
() -> i -> {
Tuple2<A, Input<Z>> r0 = a.parse(i);
Tuple2<B, Input<Z>> r1 = b.parse(r0.cdr.car);
return Tuple.of(Tuple.of(r0.car, r1.car), r1.cdr.car);
}
);
}
public static <Z, A, B, C> Parser<Z, Tuple3<A, B, C>> seq(Parser<Z, A> a, Parser<Z, B> b, Parser<Z, C> c) {
return new ParserMemoizer<Z, Tuple3<A, B, C>>().defun(
() -> i -> {
Tuple2<A, Input<Z>> r0 = a.parse(i);
Tuple2<B, Input<Z>> r1 = b.parse(r0.cdr.car);
Tuple2<C, Input<Z>> r2 = c.parse(r1.cdr.car);
return Tuple.of(Tuple.of(r0.car, r1.car, r2.car), r2.cdr.car);
}
);
}
public static <I, A> Parser<I, A> eof() {
return i -> {if (i.current() == null) return Tuple.of(null, i); else throw ParseException.singleton.setCaused("parser stopped while reading", i.positionDescription());};
}
}
Addendum (2017/1/18): Just a few corrections & additions
Recommended Posts