import java.io.*;

/**
 * <h1>SimpleSwitchDFAScanner</h1>
 *
 * <p>A simple DFA scanner that recognizes Pascal identifiers and numbers.
 * The state transitions are encoded with nested switch statements rather
 * than with a lookup table.</p>
 *
 * <p>Copyright (c) 2009 by Ronald Mak</p>
 * <p>For instructional purposes only.  No warranties.</p>
 */
public class SimpleSwitchDFAScanner {
    // Input character classes (the columns of the transition table).
    private static final int LETTER = 0;
    private static final int DIGIT  = 1;
    private static final int PLUS   = 2;
    private static final int MINUS  = 3;
    private static final int DOT    = 4;
    private static final int E      = 5;
    private static final int OTHER  = 6;

    // Error state.
    private static final int ERR = -99999;

    // State-transition table implemented by transit().
    // A negative entry other than ERR is an acceptance state.
    //
    //  state  letter digit   +     -     .     E   other
    //    0       1     4     3     3    ERR    1    ERR
    //    1       1     1    -2    -2    -2     1    -2
    //    2      ERR   ERR   ERR   ERR   ERR   ERR   ERR
    //    3      ERR    4    ERR   ERR   ERR   ERR   ERR
    //    4      -5     4    -5    -5     6     9    -5
    //    5      ERR   ERR   ERR   ERR   ERR   ERR   ERR
    //    6      ERR    7    ERR   ERR   ERR   ERR   ERR
    //    7      -8     7    -8    -8    -8     9    -8
    //    8      ERR   ERR   ERR   ERR   ERR   ERR   ERR
    //    9      ERR   11    10    10    ERR   ERR   ERR
    //   10      ERR   11    ERR   ERR   ERR   ERR   ERR
    //   11     -12    11   -12   -12   -12   -12   -12
    //   12      ERR   ERR   ERR   ERR   ERR   ERR   ERR

    private final BufferedReader reader;  // source of the input lines

    private String line = null;   // current input line, with one blank appended
    private int lineNumber = 0;   // number of lines read so far
    private int linePos = -1;     // index of ch within line

    private char ch;    // current input character (0 at end of file)
    private int state;  // current state

    /**
     * Constructor.  Opens the source file for reading.  If the file cannot
     * be opened, the stack trace is printed and the JVM exits with status -1.
     * @param sourcePath the source path.
     */
    public SimpleSwitchDFAScanner(String sourcePath) {
        BufferedReader opened = null;

        try {
            opened = new BufferedReader(new FileReader(sourcePath));
        }
        catch (IOException ex) {
            ex.printStackTrace();
            System.exit(-1);
        }

        reader = opened;
    }

    /**
     * Compute the next state from the current state and the type of the
     * current input character, following the transition table above.
     * @return the next state: non-negative to continue scanning, ERR on an
     *         invalid transition, or another negative value on acceptance.
     */
    private int transit() {
        int charType = typeOf(ch);

        switch (state) {
            case 0:  // start of a token
                switch (charType) {
                    case LETTER:
                    case E:
                        return 1;
                    case DIGIT:
                        return 4;
                    case PLUS:
                    case MINUS:
                        return 3;
                    default:
                        return ERR;
                }

            case 1:  // inside an identifier
                switch (charType) {
                    case LETTER:
                    case DIGIT:
                    case E:
                        return 1;
                    default:
                        return -2;
                }

            case 3:  // after a leading sign: a digit must follow
                return (charType == DIGIT) ? 4 : ERR;

            case 4:  // whole-number digits
                switch (charType) {
                    case DIGIT:
                        return 4;
                    case DOT:
                        return 6;
                    case E:
                        return 9;
                    default:
                        return -5;
                }

            case 6:  // after the decimal point: a digit must follow
                return (charType == DIGIT) ? 7 : ERR;

            case 7:  // fraction digits
                switch (charType) {
                    case DIGIT:
                        return 7;
                    case E:
                        return 9;
                    default:
                        return -8;
                }

            case 9:  // after the exponent marker
                switch (charType) {
                    case DIGIT:
                        return 11;
                    case PLUS:
                    case MINUS:
                        return 10;
                    default:
                        return ERR;
                }

            case 10:  // after the exponent sign: a digit must follow
                return (charType == DIGIT) ? 11 : ERR;

            case 11:  // exponent digits
                return (charType == DIGIT) ? 11 : -12;

            default:  // rows 2, 5, 8 and 12 of the table, and anything else
                return ERR;
        }
    }

    /**
     * Extract the next token from the source file.  On return, the state
     * field holds the acceptance state (or ERR) that ended the token.
     * @return the text of the next token, or null at the end of the file.
     * @throws Exception if an error occurs.
     */
    private String nextToken() throws Exception {
        // Skip blanks.
        while (Character.isWhitespace(ch)) {
            nextChar();
        }

        // At EOF?
        if (ch == 0) {
            return null;
        }

        state = 0;  // start state
        StringBuilder buffer = new StringBuilder();

        // Loop to do state transitions until the state goes negative.
        while (state >= 0) {
            state = transit();

            // A character that leads to an acceptance state belongs to the
            // next token and is not consumed.  A character that causes an
            // error is consumed and becomes part of the erroneous token.
            if ((state >= 0) || (state == ERR)) {
                buffer.append(ch);
                nextChar();
            }
        }

        return buffer.toString();
    }

    /**
     * Scan the source file, printing each token and its type.
     * @throws Exception if an error occurs.
     */
    private void scan() throws Exception {
        nextChar();

        while (ch != 0) {  // EOF?
            String token = nextToken();

            if (token != null) {
                System.out.print("=====> \"" + token + "\" ");
                System.out.println(tokenTypeName(state));
            }
        }
    }

    /**
     * Map the state that ended a token to the printed token type.
     * @param finalState the state left by nextToken().
     * @return the token type name.
     */
    private static String tokenTypeName(int finalState) {
        switch (finalState) {
            case -2:  return "IDENTIFIER";
            case -5:  return "INTEGER";
            case -8:  return "REAL (fraction only)";
            case -12: return "REAL";
            default:  return "*** ERROR ***";
        }
    }

    /**
     * Return the character type.  The exponent marker is accepted in either
     * case ('E' or 'e') so that a number such as 1e5 is scanned as one real
     * number instead of an integer followed by an identifier.
     * @param ch the character.
     * @return the type.
     */
    int typeOf(char ch) {
        return ((ch == 'E') || (ch == 'e')) ? E
             : Character.isLetter(ch)       ? LETTER
             : Character.isDigit(ch)        ? DIGIT
             : (ch == '+')                  ? PLUS
             : (ch == '-')                  ? MINUS
             : (ch == '.')                  ? DOT
             :                                OTHER;
    }

    /**
     * Get the next character from the source file and store it in ch.
     * Each newly read line is echoed with its line number, and a blank is
     * appended to it so that a token never runs past the end of a line.
     * At the end of the file, ch is set to 0.
     * @throws Exception if an error occurred.
     */
    void nextChar() throws Exception {
        if ((line == null) || (++linePos >= line.length())) {
            line = reader.readLine();

            if (line != null) {
                System.out.println("[" + ++lineNumber + "] " + line);
                line += " ";
                linePos = 0;
                ch = line.charAt(0);
            }
            else {
                ch = 0;
            }
        }
        else {
            ch = line.charAt(linePos);
        }
    }

    /**
     * Main.  Scans the file named by the first command-line argument and
     * closes it afterwards, whether or not the scan succeeded.
     * @param args command-line arguments: the path of the source file.
     * @throws Exception if an error occurred.
     */
    public static void main(String[] args) throws Exception {
        if ((args == null) || (args.length < 1)) {
            System.err.println("Usage: java SimpleSwitchDFAScanner <source file>");
            System.exit(-1);
            return;
        }

        SimpleSwitchDFAScanner scanner = new SimpleSwitchDFAScanner(args[0]);

        try {
            scanner.scan();
        }
        finally {
            scanner.reader.close();  // do not leak the file handle
        }
    }
}