00001 package edu.ksu.cis.bandera.jjjc.unicodepreprocessor.lexer;
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 import java.io.*;
00036 import ca.mcgill.sable.util.*;
00037 import edu.ksu.cis.bandera.jjjc.unicodepreprocessor.node.*;
00038
00039 public class Lexer
00040 {
00041 protected Token token;
00042 protected State state = State.NORMAL;
00043
00044 private PushbackReader in;
00045 private int line;
00046 private int pos;
00047 private boolean cr;
00048 private boolean eof;
00049 private final StringBuffer text = new StringBuffer();
00050
00051 private static int[][][] gotoTable;
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073 private static int[][] accept;
00074
00075
00076
00077
00078
00079
00080 public static class State
00081 {
00082 public final static State NORMAL = new State(0);
00083 public final static State SUB = new State(1);
00084
00085 private int id;
00086
00087 private State(int id)
00088 {
00089 this.id = id;
00090 }
00091
00092 public int id()
00093 {
00094 return id;
00095 }
00096 }
00097 public Lexer(PushbackReader in)
00098 {
00099 this.in = in;
00100
00101 if(gotoTable == null)
00102 {
00103 try
00104 {
00105 DataInputStream s = new DataInputStream(
00106 new BufferedInputStream(
00107 Lexer.class.getResourceAsStream("lexer.dat")));
00108
00109
00110 int length = s.readInt();
00111 gotoTable = new int[length][][];
00112 for(int i = 0; i < gotoTable.length; i++)
00113 {
00114 length = s.readInt();
00115 gotoTable[i] = new int[length][3];
00116 for(int j = 0; j < gotoTable[i].length; j++)
00117 {
00118 for(int k = 0; k < 3; k++)
00119 {
00120 gotoTable[i][j][k] = s.readInt();
00121 }
00122 }
00123 }
00124
00125
00126 length = s.readInt();
00127 accept = new int[length][];
00128 for(int i = 0; i < accept.length; i++)
00129 {
00130 length = s.readInt();
00131 accept[i] = new int[length];
00132 for(int j = 0; j < accept[i].length; j++)
00133 {
00134 accept[i][j] = s.readInt();
00135 }
00136 }
00137
00138 s.close();
00139 }
00140 catch(Exception e)
00141 {
00142 throw new RuntimeException("Unable to read lexer.dat.");
00143 }
00144 }
00145 }
00146 protected void filter() throws LexerException, IOException
00147 {
00148 }
00149 private int getChar() throws IOException
00150 {
00151 if(eof)
00152 {
00153 return -1;
00154 }
00155
00156 int result = in.read();
00157
00158 if(result == -1)
00159 {
00160 eof = true;
00161 }
00162
00163 return result;
00164 }
00165 private String getText(int acceptLength)
00166 {
00167 StringBuffer s = new StringBuffer(acceptLength);
00168 for(int i = 0; i < acceptLength; i++)
00169 {
00170 s.append(text.charAt(i));
00171 }
00172
00173 return s.toString();
00174 }
00175 protected Token getToken() throws IOException, LexerException
00176 {
00177 int dfa_state = 0;
00178
00179 int start_pos = pos;
00180 int start_line = line;
00181
00182 int accept_state = -1;
00183 int accept_token = -1;
00184 int accept_length = -1;
00185 int accept_pos = -1;
00186 int accept_line = -1;
00187
00188 text.setLength(0);
00189
00190 while(true)
00191 {
00192 int c = getChar();
00193
00194 if(c != -1)
00195 {
00196 switch(c)
00197 {
00198 case 10:
00199 if(cr)
00200 {
00201 cr = false;
00202 }
00203 else
00204 {
00205 line++;
00206 pos = 0;
00207 }
00208 break;
00209 case 13:
00210 line++;
00211 pos = 0;
00212 cr = true;
00213 break;
00214 default:
00215 pos++;
00216 cr = false;
00217 break;
00218 };
00219
00220 text.append((char) c);
00221
00222 do
00223 {
00224 int oldState = (dfa_state < -1) ? (-2 -dfa_state) : dfa_state;
00225
00226 dfa_state = -1;
00227
00228 int low = 0;
00229 int high = gotoTable[oldState].length - 1;
00230
00231 while(low <= high)
00232 {
00233 int middle = (low + high) / 2;
00234
00235 if(c < gotoTable[oldState][middle][0])
00236 {
00237 high = middle - 1;
00238 }
00239 else if(c > gotoTable[oldState][middle][1])
00240 {
00241 low = middle + 1;
00242 }
00243 else
00244 {
00245 dfa_state = gotoTable[oldState][middle][2];
00246 break;
00247 }
00248 }
00249 }while(dfa_state < -1);
00250 }
00251 else
00252 {
00253 dfa_state = -1;
00254 }
00255
00256 if(dfa_state >= 0)
00257 {
00258 if(accept[state.id()][dfa_state] != -1)
00259 {
00260 accept_state = dfa_state;
00261 accept_token = accept[state.id()][dfa_state];
00262 accept_length = text.length();
00263 accept_pos = pos;
00264 accept_line = line;
00265 }
00266 }
00267 else
00268 {
00269 if(accept_state != -1)
00270 {
00271 switch(accept_token)
00272 {
00273 case 0:
00274 {
00275 Token token = new0(
00276 start_line + 1,
00277 start_pos + 1);
00278 pushBack(accept_length);
00279 pos = accept_pos;
00280 line = accept_line;
00281 return token;
00282 }
00283 case 1:
00284 {
00285 Token token = new1(
00286 getText(accept_length),
00287 start_line + 1,
00288 start_pos + 1);
00289 pushBack(accept_length);
00290 pos = accept_pos;
00291 line = accept_line;
00292 return token;
00293 }
00294 case 2:
00295 {
00296 Token token = new2(
00297 getText(accept_length),
00298 start_line + 1,
00299 start_pos + 1);
00300 pushBack(accept_length);
00301 pos = accept_pos;
00302 line = accept_line;
00303 return token;
00304 }
00305 case 3:
00306 {
00307 Token token = new3(
00308 getText(accept_length),
00309 start_line + 1,
00310 start_pos + 1);
00311 pushBack(accept_length);
00312 pos = accept_pos;
00313 line = accept_line;
00314 switch(state.id())
00315 {
00316 case 0: state = State.SUB; break;
00317 case 1: state = State.SUB; break;
00318 }
00319 return token;
00320 }
00321 case 4:
00322 {
00323 Token token = new4(
00324 getText(accept_length),
00325 start_line + 1,
00326 start_pos + 1);
00327 pushBack(accept_length);
00328 pos = accept_pos;
00329 line = accept_line;
00330 return token;
00331 }
00332 }
00333 }
00334 else
00335 {
00336 if(text.length() > 0)
00337 {
00338 throw new LexerException(
00339 "[" + (start_line + 1) + "," + (start_pos + 1) + "]" +
00340 " Unknown token: " + text);
00341 }
00342 else
00343 {
00344 EOF token = new EOF(
00345 start_line + 1,
00346 start_pos + 1);
00347 return token;
00348 }
00349 }
00350 }
00351 }
00352 }
00353 Token new0(int line, int pos) { return new TEvenBackslash(line, pos); }
00354 Token new1(String text, int line, int pos) { return new TUnicodeEscape(text, line, pos); }
00355 Token new2(String text, int line, int pos) { return new TErroneousEscape(text, line, pos); }
00356 Token new3(String text, int line, int pos) { return new TSub(text, line, pos); }
00357 Token new4(String text, int line, int pos) { return new TRawInputCharacter(text, line, pos); }
00358 public Token next() throws LexerException, IOException
00359 {
00360 while(token == null)
00361 {
00362 token = getToken();
00363 filter();
00364 }
00365
00366 Token result = token;
00367 token = null;
00368 return result;
00369 }
00370 public Token peek() throws LexerException, IOException
00371 {
00372 while(token == null)
00373 {
00374 token = getToken();
00375 filter();
00376 }
00377
00378 return token;
00379 }
00380 private void pushBack(int acceptLength) throws IOException
00381 {
00382 int length = text.length();
00383 for(int i = length - 1; i >= acceptLength; i--)
00384 {
00385 eof = false;
00386
00387 in.unread(text.charAt(i));
00388 }
00389 }
00390 protected void unread(Token token) throws IOException
00391 {
00392 String text = token.getText();
00393 int length = text.length();
00394
00395 for(int i = length - 1; i >= 0; i--)
00396 {
00397 eof = false;
00398
00399 in.unread(text.charAt(i));
00400 }
00401
00402 pos = token.getPos() - 1;
00403 line = token.getLine() - 1;
00404 }
00405 }