00001 package edu.ksu.cis.bandera.specification.pattern.lexer;
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 import java.io.*;
00039 import java.util.*;
00040 import edu.ksu.cis.bandera.specification.pattern.node.*;
00041
00042 public class Lexer
00043 {
00044 protected Token token;
00045 protected State state = State.INITIAL;
00046
00047 private PushbackReader in;
00048 private int line;
00049 private int pos;
00050 private boolean cr;
00051 private boolean eof;
00052 private final StringBuffer text = new StringBuffer();
00053
00054 private static int[][][][] gotoTable;
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 private static int[][] accept;
00104
00105
00106
00107
00108
00109
00110 public static class State
00111 {
00112 public final static State INITIAL = new State(0);
00113
00114 private int id;
00115
00116 private State(int id)
00117 {
00118 this.id = id;
00119 }
00120
00121 public int id()
00122 {
00123 return id;
00124 }
00125 }
00126 public Lexer(PushbackReader in)
00127 {
00128 this.in = in;
00129
00130 if(gotoTable == null)
00131 {
00132 try
00133 {
00134 DataInputStream s = new DataInputStream(
00135 new BufferedInputStream(
00136 Lexer.class.getResourceAsStream("lexer.dat")));
00137
00138
00139 int length = s.readInt();
00140 gotoTable = new int[length][][][];
00141 for(int i = 0; i < gotoTable.length; i++)
00142 {
00143 length = s.readInt();
00144 gotoTable[i] = new int[length][][];
00145 for(int j = 0; j < gotoTable[i].length; j++)
00146 {
00147 length = s.readInt();
00148 gotoTable[i][j] = new int[length][3];
00149 for(int k = 0; k < gotoTable[i][j].length; k++)
00150 {
00151 for(int l = 0; l < 3; l++)
00152 {
00153 gotoTable[i][j][k][l] = s.readInt();
00154 }
00155 }
00156 }
00157 }
00158
00159
00160 length = s.readInt();
00161 accept = new int[length][];
00162 for(int i = 0; i < accept.length; i++)
00163 {
00164 length = s.readInt();
00165 accept[i] = new int[length];
00166 for(int j = 0; j < accept[i].length; j++)
00167 {
00168 accept[i][j] = s.readInt();
00169 }
00170 }
00171
00172 s.close();
00173 }
00174 catch(Exception e)
00175 {
00176 throw new RuntimeException("Unable to read lexer.dat.");
00177 }
00178 }
00179 }
00180 protected void filter() throws LexerException, IOException
00181 {
00182 }
00183 private int getChar() throws IOException
00184 {
00185 if(eof)
00186 {
00187 return -1;
00188 }
00189
00190 int result = in.read();
00191
00192 if(result == -1)
00193 {
00194 eof = true;
00195 }
00196
00197 return result;
00198 }
00199 private String getText(int acceptLength)
00200 {
00201 StringBuffer s = new StringBuffer(acceptLength);
00202 for(int i = 0; i < acceptLength; i++)
00203 {
00204 s.append(text.charAt(i));
00205 }
00206
00207 return s.toString();
00208 }
00209 protected Token getToken() throws IOException, LexerException
00210 {
00211 int dfa_state = 0;
00212
00213 int start_pos = pos;
00214 int start_line = line;
00215
00216 int accept_state = -1;
00217 int accept_token = -1;
00218 int accept_length = -1;
00219 int accept_pos = -1;
00220 int accept_line = -1;
00221
00222 int[][][] gotoTable = this.gotoTable[state.id()];
00223 int[] accept = this.accept[state.id()];
00224 text.setLength(0);
00225
00226 while(true)
00227 {
00228 int c = getChar();
00229
00230 if(c != -1)
00231 {
00232 switch(c)
00233 {
00234 case 10:
00235 if(cr)
00236 {
00237 cr = false;
00238 }
00239 else
00240 {
00241 line++;
00242 pos = 0;
00243 }
00244 break;
00245 case 13:
00246 line++;
00247 pos = 0;
00248 cr = true;
00249 break;
00250 default:
00251 pos++;
00252 cr = false;
00253 break;
00254 };
00255
00256 text.append((char) c);
00257
00258 do
00259 {
00260 int oldState = (dfa_state < -1) ? (-2 -dfa_state) : dfa_state;
00261
00262 dfa_state = -1;
00263
00264 int[][] tmp1 = gotoTable[oldState];
00265 int low = 0;
00266 int high = tmp1.length - 1;
00267
00268 while(low <= high)
00269 {
00270 int middle = (low + high) / 2;
00271 int[] tmp2 = tmp1[middle];
00272
00273 if(c < tmp2[0])
00274 {
00275 high = middle - 1;
00276 }
00277 else if(c > tmp2[1])
00278 {
00279 low = middle + 1;
00280 }
00281 else
00282 {
00283 dfa_state = tmp2[2];
00284 break;
00285 }
00286 }
00287 }while(dfa_state < -1);
00288 }
00289 else
00290 {
00291 dfa_state = -1;
00292 }
00293
00294 if(dfa_state >= 0)
00295 {
00296 if(accept[dfa_state] != -1)
00297 {
00298 accept_state = dfa_state;
00299 accept_token = accept[dfa_state];
00300 accept_length = text.length();
00301 accept_pos = pos;
00302 accept_line = line;
00303 }
00304 }
00305 else
00306 {
00307 if(accept_state != -1)
00308 {
00309 switch(accept_token)
00310 {
00311 case 0:
00312 {
00313 Token token = new0(
00314 getText(accept_length),
00315 start_line + 1,
00316 start_pos + 1);
00317 pushBack(accept_length);
00318 pos = accept_pos;
00319 line = accept_line;
00320 return token;
00321 }
00322 case 1:
00323 {
00324 Token token = new1(
00325 start_line + 1,
00326 start_pos + 1);
00327 pushBack(accept_length);
00328 pos = accept_pos;
00329 line = accept_line;
00330 return token;
00331 }
00332 case 2:
00333 {
00334 Token token = new2(
00335 start_line + 1,
00336 start_pos + 1);
00337 pushBack(accept_length);
00338 pos = accept_pos;
00339 line = accept_line;
00340 return token;
00341 }
00342 case 3:
00343 {
00344 Token token = new3(
00345 start_line + 1,
00346 start_pos + 1);
00347 pushBack(accept_length);
00348 pos = accept_pos;
00349 line = accept_line;
00350 return token;
00351 }
00352 case 4:
00353 {
00354 Token token = new4(
00355 start_line + 1,
00356 start_pos + 1);
00357 pushBack(accept_length);
00358 pos = accept_pos;
00359 line = accept_line;
00360 return token;
00361 }
00362 case 5:
00363 {
00364 Token token = new5(
00365 start_line + 1,
00366 start_pos + 1);
00367 pushBack(accept_length);
00368 pos = accept_pos;
00369 line = accept_line;
00370 return token;
00371 }
00372 case 6:
00373 {
00374 Token token = new6(
00375 start_line + 1,
00376 start_pos + 1);
00377 pushBack(accept_length);
00378 pos = accept_pos;
00379 line = accept_line;
00380 return token;
00381 }
00382 case 7:
00383 {
00384 Token token = new7(
00385 getText(accept_length),
00386 start_line + 1,
00387 start_pos + 1);
00388 pushBack(accept_length);
00389 pos = accept_pos;
00390 line = accept_line;
00391 return token;
00392 }
00393 case 8:
00394 {
00395 Token token = new8(
00396 getText(accept_length),
00397 start_line + 1,
00398 start_pos + 1);
00399 pushBack(accept_length);
00400 pos = accept_pos;
00401 line = accept_line;
00402 return token;
00403 }
00404 }
00405 }
00406 else
00407 {
00408 if(text.length() > 0)
00409 {
00410 throw new LexerException(
00411 "[" + (start_line + 1) + "," + (start_pos + 1) + "]" +
00412 " Unknown token: " + text);
00413 }
00414 else
00415 {
00416 EOF token = new EOF(
00417 start_line + 1,
00418 start_pos + 1);
00419 return token;
00420 }
00421 }
00422 }
00423 }
00424 }
00425 Token new0(String text, int line, int pos) { return new TWhiteSpace(text, line, pos); }
00426 Token new1(int line, int pos) { return new TLBrace(line, pos); }
00427 Token new2(int line, int pos) { return new TRBrace(line, pos); }
00428 Token new3(int line, int pos) { return new TComma(line, pos); }
00429 Token new4(int line, int pos) { return new TPlus(line, pos); }
00430 Token new5(int line, int pos) { return new TEqual(line, pos); }
00431 Token new6(int line, int pos) { return new TPattern(line, pos); }
00432 Token new7(String text, int line, int pos) { return new TStringLiteral(text, line, pos); }
00433 Token new8(String text, int line, int pos) { return new TId(text, line, pos); }
00434 public Token next() throws LexerException, IOException
00435 {
00436 while(token == null)
00437 {
00438 token = getToken();
00439 filter();
00440 }
00441
00442 Token result = token;
00443 token = null;
00444 return result;
00445 }
00446 public Token peek() throws LexerException, IOException
00447 {
00448 while(token == null)
00449 {
00450 token = getToken();
00451 filter();
00452 }
00453
00454 return token;
00455 }
00456 private void pushBack(int acceptLength) throws IOException
00457 {
00458 int length = text.length();
00459 for(int i = length - 1; i >= acceptLength; i--)
00460 {
00461 eof = false;
00462
00463 in.unread(text.charAt(i));
00464 }
00465 }
00466 protected void unread(Token token) throws IOException
00467 {
00468 String text = token.getText();
00469 int length = text.length();
00470
00471 for(int i = length - 1; i >= 0; i--)
00472 {
00473 eof = false;
00474
00475 in.unread(text.charAt(i));
00476 }
00477
00478 pos = token.getPos() - 1;
00479 line = token.getLine() - 1;
00480 }
00481 }