Main Page   Packages   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

Lexer.java

00001 package edu.ksu.cis.bandera.jjjc.unicodepreprocessor.lexer;
00002 
00003 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00004  * Bandera, a Java(TM) analysis and transformation toolkit           *
00005  * Copyright (C) 1998, 1999   Robby (robby@cis.ksu.edu)              *
00006  * All rights reserved.                                              *
00007  *                                                                   *
00008  * This work was done as a project in the SAnToS Laboratory,         *
00009  * Department of Computing and Information Sciences, Kansas State    *
00010  * University, USA (http://www.cis.ksu.edu/santos).                  *
00011  * It is understood that any modification not identified as such is  *
00012  * not covered by the preceding statement.                           *
00013  *                                                                   *
00014  * This work is free software; you can redistribute it and/or        *
00015  * modify it under the terms of the GNU Library General Public       *
00016  * License as published by the Free Software Foundation; either      *
00017  * version 2 of the License, or (at your option) any later version.  *
00018  *                                                                   *
00019  * This work is distributed in the hope that it will be useful,      *
00020  * but WITHOUT ANY WARRANTY; without even the implied warranty of    *
00021  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *
00022  * Library General Public License for more details.                  *
00023  *                                                                   *
00024  * You should have received a copy of the GNU Library General Public *
00025  * License along with this toolkit; if not, write to the             *
00026  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,      *
00027  * Boston, MA  02111-1307, USA.                                      *
00028  *                                                                   *
00029  * Java is a trademark of Sun Microsystems, Inc.                     *
00030  *                                                                   *
00031  * To submit a bug report, send a comment, or get the latest news on *
00032  * this project and other SAnToS projects, please visit the web-site *
00033  *                http://www.cis.ksu.edu/santos                      *
00034  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00035 import java.io.*;
00036 import ca.mcgill.sable.util.*;
00037 import edu.ksu.cis.bandera.jjjc.unicodepreprocessor.node.*;
00038 
00039 public class Lexer
00040 {
00041     protected Token token;
00042     protected State state = State.NORMAL;
00043 
00044     private PushbackReader in;
00045     private int line;
00046     private int pos;
00047     private boolean cr;
00048     private boolean eof;
00049     private final StringBuffer text = new StringBuffer();
00050 
00051     private static int[][][] gotoTable;
00052 /*  {
00053         {{0, 25, 1}, {26, 26, 2}, {27, 91, 1}, {92, 92, 3}, {93, 65535, 1}, },
00054         {},
00055         {},
00056         {{92, 92, 4}, {117, 117, 5}, },
00057         {},
00058         {{48, 57, 6}, {65, 70, 7}, {97, 102, 8}, {117, 117, 5}, },
00059         {{48, 57, 9}, {65, 70, 10}, {97, 102, 11}, },
00060         {{48, 102, -8}, },
00061         {{48, 102, -8}, },
00062         {{48, 57, 12}, {65, 70, 13}, {97, 102, 14}, },
00063         {{48, 102, -11}, },
00064         {{48, 102, -11}, },
00065         {{48, 57, 15}, {65, 70, 16}, {97, 102, 17}, },
00066         {{48, 102, -14}, },
00067         {{48, 102, -14}, },
00068         {},
00069         {},
00070         {},
00071     };*/
00072 
00073     private static int[][] accept;
00074 /*  {
00075         {-1, 4, 3, 4, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, },
00076         {-1, 4, 3, 4, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, },
00077 
00078     };*/
00079 
00080     public static class State
00081     {
00082         public final static State NORMAL = new State(0);
00083         public final static State SUB = new State(1);
00084 
00085         private int id;
00086 
00087         private State(int id)
00088         {
00089             this.id = id;
00090         }
00091 
00092         public int id()
00093         {
00094             return id;
00095         }
00096     }
00097     public Lexer(PushbackReader in)
00098     {
00099         this.in = in;
00100 
00101         if(gotoTable == null)
00102         {
00103             try
00104             {
00105                 DataInputStream s = new DataInputStream(
00106                     new BufferedInputStream(
00107                     Lexer.class.getResourceAsStream("lexer.dat")));
00108 
00109                 // read gotoTable
00110                 int length = s.readInt();
00111                 gotoTable = new int[length][][];
00112                 for(int i = 0; i < gotoTable.length; i++)
00113                 {
00114                     length = s.readInt();
00115                     gotoTable[i] = new int[length][3];
00116                     for(int j = 0; j < gotoTable[i].length; j++)
00117                     {
00118                         for(int k = 0; k < 3; k++)
00119                         {
00120                             gotoTable[i][j][k] = s.readInt();
00121                         }
00122                     }
00123                 }
00124 
00125                 // read accept
00126                 length = s.readInt();
00127                 accept = new int[length][];
00128                 for(int i = 0; i < accept.length; i++)
00129                 {
00130                     length = s.readInt();
00131                     accept[i] = new int[length];
00132                     for(int j = 0; j < accept[i].length; j++)
00133                     {
00134                         accept[i][j] = s.readInt();
00135                     }
00136                 }
00137 
00138                 s.close();
00139             }
00140             catch(Exception e)
00141             {
00142                 throw new RuntimeException("Unable to read lexer.dat.");
00143             }
00144         }
00145     }
00146     protected void filter() throws LexerException, IOException
00147     {
00148     }
00149     private int getChar() throws IOException
00150     {
00151         if(eof)
00152         {
00153             return -1;
00154         }
00155 
00156         int result = in.read();
00157 
00158         if(result == -1)
00159         {
00160             eof = true;
00161         }
00162 
00163         return result;
00164     }
00165     private String getText(int acceptLength)
00166     {
00167         StringBuffer s = new StringBuffer(acceptLength);
00168         for(int i = 0; i < acceptLength; i++)
00169         {
00170             s.append(text.charAt(i));
00171         }
00172 
00173         return s.toString();
00174     }
00175     protected Token getToken() throws IOException, LexerException
00176     {
00177         int dfa_state = 0;
00178 
00179         int start_pos = pos;
00180         int start_line = line;
00181 
00182         int accept_state = -1;
00183         int accept_token = -1;
00184         int accept_length = -1;
00185         int accept_pos = -1;
00186         int accept_line = -1;
00187 
00188         text.setLength(0);
00189 
00190         while(true)
00191         {
00192             int c = getChar();
00193 
00194             if(c != -1)
00195             {
00196                 switch(c)
00197                 {
00198                 case 10:
00199                     if(cr)
00200                     {
00201                         cr = false;
00202                     }
00203                     else
00204                     {
00205                         line++;
00206                         pos = 0;
00207                     }
00208                     break;
00209                 case 13:
00210                     line++;
00211                     pos = 0;
00212                     cr = true;
00213                     break;
00214                 default:
00215                     pos++;
00216                     cr = false;
00217                     break;
00218                 };
00219 
00220                 text.append((char) c);
00221 
00222                 do
00223                 {
00224                     int oldState = (dfa_state < -1) ? (-2 -dfa_state) : dfa_state;
00225 
00226                     dfa_state = -1;
00227 
00228                     int low = 0;
00229                     int high = gotoTable[oldState].length - 1;
00230 
00231                     while(low <= high)
00232                     {
00233                         int middle = (low + high) / 2;
00234 
00235                         if(c < gotoTable[oldState][middle][0])
00236                         {
00237                             high = middle - 1;
00238                         }
00239                         else if(c > gotoTable[oldState][middle][1])
00240                         {
00241                             low = middle + 1;
00242                         }
00243                         else
00244                         {
00245                             dfa_state = gotoTable[oldState][middle][2];
00246                             break;
00247                         }
00248                     }
00249                 }while(dfa_state < -1);
00250             }
00251             else
00252             {
00253                 dfa_state = -1;
00254             }
00255 
00256             if(dfa_state >= 0)
00257             {
00258                 if(accept[state.id()][dfa_state] != -1)
00259                 {
00260                     accept_state = dfa_state;
00261                     accept_token = accept[state.id()][dfa_state];
00262                     accept_length = text.length();
00263                     accept_pos = pos;
00264                     accept_line = line;
00265                 }
00266             }
00267             else
00268             {
00269                 if(accept_state != -1)
00270                 {
00271                     switch(accept_token)
00272                     {
00273                     case 0:
00274                         {
00275                             Token token = new0(
00276                                 start_line + 1,
00277                                 start_pos + 1);
00278                             pushBack(accept_length);
00279                             pos = accept_pos;
00280                             line = accept_line;
00281                             return token;
00282                         }
00283                     case 1:
00284                         {
00285                             Token token = new1(
00286                                 getText(accept_length),
00287                                 start_line + 1,
00288                                 start_pos + 1);
00289                             pushBack(accept_length);
00290                             pos = accept_pos;
00291                             line = accept_line;
00292                             return token;
00293                         }
00294                     case 2:
00295                         {
00296                             Token token = new2(
00297                                 getText(accept_length),
00298                                 start_line + 1,
00299                                 start_pos + 1);
00300                             pushBack(accept_length);
00301                             pos = accept_pos;
00302                             line = accept_line;
00303                             return token;
00304                         }
00305                     case 3:
00306                         {
00307                             Token token = new3(
00308                                 getText(accept_length),
00309                                 start_line + 1,
00310                                 start_pos + 1);
00311                             pushBack(accept_length);
00312                             pos = accept_pos;
00313                             line = accept_line;
00314                             switch(state.id())
00315                             {
00316                                 case 0: state = State.SUB; break;
00317                                 case 1: state = State.SUB; break;
00318                             }
00319                             return token;
00320                         }
00321                     case 4:
00322                         {
00323                             Token token = new4(
00324                                 getText(accept_length),
00325                                 start_line + 1,
00326                                 start_pos + 1);
00327                             pushBack(accept_length);
00328                             pos = accept_pos;
00329                             line = accept_line;
00330                             return token;
00331                         }
00332                     }
00333                 }
00334                 else
00335                 {
00336                     if(text.length() > 0)
00337                     {
00338                         throw new LexerException(
00339                             "[" + (start_line + 1) + "," + (start_pos + 1) + "]" +
00340                             " Unknown token: " + text);
00341                     }
00342                     else
00343                     {
00344                         EOF token = new EOF(
00345                             start_line + 1,
00346                             start_pos + 1);
00347                         return token;
00348                     }
00349                 }
00350             }
00351         }
00352     }
00353     Token new0(int line, int pos) { return new TEvenBackslash(line, pos); }
00354     Token new1(String text, int line, int pos) { return new TUnicodeEscape(text, line, pos); }
00355     Token new2(String text, int line, int pos) { return new TErroneousEscape(text, line, pos); }
00356     Token new3(String text, int line, int pos) { return new TSub(text, line, pos); }
00357     Token new4(String text, int line, int pos) { return new TRawInputCharacter(text, line, pos); }
00358     public Token next() throws LexerException, IOException
00359     {
00360         while(token == null)
00361         {
00362             token = getToken();
00363             filter();
00364         }
00365 
00366         Token result = token;
00367         token = null;
00368         return result;
00369     }
00370     public Token peek() throws LexerException, IOException
00371     {
00372         while(token == null)
00373         {
00374             token = getToken();
00375             filter();
00376         }
00377 
00378         return token;
00379     }
00380     private void pushBack(int acceptLength) throws IOException
00381     {
00382         int length = text.length();
00383         for(int i = length - 1; i >= acceptLength; i--)
00384         {
00385             eof = false;
00386 
00387             in.unread(text.charAt(i));
00388         }
00389     }
00390     protected void unread(Token token) throws IOException
00391     {
00392         String text = token.getText();
00393         int length = text.length();
00394 
00395         for(int i = length - 1; i >= 0; i--)
00396         {
00397             eof = false;
00398 
00399             in.unread(text.charAt(i));
00400         }
00401 
00402         pos = token.getPos() - 1;
00403         line = token.getLine() - 1;
00404     }
00405 }

Generated at Thu Feb 7 06:49:28 2002 for Bandera by doxygen 1.2.10 written by Dimitri van Heesch, © 1997-2001