Main Page   Packages   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

lr_parser.java

00001 package java_cup.runtime;
00002 
00003 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00004  * Bandera, a Java(TM) analysis and transformation toolkit           *
00005  * Copyright (C) 1998-2001 SAnToS Laboratories (santos@cis.ksu.edu)  *
00006 
00007  * All rights reserved.                                              *
00008  *                                                                   *
00009  * This work was done as a project in the SAnToS Laboratory,         *
00010  * Department of Computing and Information Sciences, Kansas State    *
00011  * University, USA (http://www.cis.ksu.edu/santos).                  *
00012  * It is understood that any modification not identified as such is  *
00013  * not covered by the preceding statement.                           *
00014  *                                                                   *
00015  * This work is free software; you can redistribute it and/or        *
00016  * modify it under the terms of the GNU Library General Public       *
00017  * License as published by the Free Software Foundation; either      *
00018  * version 2 of the License, or (at your option) any later version.  *
00019  *                                                                   *
00020  * This work is distributed in the hope that it will be useful,      *
00021  * but WITHOUT ANY WARRANTY; without even the implied warranty of    *
00022  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *
00023  * Library General Public License for more details.                  *
00024  *                                                                   *
00025  * You should have received a copy of the GNU Library General Public *
00026  * License along with this toolkit; if not, write to the             *
00027  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,      *
00028  * Boston, MA  02111-1307, USA.                                      *
00029  *                                                                   *
00030  * Java is a trademark of Sun Microsystems, Inc.                     *
00031  *                                                                   *
00032  * To submit a bug report, send a comment, or get the latest news on *
00033  * this project and other SAnToS projects, please visit the web-site *
00034  *                http://www.cis.ksu.edu/santos                      *
00035  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00036 import java.util.Stack;
00037 
00038 /** This class implements a skeleton table driven LR parser.  In general,
00039  *  LR parsers are a form of bottom up shift-reduce parsers.  Shift-reduce
00040  *  parsers act by shifting input onto a parse stack until the Symbols 
00041  *  matching the right hand side of a production appear on the top of the 
00042  *  stack.  Once this occurs, a reduce is performed.  This involves removing
00043  *  the Symbols corresponding to the right hand side of the production
00044  *  (the so called "handle") and replacing them with the non-terminal from
00045  *  the left hand side of the production.  <p>
00046  *
00047  *  To control the decision of whether to shift or reduce at any given point, 
00048  *  the parser uses a state machine (the "viable prefix recognition machine" 
00049  *  built by the parser generator).  The current state of the machine is placed
00050  *  on top of the parse stack (stored as part of a Symbol object representing
00051  *  a terminal or non terminal).  The parse action table is consulted 
00052  *  (using the current state and the current lookahead Symbol as indexes) to 
00053  *  determine whether to shift or to reduce.  When the parser shifts, it 
00054  *  changes to a new state by pushing a new Symbol (containing a new state) 
00055  *  onto the stack.  When the parser reduces, it pops the handle (right hand 
00056  *  side of a production) off the stack.  This leaves the parser in the state 
00057  *  it was in before any of those Symbols were matched.  Next the reduce-goto 
00058  *  table is consulted (using the new state and current lookahead Symbol as 
00059  *  indexes) to determine a new state to go to.  The parser then shifts to 
00060  *  this goto state by pushing the left hand side Symbol of the production 
00061  *  (also containing the new state) onto the stack.<p>
00062  *
00063  *  This class actually provides four LR parsers.  The methods parse() and 
00064  *  debug_parse() provide two versions of the main parser (the only difference 
00065  *  being that debug_parse() emits debugging trace messages as it parses).  
00066  *  In addition to these main parsers, the error recovery mechanism uses two 
00067  *  more.  One of these is used to simulate "parsing ahead" in the input 
00068  *  without carrying out actions (to verify that a potential error recovery 
00069  *  has worked), and the other is used to parse through buffered "parse ahead" 
00070  *  input in order to execute all actions and re-synchronize the actual parser 
00071  *  configuration.<p>
00072  *
00073  *  This is an abstract class which is normally filled out by a subclass
00074  *  generated by the JavaCup parser generator.  In addition to supplying
00075  *  the actual parse tables, generated code also supplies methods which 
00076  *  invoke various pieces of user supplied code, provide access to certain
00077  *  special Symbols (e.g., EOF and error), etc.  Specifically, the following
00078  *  abstract methods are normally supplied by generated code:
00079  *  <dl compact>
00080  *  <dt> short[][] production_table()
00081  *  <dd> Provides a reference to the production table (indicating the index of
00082  *       the left hand side non terminal and the length of the right hand side
00083  *       for each production in the grammar).
00084  *  <dt> short[][] action_table()
00085  *  <dd> Provides a reference to the parse action table.
00086  *  <dt> short[][] reduce_table()
00087  *  <dd> Provides a reference to the reduce-goto table.
00088  *  <dt> int start_state()      
00089  *  <dd> Indicates the index of the start state.
00090  *  <dt> int start_production() 
00091  *  <dd> Indicates the index of the starting production.
00092  *  <dt> int EOF_sym() 
00093  *  <dd> Indicates the index of the EOF Symbol.
00094  *  <dt> int error_sym() 
00095  *  <dd> Indicates the index of the error Symbol.
00096  *  <dt> Symbol do_action() 
00097  *  <dd> Executes a piece of user supplied action code.  This always comes at 
00098  *       the point of a reduce in the parse, so this code also allocates and 
00099  *       fills in the left hand side non terminal Symbol object that is to be 
00100  *       pushed onto the stack for the reduce.
00101  *  <dt> void init_actions()
00102  *  <dd> Code to initialize a special object that encapsulates user supplied
00103  *       actions (this object is used by do_action() to actually carry out the 
00104  *       actions).
00105  *  </dl>
00106  *  
00107  *  In addition to these routines that <i>must</i> be supplied by the 
00108  *  generated subclass there are also a series of routines that <i>may</i> 
00109  *  be supplied.  These include:
00110  *  <dl>
00111  *  <dt> Symbol scan()
00112  *  <dd> Used to get the next input Symbol from the scanner.
00113  *  <dt> Scanner getScanner()
00114  *  <dd> Used to provide a scanner for the default implementation of
00115  *       scan().
00116  *  <dt> int error_sync_size()
00117  *  <dd> This determines how many Symbols past the point of an error 
00118  *       must be parsed without error in order to consider a recovery to 
00119  *       be valid.  This defaults to 3.  Values less than 2 are not 
00120  *       recommended.
00121  *  <dt> void report_error(String message, Object info)
00122  *  <dd> This method is called to report an error.  The default implementation
00123  *       simply prints a message to System.err and where the error occurred.
00124  *       This method is often replaced in order to provide a more sophisticated
00125  *       error reporting mechanism.
00126  *  <dt> void report_fatal_error(String message, Object info)
00127  *  <dd> This method is called when a fatal error that cannot be recovered from
00128  *       is encountered.  In the default implementation, it calls 
00129  *       report_error() to emit a message, then throws an exception.
00130  *  <dt> void syntax_error(Symbol cur_token)
00131  *  <dd> This method is called as soon as syntax error is detected (but
00132  *       before recovery is attempted).  In the default implementation it 
00133  *       invokes: report_error("Syntax error", null);
00134  *  <dt> void unrecovered_syntax_error(Symbol cur_token)
00135  *  <dd> This method is called if syntax error recovery fails.  In the default
00136  *       implementation it invokes:<br> 
00137  *         report_fatal_error("Couldn't repair and continue parse", null);
00138  *  </dl>
00139  *
00140  * @see     java_cup.runtime.Symbol
00141  * @see     java_cup.runtime.Symbol
00142  * @see     java_cup.runtime.virtual_parse_stack
00143  * @version last updated: 7/3/96
00144  * @author  Frank Flannery
00145  */
00146 
00147 public abstract class lr_parser {
00148 
00149   /*-----------------------------------------------------------*/
00150   /*--- (Access to) Static (Class) Variables ------------------*/
00151   /*-----------------------------------------------------------*/
00152 
00153   /** The default number of Symbols after an error we much match to consider 
00154    *  it recovered from. 
00155    */
00156   protected final static int _error_sync_size = 3;
00157 
00158   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00159 
00160   /** Internal flag to indicate when parser should quit. */
00161   protected boolean _done_parsing = false;
00162 
00163   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00164   /* Global parse state shared by parse(), error recovery, and 
00165    * debugging routines */
00166   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00167 
00168   /** Indication of the index for top of stack (for use by actions). */
00169   protected int tos;
00170 
00171   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00172 
00173   /** The current lookahead Symbol. */
00174   protected Symbol cur_token;
00175 
00176   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00177 
00178   /** The parse stack itself. */
00179   protected Stack stack = new Stack();
00180 
00181   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00182 
00183   /** Direct reference to the production table. */ 
00184   protected short[][] production_tab;
00185 
00186   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00187 
00188   /** Direct reference to the action table. */
00189   protected short[][] action_tab;
00190 
00191   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00192 
00193   /** Direct reference to the reduce-goto table. */
00194   protected short[][] reduce_tab;
00195 
00196   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00197 
00198   /** This is the scanner object used by the default implementation
00199    *  of scan() to get Symbols.  To avoid name conflicts with existing
00200    *  code, this field is private. [CSA/davidm] */
00201   private Scanner _scanner;
00202 
00203   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00204 
00205   /** Lookahead Symbols used for attempting error recovery "parse aheads". */
00206   protected Symbol lookahead[];
00207 
00208   /** Position in lookahead input buffer used for "parse ahead". */
00209   protected int lookahead_pos;
00210 
00211   /*-----------------------------------------------------------*/
00212   /*--- Constructor(s) ----------------------------------------*/
00213   /*-----------------------------------------------------------*/
00214 
00215   /** Simple constructor. */
00216   public lr_parser()
00217     {
00218       /* nothing to do here */
00219     }
00220   /** Constructor that sets the default scanner. [CSA/davidm] */
00221   public lr_parser(Scanner s) {
00222     this(); /* in case default constructor someday does something */
00223     setScanner(s);
00224   }  
00225   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00226 
00227   /** The action table (supplied by generated subclass).  This table is
00228    *  indexed by state and terminal number indicating what action is to
00229    *  be taken when the parser is in the given state (i.e., the given state 
00230    *  is on top of the stack) and the given terminal is next on the input.  
00231    *  States are indexed using the first dimension, however, the entries for 
00232    *  a given state are compacted and stored in adjacent index, value pairs 
00233    *  which are searched for rather than accessed directly (see get_action()).  
00234    *  The actions stored in the table will be either shifts, reduces, or 
00235    *  errors.  Shifts are encoded as positive values (one greater than the 
00236    *  state shifted to).  Reduces are encoded as negative values (one less 
00237    *  than the production reduced by).  Error entries are denoted by zero. 
00238    * 
00239    * @see java_cup.runtime.lr_parser#get_action
00240    */
00241   public abstract short[][] action_table();  
00242   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00243 
00244   /** Advance to next "parse ahead" input Symbol. Return true if we have 
00245    *  input to advance to, false otherwise. 
00246    */
00247   protected boolean advance_lookahead()
00248     {
00249       /* advance the input location */
00250       lookahead_pos++;
00251 
00252       /* return true if we didn't go off the end */
00253       return lookahead_pos < error_sync_size();
00254     }
00255   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00256 
00257   /** Return the current lookahead in our error "parse ahead" buffer. */
00258   protected Symbol cur_err_token() { return lookahead[lookahead_pos]; }  
00259   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00260 
00261   /** Write a debugging message to System.err for the debugging version 
00262    *  of the parser. 
00263    *
00264    * @param mess the text of the debugging message.
00265    */
00266   public void debug_message(String mess)
00267     {
00268       System.err.println(mess);
00269     }
00270   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00271 
00272   /** Perform a parse with debugging output.  This does exactly the
00273    *  same things as parse(), except that it calls debug_shift() and
00274    *  debug_reduce() when shift and reduce moves are taken by the parser
00275    *  and produces various other debugging messages.  
00276    */
00277   public Symbol debug_parse()
00278     throws java.lang.Exception
00279     {
00280       /* the current action code */
00281       int act;
00282 
00283       /* the Symbol/stack element returned by a reduce */
00284       Symbol lhs_sym = null;
00285 
00286       /* information about production being reduced with */
00287       short handle_size, lhs_sym_num;
00288 
00289       /* set up direct reference to tables to drive the parser */
00290       production_tab = production_table();
00291       action_tab     = action_table();
00292       reduce_tab     = reduce_table();
00293 
00294       debug_message("# Initializing parser");
00295 
00296       /* initialize the action encapsulation object */
00297       init_actions();
00298 
00299       /* do user initialization */
00300       user_init();
00301 
00302       /* the current Symbol */
00303       cur_token = scan(); 
00304 
00305       debug_message("# Current Symbol is #" + cur_token.sym);
00306 
00307       /* push dummy Symbol with start state to get us underway */
00308       stack.removeAllElements();
00309       stack.push(new Symbol(0, start_state()));
00310       tos = 0;
00311 
00312       /* continue until we are told to stop */
00313       for (_done_parsing = false; !_done_parsing; )
00314     {
00315       /* Check current token for freshness. */
00316       if (cur_token.used_by_parser)
00317         throw new Error("Symbol recycling detected (fix your scanner).");
00318 
00319       /* current state is always on the top of the stack */
00320       //debug_stack();
00321 
00322       /* look up action out of the current state with the current input */
00323       act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
00324 
00325       /* decode the action -- > 0 encodes shift */
00326       if (act > 0)
00327         {
00328           /* shift to the encoded state by pushing it on the stack */
00329           cur_token.parse_state = act-1;
00330           cur_token.used_by_parser = true;
00331           debug_shift(cur_token);
00332           stack.push(cur_token);
00333           tos++;
00334 
00335           /* advance to the next Symbol */
00336           cur_token = scan();
00337               debug_message("# Current token is " + cur_token);
00338         }
00339       /* if its less than zero, then it encodes a reduce action */
00340       else if (act < 0)
00341         {
00342           /* perform the action for the reduce */
00343           lhs_sym = do_action((-act)-1, this, stack, tos);
00344 
00345           /* look up information about the production */
00346           lhs_sym_num = production_tab[(-act)-1][0];
00347           handle_size = production_tab[(-act)-1][1];
00348 
00349           debug_reduce((-act)-1, lhs_sym_num, handle_size);
00350 
00351           /* pop the handle off the stack */
00352           for (int i = 0; i < handle_size; i++)
00353         {
00354           stack.pop();
00355           tos--;
00356         }
00357           
00358           /* look up the state to go to from the one popped back to */
00359           act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
00360           debug_message("# Reduce rule: top state " +
00361                  ((Symbol)stack.peek()).parse_state +
00362                  ", lhs sym " + lhs_sym_num + " -> state " + act); 
00363 
00364           /* shift to that state */
00365           lhs_sym.parse_state = act;
00366           lhs_sym.used_by_parser = true;
00367           stack.push(lhs_sym);
00368           tos++;
00369 
00370           debug_message("# Goto state #" + act);
00371         }
00372       /* finally if the entry is zero, we have an error */
00373       else if (act == 0)
00374         {
00375           /* call user syntax error reporting routine */
00376           syntax_error(cur_token);
00377 
00378           /* try to error recover */
00379           if (!error_recovery(true))
00380         {
00381           /* if that fails give up with a fatal syntax error */
00382           unrecovered_syntax_error(cur_token);
00383 
00384           /* just in case that wasn't fatal enough, end parse */
00385           done_parsing();
00386         } else {
00387           lhs_sym = (Symbol)stack.peek();
00388         }
00389         }
00390     }
00391       return lhs_sym;
00392     }
00393   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00394 
00395   /** Do debug output for a reduce. 
00396    *
00397    * @param prod_num  the production we are reducing with.
00398    * @param nt_num    the index of the LHS non terminal.
00399    * @param rhs_size  the size of the RHS.
00400    */
00401   public void debug_reduce(int prod_num, int nt_num, int rhs_size)
00402     {
00403       debug_message("# Reduce with prod #" + prod_num + " [NT=" + nt_num + 
00404                 ", " + "SZ=" + rhs_size + "]");
00405     }
00406   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00407 
00408   /** Do debug output for shift. 
00409    *
00410    * @param shift_tkn the Symbol being shifted onto the stack.
00411    */
00412   public void debug_shift(Symbol shift_tkn)
00413     {
00414       debug_message("# Shift under term #" + shift_tkn.sym + 
00415             " to state #" + shift_tkn.parse_state);
00416     }
00417   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00418 
00419   /** Do debug output for stack state. [CSA]
00420    */
00421   public void debug_stack() {
00422       StringBuffer sb=new StringBuffer("## STACK:");
00423       for (int i=0; i<stack.size(); i++) {
00424       Symbol s = (Symbol) stack.elementAt(i);
00425       sb.append(" <state "+s.parse_state+", sym "+s.sym+">");
00426       if ((i%3)==2 || (i==(stack.size()-1))) {
00427           debug_message(sb.toString());
00428           sb = new StringBuffer("         ");
00429       }
00430       }
00431   }  
00432   /*-----------------------------------------------------------*/
00433   /*--- General Methods ---------------------------------------*/
00434   /*-----------------------------------------------------------*/
00435 
00436   /** Perform a bit of user supplied action code (supplied by generated 
00437    *  subclass).  Actions are indexed by an internal action number assigned
00438    *  at parser generation time.
00439    *
00440    * @param act_num   the internal index of the action to be performed.
00441    * @param parser    the parser object we are acting for.
00442    * @param stack     the parse stack of that object.
00443    * @param top       the index of the top element of the parse stack.
00444    */
00445   public abstract Symbol do_action(
00446     int       act_num, 
00447     lr_parser parser, 
00448     Stack     stack, 
00449     int       top) 
00450     throws java.lang.Exception;
00451   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00452 
00453   /** This method is called to indicate that the parser should quit.  This is 
00454    *  normally called by an accept action, but can be used to cancel parsing 
00455    *  early in other circumstances if desired. 
00456    */
00457   public void done_parsing()
00458     {
00459       _done_parsing = true;
00460     }
00461   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00462 
00463   /** Dump the parse stack for debugging purposes. */
00464   public void dump_stack()
00465     {
00466       if (stack == null)
00467     {
00468       debug_message("# Stack dump requested, but stack is null");
00469       return;
00470     }
00471 
00472       debug_message("============ Parse Stack Dump ============");
00473 
00474       /* dump the stack */
00475       for (int i=0; i<stack.size(); i++)
00476     {
00477       debug_message("Symbol: " + ((Symbol)stack.elementAt(i)).sym +
00478             " State: " + ((Symbol)stack.elementAt(i)).parse_state);
00479     }
00480       debug_message("==========================================");
00481     }
00482   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00483 
00484   /** The index of the end of file terminal Symbol (supplied by generated 
00485    *  subclass). 
00486    */
00487   public abstract int EOF_sym();  
00488   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00489   /* Error recovery code */
00490   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00491 
00492   /** Attempt to recover from a syntax error.  This returns false if recovery 
00493    *  fails, true if it succeeds.  Recovery happens in 4 steps.  First we
00494    *  pop the parse stack down to a point at which we have a shift out
00495    *  of the top-most state on the error Symbol.  This represents the
00496    *  initial error recovery configuration.  If no such configuration is
00497    *  found, then we fail.  Next a small number of "lookahead" or "parse
00498    *  ahead" Symbols are read into a buffer.  The size of this buffer is 
00499    *  determined by error_sync_size() and determines how many Symbols beyond
00500    *  the error must be matched to consider the recovery a success.  Next, 
00501    *  we begin to discard Symbols in attempt to get past the point of error
00502    *  to a point where we can continue parsing.  After each Symbol, we attempt 
00503    *  to "parse ahead" though the buffered lookahead Symbols.  The "parse ahead"
00504    *  process simulates that actual parse, but does not modify the real 
00505    *  parser's configuration, nor execute any actions. If we can  parse all 
00506    *  the stored Symbols without error, then the recovery is considered a 
00507    *  success.  Once a successful recovery point is determined, we do an
00508    *  actual parse over the stored input -- modifying the real parse 
00509    *  configuration and executing all actions.  Finally, we return the the 
00510    *  normal parser to continue with the overall parse.
00511    *
00512    * @param debug should we produce debugging messages as we parse.
00513    */
00514   protected boolean error_recovery(boolean debug)
00515     throws java.lang.Exception
00516     {
00517       if (debug) debug_message("# Attempting error recovery");
00518 
00519       /* first pop the stack back into a state that can shift on error and 
00520      do that shift (if that fails, we fail) */
00521       if (!find_recovery_config(debug))
00522     {
00523       if (debug) debug_message("# Error recovery fails");
00524       return false;
00525     }
00526 
00527       /* read ahead to create lookahead we can parse multiple times */
00528       read_lookahead();
00529 
00530       /* repeatedly try to parse forward until we make it the required dist */
00531       for (;;)
00532     {
00533       /* try to parse forward, if it makes it, bail out of loop */
00534       if (debug) debug_message("# Trying to parse ahead");
00535       if (try_parse_ahead(debug))
00536         {
00537           break;
00538         }
00539 
00540       /* if we are now at EOF, we have failed */
00541       if (lookahead[0].sym == EOF_sym()) 
00542         {
00543           if (debug) debug_message("# Error recovery fails at EOF");
00544           return false;
00545         }
00546 
00547       /* otherwise, we consume another Symbol and try again */
00548       // BUG FIX by Bruce Hutton
00549       // Computer Science Department, University of Auckland,
00550       // Auckland, New Zealand.
00551       // It is the first token that is being consumed, not the one 
00552       // we were up to parsing
00553       if (debug) 
00554           debug_message("# Consuming Symbol #" + lookahead[ 0 ].sym);
00555       restart_lookahead();
00556     }
00557 
00558       /* we have consumed to a point where we can parse forward */
00559       if (debug) debug_message("# Parse-ahead ok, going back to normal parse");
00560 
00561       /* do the real parse (including actions) across the lookahead */
00562       parse_lookahead(debug);
00563 
00564       /* we have success */
00565       return true;
00566     }
00567   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00568 
00569   /** The index of the special error Symbol (supplied by generated subclass). */
00570   public abstract int error_sym();  
00571   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00572 
00573   /** The number of Symbols after an error we much match to consider it 
00574    *  recovered from. 
00575    */
00576   protected int error_sync_size() {return _error_sync_size; }  
00577   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00578 
00579   /** Put the (real) parse stack into error recovery configuration by 
00580    *  popping the stack down to a state that can shift on the special 
00581    *  error Symbol, then doing the shift.  If no suitable state exists on 
00582    *  the stack we return false 
00583    *
00584    * @param debug should we produce debugging messages as we parse.
00585    */
00586   protected boolean find_recovery_config(boolean debug)
00587     {
00588       Symbol error_token;
00589       int act;
00590 
00591       if (debug) debug_message("# Finding recovery state on stack");
00592 
00593       /* Remember the right-position of the top symbol on the stack */
00594       int right_pos = ((Symbol)stack.peek()).right;
00595       int left_pos  = ((Symbol)stack.peek()).left;
00596 
00597       /* pop down until we can shift under error Symbol */
00598       while (!shift_under_error())
00599     {
00600       /* pop the stack */
00601       if (debug) 
00602         debug_message("# Pop stack by one, state was # " +
00603                       ((Symbol)stack.peek()).parse_state);
00604           left_pos = ((Symbol)stack.pop()).left;    
00605       tos--;
00606 
00607       /* if we have hit bottom, we fail */
00608       if (stack.empty()) 
00609         {
00610           if (debug) debug_message("# No recovery state found on stack");
00611           return false;
00612         }
00613     }
00614 
00615       /* state on top of the stack can shift under error, find the shift */
00616       act = get_action(((Symbol)stack.peek()).parse_state, error_sym());
00617       if (debug) 
00618     {
00619       debug_message("# Recover state found (#" + 
00620             ((Symbol)stack.peek()).parse_state + ")");
00621       debug_message("# Shifting on error to state #" + (act-1));
00622     }
00623 
00624       /* build and shift a special error Symbol */
00625       error_token = new Symbol(error_sym(), left_pos, right_pos);
00626       error_token.parse_state = act-1;
00627       error_token.used_by_parser = true;
00628       stack.push(error_token);
00629       tos++;
00630 
00631       return true;
00632     }
00633   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00634 
00635   /** Fetch an action from the action table.  The table is broken up into
00636    *  rows, one per state (rows are indexed directly by state number).  
00637    *  Within each row, a list of index, value pairs are given (as sequential
00638    *  entries in the table), and the list is terminated by a default entry 
00639    *  (denoted with a Symbol index of -1).  To find the proper entry in a row 
00640    *  we do a linear or binary search (depending on the size of the row).  
00641    *
00642    * @param state the state index of the action being accessed.
00643    * @param sym   the Symbol index of the action being accessed.
00644    */
00645   protected final short get_action(int state, int sym)
00646     {
00647       short tag;
00648       int first, last, probe;
00649       short[] row = action_tab[state];
00650 
00651       /* linear search if we are < 10 entries */
00652       if (row.length < 20)
00653         for (probe = 0; probe < row.length; probe++)
00654       {
00655         /* is this entry labeled with our Symbol or the default? */
00656         tag = row[probe++];
00657         if (tag == sym || tag == -1)
00658           {
00659             /* return the next entry */
00660             return row[probe];
00661           }
00662       }
00663       /* otherwise binary search */
00664       else
00665     {
00666       first = 0; 
00667       last = (row.length-1)/2 - 1;  /* leave out trailing default entry */
00668       while (first <= last)
00669         {
00670           probe = (first+last)/2;
00671           if (sym == row[probe*2])
00672         return row[probe*2+1];
00673           else if (sym > row[probe*2])
00674         first = probe+1;
00675           else
00676             last = probe-1;
00677         }
00678 
00679       /* not found, use the default at the end */
00680       return row[row.length-1];
00681     }
00682 
00683       /* shouldn't happened, but if we run off the end we return the 
00684      default (error == 0) */
00685       return 0;
00686     }
00687   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00688 
00689   /** Fetch a state from the reduce-goto table.  The table is broken up into
00690    *  rows, one per state (rows are indexed directly by state number).  
00691    *  Within each row, a list of index, value pairs are given (as sequential
00692    *  entries in the table), and the list is terminated by a default entry 
00693    *  (denoted with a Symbol index of -1).  To find the proper entry in a row 
00694    *  we do a linear search.  
00695    *
00696    * @param state the state index of the entry being accessed.
00697    * @param sym   the Symbol index of the entry being accessed.
00698    */
00699   protected final short get_reduce(int state, int sym)
00700     {
00701       short tag;
00702       short[] row = reduce_tab[state];
00703 
00704       /* if we have a null row we go with the default */
00705       if (row == null)
00706         return -1;
00707 
00708       for (int probe = 0; probe < row.length; probe++)
00709     {
00710       /* is this entry labeled with our Symbol or the default? */
00711       tag = row[probe++];
00712       if (tag == sym || tag == -1)
00713         {
00714           /* return the next entry */
00715           return row[probe];
00716         }
00717     }
00718       /* if we run off the end we return the default (error == -1) */
00719       return -1;
00720     }
00721   /**
00722    * Simple accessor method to get the default scanner.
00723    */
00724   public Scanner getScanner() { return _scanner; }  
00725   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00726 
00727   /** Initialize the action object.  This is called before the parser does
00728    *  any parse actions. This is filled in by generated code to create
00729    *  an object that encapsulates all action code. 
00730    */ 
00731   protected abstract void init_actions() throws java.lang.Exception;  
00732   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00733 
00734   /** This method provides the main parsing routine.  It returns only when 
00735    *  done_parsing() has been called (typically because the parser has 
00736    *  accepted, or a fatal error has been reported).  See the header 
00737    *  documentation for the class regarding how shift/reduce parsers operate
00738    *  and how the various tables are used.
00739    */
00740   public Symbol parse() throws java.lang.Exception
00741     {
00742       /* the current action code */
00743       int act;
00744 
00745       /* the Symbol/stack element returned by a reduce */
00746       Symbol lhs_sym = null;
00747 
00748       /* information about production being reduced with */
00749       short handle_size, lhs_sym_num;
00750 
00751       /* set up direct reference to tables to drive the parser */
00752 
00753       production_tab = production_table();
00754       action_tab     = action_table();
00755       reduce_tab     = reduce_table();
00756 
00757       /* initialize the action encapsulation object */
00758       init_actions();
00759 
00760       /* do user initialization */
00761       user_init();
00762 
00763       /* get the first token */
00764       cur_token = scan(); 
00765 
00766       /* push dummy Symbol with start state to get us underway */
00767       stack.removeAllElements();
00768       stack.push(new Symbol(0, start_state()));
00769       tos = 0;
00770 
00771       /* continue until we are told to stop */
00772       for (_done_parsing = false; !_done_parsing; )
00773     {
00774       /* Check current token for freshness. */
00775       if (cur_token.used_by_parser)
00776         throw new Error("Symbol recycling detected (fix your scanner).");
00777 
00778       /* current state is always on the top of the stack */
00779 
00780       /* look up action out of the current state with the current input */
00781       act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
00782 
00783       /* decode the action -- > 0 encodes shift */
00784       if (act > 0)
00785         {
00786           /* shift to the encoded state by pushing it on the stack */
00787           cur_token.parse_state = act-1;
00788           cur_token.used_by_parser = true;
00789           stack.push(cur_token);
00790           tos++;
00791 
00792           /* advance to the next Symbol */
00793           cur_token = scan();
00794         }
00795       /* if its less than zero, then it encodes a reduce action */
00796       else if (act < 0)
00797         {
00798           /* perform the action for the reduce */
00799           lhs_sym = do_action((-act)-1, this, stack, tos);
00800 
00801           /* look up information about the production */
00802           lhs_sym_num = production_tab[(-act)-1][0];
00803           handle_size = production_tab[(-act)-1][1];
00804 
00805           /* pop the handle off the stack */
00806           for (int i = 0; i < handle_size; i++)
00807         {
00808           stack.pop();
00809           tos--;
00810         }
00811           
00812           /* look up the state to go to from the one popped back to */
00813           act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
00814 
00815           /* shift to that state */
00816           lhs_sym.parse_state = act;
00817           lhs_sym.used_by_parser = true;
00818           stack.push(lhs_sym);
00819           tos++;
00820         }
00821       /* finally if the entry is zero, we have an error */
00822       else if (act == 0)
00823         {
00824           /* call user syntax error reporting routine */
00825           syntax_error(cur_token);
00826 
00827           /* try to error recover */
00828           if (!error_recovery(false))
00829         {
00830           /* if that fails give up with a fatal syntax error */
00831           unrecovered_syntax_error(cur_token);
00832 
00833           /* just in case that wasn't fatal enough, end parse */
00834           done_parsing();
00835         } else {
00836           lhs_sym = (Symbol)stack.peek();
00837         }
00838         }
00839     }
00840       return lhs_sym;
00841     }
00842   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00843 
00844   /** Parse forward using stored lookahead Symbols.  In this case we have
00845    *  already verified that parsing will make it through the stored lookahead
00846    *  Symbols and we are now getting back to the point at which we can hand
00847    *  control back to the normal parser.  Consequently, this version of the
00848    *  parser performs all actions and modifies the real parse configuration.  
00849    *  This returns once we have consumed all the stored input or we accept.
00850    *
00851    * @param debug should we produce debugging messages as we parse.
00852    */
00853   protected void parse_lookahead(boolean debug)
00854     throws java.lang.Exception
00855     {
00856       /* the current action code */
00857       int act;
00858 
00859       /* the Symbol/stack element returned by a reduce */
00860       Symbol lhs_sym = null;
00861 
00862       /* information about production being reduced with */
00863       short handle_size, lhs_sym_num;
00864 
00865       /* restart the saved input at the beginning */
00866       lookahead_pos = 0;
00867 
00868       if (debug) 
00869     {
00870       debug_message("# Reparsing saved input with actions");
00871       debug_message("# Current Symbol is #" + cur_err_token().sym);
00872       debug_message("# Current state is #" + 
00873             ((Symbol)stack.peek()).parse_state);
00874     }
00875 
00876       /* continue until we accept or have read all lookahead input */
00877       while(!_done_parsing)
00878     {
00879       /* current state is always on the top of the stack */
00880 
00881       /* look up action out of the current state with the current input */
00882       act = 
00883         get_action(((Symbol)stack.peek()).parse_state, cur_err_token().sym);
00884 
00885       /* decode the action -- > 0 encodes shift */
00886       if (act > 0)
00887         {
00888           /* shift to the encoded state by pushing it on the stack */
00889           cur_err_token().parse_state = act-1;
00890           cur_err_token().used_by_parser = true;
00891           if (debug) debug_shift(cur_err_token());
00892           stack.push(cur_err_token());
00893           tos++;
00894 
00895           /* advance to the next Symbol, if there is none, we are done */
00896           if (!advance_lookahead()) 
00897         {
00898           if (debug) debug_message("# Completed reparse");
00899 
00900           /* scan next Symbol so we can continue parse */
00901           // BUGFIX by Chris Harris <ckharris@ucsd.edu>:
00902           //   correct a one-off error by commenting out
00903           //   this next line.
00904           /*cur_token = scan();*/
00905 
00906           /* go back to normal parser */
00907           return;
00908         }
00909           
00910           if (debug) 
00911         debug_message("# Current Symbol is #" + cur_err_token().sym);
00912         }
00913       /* if its less than zero, then it encodes a reduce action */
00914       else if (act < 0)
00915         {
00916           /* perform the action for the reduce */
00917           lhs_sym = do_action((-act)-1, this, stack, tos);
00918 
00919           /* look up information about the production */
00920           lhs_sym_num = production_tab[(-act)-1][0];
00921           handle_size = production_tab[(-act)-1][1];
00922 
00923           if (debug) debug_reduce((-act)-1, lhs_sym_num, handle_size);
00924 
00925           /* pop the handle off the stack */
00926           for (int i = 0; i < handle_size; i++)
00927         {
00928           stack.pop();
00929           tos--;
00930         }
00931           
00932           /* look up the state to go to from the one popped back to */
00933           act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
00934 
00935           /* shift to that state */
00936           lhs_sym.parse_state = act;
00937           lhs_sym.used_by_parser = true;
00938           stack.push(lhs_sym);
00939           tos++;
00940            
00941           if (debug) debug_message("# Goto state #" + act);
00942 
00943         }
00944       /* finally if the entry is zero, we have an error 
00945          (shouldn't happen here, but...)*/
00946       else if (act == 0)
00947         {
00948           report_fatal_error("Syntax error", lhs_sym);
00949           return;
00950         }
00951     }
00952 
00953     
00954     }
00955   /*-----------------------------------------------------------*/
00956   /*--- (Access to) Instance Variables ------------------------*/
00957   /*-----------------------------------------------------------*/
00958 
00959   /** Table of production information (supplied by generated subclass).
00960    *  This table contains one entry per production and is indexed by 
00961    *  the negative-encoded values (reduce actions) in the action_table.  
00962    *  Each entry has two parts, the index of the non-terminal on the 
00963    *  left hand side of the production, and the number of Symbols 
00964    *  on the right hand side. 
00965    */
00966   public abstract short[][] production_table();  
00967   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00968 
00969   /** Read from input to establish our buffer of "parse ahead" lookahead 
00970    *  Symbols. 
00971    */
00972   protected void read_lookahead() throws java.lang.Exception
00973     {
00974       /* create the lookahead array */
00975       lookahead = new Symbol[error_sync_size()];
00976 
00977       /* fill in the array */
00978       for (int i = 0; i < error_sync_size(); i++)
00979     {
00980       lookahead[i] = cur_token;
00981       cur_token = scan();
00982     }
00983 
00984       /* start at the beginning */
00985       lookahead_pos = 0;
00986     }
00987   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
00988 
00989   /** The reduce-goto table (supplied by generated subclass).  This
00990    *  table is indexed by state and non-terminal number and contains
00991    *  state numbers.  States are indexed using the first dimension, however,
00992    *  the entries for a given state are compacted and stored in adjacent
00993    *  index, value pairs which are searched for rather than accessed 
00994    *  directly (see get_reduce()).  When a reduce occurs, the handle 
00995    *  (corresponding to the RHS of the matched production) is popped off 
00996    *  the stack.  The new top of stack indicates a state.  This table is 
00997    *  then indexed by that state and the LHS of the reducing production to 
00998    *  indicate where to "shift" to. 
00999    *
01000    * @see java_cup.runtime.lr_parser#get_reduce
01001    */
01002   public abstract short[][] reduce_table();  
01003   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01004 
01005   /** Report a non fatal error (or warning).  This method takes a message 
01006    *  string and an additional object (to be used by specializations 
01007    *  implemented in subclasses).  Here in the base class a very simple 
01008    *  implementation is provided which simply prints the message to 
01009    *  System.err. 
01010    *
01011    * @param message an error message.
01012    * @param info    an extra object reserved for use by specialized subclasses.
01013    */
01014   public void report_error(String message, Object info)
01015     {
01016       System.err.print(message);
01017       if (info instanceof Symbol)
01018     if (((Symbol)info).left != -1)
01019     System.err.println(" at character " + ((Symbol)info).left + 
01020                " of input");
01021     else System.err.println("");
01022       else System.err.println("");
01023     }
01024   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01025 
01026   /** Report a fatal error.  This method takes a  message string and an 
01027    *  additional object (to be used by specializations implemented in 
01028    *  subclasses).  Here in the base class a very simple implementation 
01029    *  is provided which reports the error then throws an exception. 
01030    *
01031    * @param message an error message.
01032    * @param info    an extra object reserved for use by specialized subclasses.
01033    */
01034   public void report_fatal_error(
01035     String   message, 
01036     Object   info)
01037     throws java.lang.Exception
01038     {
01039       /* stop parsing (not really necessary since we throw an exception, but) */
01040       done_parsing();
01041 
01042       /* use the normal error message reporting to put out the message */
01043       report_error(message, info);
01044 
01045       /* throw an exception */
01046       throw new Exception("Can't recover from previous error(s)");
01047     }
01048   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01049 
01050   /** Reset the parse ahead input to one Symbol past where we started error 
01051    *  recovery (this consumes one new Symbol from the real input). 
01052    */
01053   protected void restart_lookahead() throws java.lang.Exception
01054     {
01055       /* move all the existing input over */
01056       for (int i = 1; i < error_sync_size(); i++)
01057     lookahead[i-1] = lookahead[i];
01058 
01059       /* read a new Symbol into the last spot */
01060       // BUG Fix by Bruce Hutton
01061       // Computer Science Department, University of Auckland,
01062       // Auckland, New Zealand. [applied 5-sep-1999 by csa]
01063       // The following two lines were out of order!!
01064       lookahead[error_sync_size()-1] = cur_token;
01065       cur_token = scan();
01066 
01067       /* reset our internal position marker */
01068       lookahead_pos = 0;
01069     }
01070   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01071 
01072   /** Get the next Symbol from the input (supplied by generated subclass).
01073    *  Once end of file has been reached, all subsequent calls to scan 
01074    *  should return an EOF Symbol (which is Symbol number 0).  By default
01075    *  this method returns getScanner().next_token(); this implementation
01076    *  can be overriden by the generated parser using the code declared in
01077    *  the "scan with" clause.  Do not recycle objects; every call to
01078    *  scan() should return a fresh object.
01079    */
01080   public Symbol scan() throws java.lang.Exception {
01081     Symbol sym = getScanner().next_token();
01082     return (sym!=null) ? sym : new Symbol(EOF_sym());
01083   }  
01084   /**
01085    * Simple accessor method to set the default scanner.
01086    */
01087   public void setScanner(Scanner s) { _scanner = s; }  
01088   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01089 
01090   /** Determine if we can shift under the special error Symbol out of the 
01091    *  state currently on the top of the (real) parse stack. 
01092    */
01093   protected boolean shift_under_error()
01094     {
01095       /* is there a shift under error Symbol */
01096       return get_action(((Symbol)stack.peek()).parse_state, error_sym()) > 0;
01097     }
01098   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01099 
01100   /** The index of the start production (supplied by generated subclass). */
01101   public abstract int start_production();  
01102   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01103 
01104   /** The index of the start state (supplied by generated subclass). */
01105   public abstract int start_state();  
01106   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01107 
01108   /** This method is called when a syntax error has been detected and recovery 
01109    *  is about to be invoked.  Here in the base class we just emit a 
01110    *  "Syntax error" error message.  
01111    *
01112    * @param cur_token the current lookahead Symbol.
01113    */
01114   public void syntax_error(Symbol cur_token)
01115     {
01116       report_error("Syntax error", cur_token);
01117     }
01118   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01119 
01120   /** Do a simulated parse forward (a "parse ahead") from the current 
01121    *  stack configuration using stored lookahead input and a virtual parse
01122    *  stack.  Return true if we make it all the way through the stored 
01123    *  lookahead input without error. This basically simulates the action of 
01124    *  parse() using only our saved "parse ahead" input, and not executing any 
01125    *  actions.
01126    *
01127    * @param debug should we produce debugging messages as we parse.
01128    */
01129   protected boolean try_parse_ahead(boolean debug)
01130     throws java.lang.Exception
01131     {
01132       int act;
01133       short lhs, rhs_size;
01134 
01135       /* create a virtual stack from the real parse stack */
01136       virtual_parse_stack vstack = new virtual_parse_stack(stack);
01137 
01138       /* parse until we fail or get past the lookahead input */
01139       for (;;)
01140     {
01141       /* look up the action from the current state (on top of stack) */
01142       act = get_action(vstack.top(), cur_err_token().sym);
01143 
01144       /* if its an error, we fail */
01145       if (act == 0) return false;
01146 
01147       /* > 0 encodes a shift */
01148       if (act > 0)
01149         {
01150           /* push the new state on the stack */
01151           vstack.push(act-1);
01152 
01153           if (debug) debug_message("# Parse-ahead shifts Symbol #" + 
01154                cur_err_token().sym + " into state #" + (act-1));
01155 
01156           /* advance simulated input, if we run off the end, we are done */
01157           if (!advance_lookahead()) return true;
01158         }
01159       /* < 0 encodes a reduce */
01160       else
01161         {
01162           /* if this is a reduce with the start production we are done */
01163           if ((-act)-1 == start_production()) 
01164         {
01165           if (debug) debug_message("# Parse-ahead accepts");
01166           return true;
01167         }
01168 
01169           /* get the lhs Symbol and the rhs size */
01170           lhs = production_tab[(-act)-1][0];
01171           rhs_size = production_tab[(-act)-1][1];
01172 
01173           /* pop handle off the stack */
01174           for (int i = 0; i < rhs_size; i++)
01175         vstack.pop();
01176 
01177           if (debug) 
01178         debug_message("# Parse-ahead reduces: handle size = " + 
01179               rhs_size + " lhs = #" + lhs + " from state #" + vstack.top());
01180 
01181           /* look up goto and push it onto the stack */
01182           vstack.push(get_reduce(vstack.top(), lhs));
01183           if (debug) 
01184         debug_message("# Goto state #" + vstack.top());
01185         }
01186     }
01187     }
01188   /*-----------------------------------------------------------*/
01189 
01190   /** Utility function: unpacks parse tables from strings */
01191   protected static short[][] unpackFromStrings(String[] sa)
01192     {
01193       // Concatanate initialization strings.
01194       StringBuffer sb = new StringBuffer(sa[0]);
01195       for (int i=1; i<sa.length; i++)
01196     sb.append(sa[i]);
01197       int n=0; // location in initialization string
01198       int size1 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
01199       short[][] result = new short[size1][];
01200       for (int i=0; i<size1; i++) {
01201         int size2 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
01202         result[i] = new short[size2];
01203         for (int j=0; j<size2; j++)
01204           result[i][j] = (short) (sb.charAt(n++)-2);
01205       }
01206       return result;
01207     }
01208   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01209 
01210   /** This method is called if it is determined that syntax error recovery 
01211    *  has been unsuccessful.  Here in the base class we report a fatal error. 
01212    *
01213    * @param cur_token the current lookahead Symbol.
01214    */
01215   public void unrecovered_syntax_error(Symbol cur_token)
01216     throws java.lang.Exception
01217     {
01218       report_fatal_error("Couldn't repair and continue parse", cur_token);
01219     }
01220   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
01221 
01222   /** User code for initialization inside the parser.  Typically this 
01223    *  initializes the scanner.  This is called before the parser requests
01224    *  the first Symbol.  Here this is just a placeholder for subclasses that 
01225    *  might need this and we perform no action.   This method is normally
01226    *  overridden by the generated code using this contents of the "init with"
01227    *  clause as its body.
01228    */
01229   public void user_init() throws java.lang.Exception { }  
01230 }

Generated at Thu Feb 7 06:50:05 2002 for Bandera by doxygen1.2.10 written by Dimitri van Heesch, © 1997-2001