Main Page   Packages   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

FindPattern.java

00001 package de.fub.bytecode.generic;
00002 
00003 import de.fub.bytecode.Constants;
00004 import de.fub.bytecode.classfile.*;
00005 import gnu.regexp.*;
00006 
00007 /** 
00008  * This class is an utility to search for given patterns, i.e., regular expressions
00009  * in an instruction list. This can be used in order to implement a
00010  * peep hole optimizer that looks for code patterns and replaces them with
00011  * faster equivalents.
00012  *
00013  * This class internally uses the package
00014  * <a href="http://www.cacas.org/~wes/java/">
00015  * gnu.regexp</a> to search for regular expressions.
00016  *
00017  * @version $Id: FindPattern.java,v 1.1.1.1 2002/01/24 03:41:39 pserver Exp $
00018  * @author  <A HREF="http://www.inf.fu-berlin.de/~dahm">M. Dahm</A>
00019  * @see     Instruction
00020  * @see     InstructionList
00021  * @see     CodeConstraint
00022  */
00023 public class FindPattern {
00024   private static final int OFFSET     = 32767; // char + OFFSET is outside of LATIN-1
00025   private static final int NO_OPCODES = 256;   // Potential number, some are not used
00026 
00027   private static final String[] patterns = {
00028     "instruction", "branchinstruction", "if_icmp__", "if__", "push",
00029     "iload__", "aload__", "fload__", "dload__", "lload__",
00030     "istore__", "astore__", "fstore__", "dstore__", "lstore__",
00031     "invokeinstruction", "returninstruction", "ifinstruction"
00032   };
00033 
00034   private static String[] pattern_map; // filled in by static initializer, see below
00035 
00036   private InstructionList     il;
00037   private String              il_string;    // instruction list as string
00038   private InstructionHandle[] handles;      // map instruction list to array
00039   private int                 match_length; // Number of matched instructions
00040   private int                 matched_from; // Index of match in instruction list
00041 
00042   /* Static initializer.
00043    */
00044   static {
00045 
00046   String instruction_pattern, binstruction_pattern, if_icmp_pattern, if_pattern,
00047     push_pattern, iload_pattern, aload_pattern, fload_pattern,
00048     dload_pattern, lload_pattern, istore_pattern, astore_pattern,
00049     fstore_pattern, dstore_pattern, lstore_pattern, invoke_pattern, return_pattern,
00050     if_pattern2;
00051 
00052   StringBuffer buf;
00053 
00054     /* Make instruction string
00055      */
00056     buf = new StringBuffer("(");
00057 
00058     for(short i=0; i < NO_OPCODES; i++) {
00059       if(Constants.NO_OF_OPERANDS[i] != Constants.UNDEFINED) { // Not an invalid opcode
00060     buf.append(makeChar(i));
00061 
00062     if(i < NO_OPCODES - 1)
00063       buf.append('|');
00064       }
00065     }
00066     buf.append(')');
00067 
00068     instruction_pattern = buf.toString();
00069 
00070     /* Make BranchInstruction string
00071      */
00072     appendPatterns(buf = new StringBuffer("("), Constants.IFEQ, Constants.LOOKUPSWITCH);
00073     buf.append('|');
00074     appendPatterns(buf, Constants.IFNULL, Constants.JSR_W);
00075     buf.append(')');
00076     binstruction_pattern = buf.toString();
00077  
00078     /* Make IF_ICMP__ pattern string
00079      */
00080     appendPatterns(buf = new StringBuffer("("), Constants.IF_ICMPEQ, Constants.IF_ICMPLE);
00081     buf.append(')');
00082     if_icmp_pattern = buf.toString();
00083 
00084     /* Make IF__ pattern string
00085      */
00086     appendPatterns(buf = new StringBuffer("("), Constants.IFEQ, Constants.IFLE);
00087     buf.append(')');
00088     if_pattern = buf.toString();
00089 
00090     /* Make PUSH pattern string
00091      */
00092     appendPatterns(buf = new StringBuffer("("), Constants.ACONST_NULL,Constants. LDC2_W);
00093     buf.append(')');
00094     push_pattern = buf.toString();
00095 
00096     /* Make ILOAD__ pattern string
00097      */
00098     appendPatterns(buf = new StringBuffer("("), Constants.ILOAD_0, Constants.ILOAD_3);
00099     buf.append('|');
00100     buf.append(makeChar(Constants.ILOAD));
00101     buf.append(')');
00102     iload_pattern = buf.toString();
00103 
00104     /* Make ALOAD__ pattern string
00105      */
00106     appendPatterns(buf = new StringBuffer("("), Constants.ALOAD_0, Constants.ALOAD_3);
00107     buf.append('|');
00108     buf.append(makeChar(Constants.ALOAD));
00109     buf.append(')');
00110     aload_pattern = buf.toString();
00111 
00112     /* Make FLOAD__ pattern string
00113      */
00114     appendPatterns(buf = new StringBuffer("("), Constants.FLOAD_0, Constants.FLOAD_3);
00115     buf.append('|');
00116     buf.append(makeChar(Constants.FLOAD));
00117     buf.append(')');
00118     fload_pattern = buf.toString();
00119 
00120     /* Make DLOAD__ pattern string
00121      */
00122     appendPatterns(buf = new StringBuffer("("), Constants.DLOAD_0, Constants.DLOAD_3);
00123     buf.append('|');
00124     buf.append(makeChar(Constants.DLOAD));
00125     buf.append(')');
00126     dload_pattern = buf.toString();
00127 
00128     /* Make LLOAD__ pattern string
00129      */
00130     appendPatterns(buf = new StringBuffer("("), Constants.LLOAD_0, Constants.LLOAD_3);
00131     buf.append('|');
00132     buf.append(makeChar(Constants.LLOAD));
00133     buf.append(')');
00134     lload_pattern = buf.toString();
00135 
00136     /* Make ISTORE__ pattern string
00137      */
00138     appendPatterns(buf = new StringBuffer("("), Constants.ISTORE_0, Constants.ISTORE_3);
00139     buf.append('|');
00140     buf.append(makeChar(Constants.ISTORE));
00141     buf.append(')');
00142     istore_pattern = buf.toString();
00143 
00144     /* Make ASTORE__ pattern string
00145      */
00146     appendPatterns(buf = new StringBuffer("("), Constants.ASTORE_0, Constants.ASTORE_3);
00147     buf.append('|');
00148     buf.append(makeChar(Constants.ASTORE));
00149     buf.append(')');
00150     astore_pattern = buf.toString();
00151 
00152     /* Make FSTORE__ pattern string
00153      */
00154     appendPatterns(buf = new StringBuffer("("), Constants.FSTORE_0, Constants.FSTORE_3);
00155     buf.append('|');
00156     buf.append(makeChar(Constants.FSTORE));
00157     buf.append(')');
00158     fstore_pattern = buf.toString();
00159 
00160     /* Make DSTORE__ pattern string
00161      */
00162     appendPatterns(buf = new StringBuffer("("), Constants.DSTORE_0, Constants.DSTORE_3);
00163     buf.append('|');
00164     buf.append(makeChar(Constants.DSTORE));
00165     buf.append(')');
00166     dstore_pattern = buf.toString();
00167 
00168     /* Make LSTORE__ pattern string
00169      */
00170     appendPatterns(buf = new StringBuffer("("), Constants.LSTORE_0, Constants.LSTORE_3);
00171     buf.append('|');
00172     buf.append(makeChar(Constants.LSTORE));
00173     buf.append(')');
00174     lstore_pattern = buf.toString();
00175 
00176     /* Make INVOKE pattern string
00177      */
00178     appendPatterns(buf = new StringBuffer("("), Constants.INVOKEVIRTUAL, Constants.INVOKEINTERFACE);
00179     buf.append(')');
00180     invoke_pattern = buf.toString();
00181 
00182     /* Make RETURN pattern string
00183      */
00184     appendPatterns(buf = new StringBuffer("("), Constants.IRETURN, Constants.RETURN);
00185     buf.append(')');
00186     return_pattern = buf.toString();
00187 
00188     /* Make IfInstruction pattern string
00189      */
00190     appendPatterns(buf = new StringBuffer("("), Constants.IFEQ, Constants.IF_ACMPNE);
00191     buf.append('|');
00192     buf.append(makeChar(Constants.IFNULL));
00193     buf.append('|');
00194     buf.append(makeChar(Constants.IFNONNULL));
00195     buf.append(')');
00196     if_pattern2 = buf.toString();
00197 
00198     pattern_map = new String[] {
00199       instruction_pattern, binstruction_pattern, if_icmp_pattern, if_pattern,
00200       push_pattern, iload_pattern, aload_pattern, fload_pattern,
00201       dload_pattern, lload_pattern, istore_pattern, astore_pattern,
00202       fstore_pattern, dstore_pattern, lstore_pattern, invoke_pattern, return_pattern,
00203       if_pattern2
00204     };
00205   }
00206 
00207   /**
00208    * @param il instruction list to search for given patterns
00209    */
00210   public FindPattern(InstructionList il) {
00211     this.il = il;
00212     reread();
00213   }  
00214   /**
00215    * Append instructions characters starting from `start' to `to'.
00216    */
00217   private final static void appendPatterns(StringBuffer buf, short from, short to) {
00218     for(short i=from; i <= to; i++) {
00219       buf.append(makeChar(i));
00220 
00221       if(i < to)
00222     buf.append('|');
00223     }
00224   }  
00225   /**
00226    * @return the inquired instruction list
00227    */
00228   public final InstructionList getInstructionList() { return il; }  
00229   /**
00230    * @return the matched piece of code as an array of instruction (handles)
00231    */
00232   public final InstructionHandle[] getMatch() {
00233     if(match_length == -1)
00234       throw new ClassGenException("Nothing matched.");
00235 
00236     InstructionHandle[] match = new InstructionHandle[match_length];
00237     System.arraycopy(handles, matched_from, match, 0, match_length);
00238 
00239     return match;
00240   }  
00241   /**
00242    * @return number of matched instructions, or -1 if the match did not succeed
00243    */
00244   public final int getMatchLength() { return match_length; }  
00245   /**
00246    * Map symbolic strings like `branchinstruction' or `'a regular expression string
00247    * such as (a|b|z) (where a,b,c whil be non-printable characters in LATIN-1)
00248    *
00249    * @param pattern instruction pattern in lower case
00250    * @return encoded string for a pattern such as "BranchInstruction".
00251    */
00252   private static final String getPattern(String pattern) {
00253     // Check for abbreviations
00254     for(int i=0; i < patterns.length; i++) {
00255       if(pattern.equals(patterns[i]))
00256     return pattern_map[i]; // return the string mapped to that name
00257     }
00258 
00259     // Check for opcode names
00260     for(short i=0; i < NO_OPCODES; i++)
00261       if(pattern.equals(Constants.OPCODE_NAMES[i]))
00262     return new String(new char[] { makeChar(i) });
00263 
00264     return null; // Failed to match
00265   }  
00266   /**
00267    * Convert opcode number to char.
00268    */
00269   private static final char makeChar(short opcode) {
00270     return (char)(opcode + OFFSET);
00271   }  
00272   /**
00273    * Replace all occurences of `something' with the appropiate pattern, the `' chars
00274    * are used as an escape sequence.
00275    * Other characters than the escaped one will be ignored, in particular the meta
00276    * characters used for regular expression such as *, +, [, etc.
00277    *
00278    * @param pattern The pattern to compile
00279    * @return complete regular expression string
00280    */
00281   private static final String makePattern(String pattern) {
00282     String       lower      = pattern.toLowerCase();
00283     StringBuffer buf        = new StringBuffer();
00284     int          size       = pattern.length();
00285     boolean      in_pattern = false; // remember current state
00286     StringBuffer collect    = null;
00287 
00288     try {
00289       for(int i=0; i < size; i++) {
00290     char ch = lower.charAt(i);
00291     
00292     switch(ch) {
00293     case '`': // Start of instruction pattern
00294       if(in_pattern)
00295         throw new ClassGenException("` within `' block.");
00296 
00297       collect    = new StringBuffer();
00298       in_pattern = true; // remember current state
00299       break;
00300 
00301     case '\'': // end of instruction pattern
00302       if(!in_pattern)
00303         throw new ClassGenException("' without starting `.");
00304       
00305       in_pattern = false;
00306       String str = collect.toString(); // String within the `'
00307       String pat = getPattern(str);
00308 
00309       if(pat == null)
00310         throw new ClassGenException("Unknown instruction pattern: \"" + str +
00311                     "\"" + " at index " + i);
00312       buf.append(pat);
00313       break;
00314 
00315     default:
00316       if(in_pattern)
00317         collect.append(ch);
00318       else
00319         buf.append(ch); // Just append it (meta character)
00320     }
00321       }
00322     } catch(StringIndexOutOfBoundsException e) {
00323       e.printStackTrace();
00324     }
00325 
00326     return buf.toString();
00327   }  
00328   /**
00329    * Internal debugging routines.
00330    */
00331   private static final String pattern2string(String pattern) {
00332     return pattern2string(pattern, true);
00333   }  
00334   private static final String pattern2string(String pattern, boolean make_string) {
00335     StringBuffer buf = new StringBuffer();
00336 
00337     for(int i=0; i < pattern.length(); i++) {
00338       char ch = pattern.charAt(i);
00339 
00340       if(ch >= OFFSET) {
00341     if(make_string)
00342       buf.append(Constants.OPCODE_NAMES[ch - OFFSET]);
00343     else
00344       buf.append((int)(ch - OFFSET));
00345       }
00346       else
00347     buf.append(ch);
00348     }
00349 
00350     return buf.toString();
00351   }  
00352   /**
00353    * Rereads the instruction list, e.g., after you've altered the list upon a match.
00354    */
00355   public final void reread() {
00356     int    size  = il.getLength();
00357     char[] buf   = new char[size]; // Create a string with length equal to il length
00358     handles      = il.getInstructionHandles();
00359 
00360     match_length = -1; // reset match_length
00361 
00362     // Map opcodes to characters
00363     for(int i=0; i < size; i++)
00364       buf[i] = makeChar(handles[i].getInstruction().getTag());
00365 
00366     il_string = new String(buf);
00367   }  
00368   /**
00369    * Start search beginning from the start of the given instruction list.
00370    *
00371    * @param pattern the instruction pattern to search for, case is ignored
00372    * @return instruction handle or `null' if the matching fails
00373    */
00374   public final InstructionHandle search(String pattern) {
00375     return search(pattern, il.getStart(), null);
00376   }  
00377   /**
00378    * Start search beginning from the start of the given instruction list.
00379    * Check found matches with the constraint object.
00380    *
00381    * @param pattern the instruction pattern to search for, case is ignored
00382    * @param constraint constraints to be checked on matching code
00383    * @return instruction handle or `null' if the match failed
00384    */
00385   public final InstructionHandle search(String pattern, CodeConstraint constraint) {
00386     return search(pattern, il.getStart(), constraint);
00387   }  
00388   /**
00389    * Start search beginning from `from'.
00390    *
00391    * @param pattern the instruction pattern to search for, case is ignored
00392    * @param from where to start the search in the instruction list
00393    * @return instruction handle or `null' if the matching fails
00394    */
00395   public final InstructionHandle search(String pattern, InstructionHandle from) {
00396     return search(pattern, from, null);
00397   }  
00398   /**
00399    * Search for the given pattern in the InstructionList. You may use the following
00400    * special expressions in your pattern string which match instructions that belong
00401    * to the denoted class. The `' are an escape and <b>must not</b> be omitted.
00402    *
00403    * You can use the Instruction names directly:
00404    *
00405    * `ILOAD_1', `GOTO', 'NOP', etc..
00406    *
00407    * For convenience there exist some abbreviations for instructions that belong
00408    * to the same group (underscores _ are used as some kind of wildcards):
00409    *
00410    * `Instruction', `BranchInstruction', `InvokeInstruction', `ReturnInstruction',
00411    * `IfInstruction' correspond to their classes.
00412    *
00413    * `IF_ICMP__', `IF__', where __ stands for EQ, LE, etc.
00414    * `xLOAD__', `xSTORE__', where x stands for I, D, F, L or A. __ is 0..3 or empty
00415    * `PUSH' stands for any LDC, xCONST__, SIPUSH or BIPUSH instruction
00416    *
00417    * You <B>must</B> put the `' around these words or they can't be matched correctly.
00418    *
00419    * For the rest the usual (PERL) pattern matching rules apply.<P>
00420    * Example pattern:
00421    * <pre>
00422      search("(`BranchInstruction')`NOP'((`IF_ICMP__'|`GOTO')+`ISTORE__'`Instruction')*");
00423    * </pre>
00424    *
00425    *
00426    * @param pattern the instruction pattern to search for, case is ignored
00427    * @param from where to start the search in the instruction list
00428    * @param constraint optional CodeConstraint to check the found code pattern for
00429    * given constraints
00430    * @return instruction handle or `null' if the matching fails
00431    */
00432   public final InstructionHandle search(String pattern, InstructionHandle from,
00433                     CodeConstraint constraint) {
00434     String search = makePattern(pattern);
00435     int  start    = -1;
00436 
00437     match_length = matched_from = -1; // reset
00438 
00439     for(int i=0; i < handles.length; i++) {
00440       if(handles[i] == from) {
00441     start = i; // Where to start search from (index)
00442     break;
00443       }
00444     }
00445 
00446     if(start == -1)
00447       throw new ClassGenException("Instruction handle " + from + 
00448                   " not found in instruction list.");
00449 
00450     try {
00451       RE      regex = new RE(search);
00452       REMatch r     = regex.getMatch(il_string, start);
00453       
00454       if(r != null) {
00455     matched_from = r.getStartIndex();
00456     match_length = (r.getEndIndex() - matched_from);
00457 
00458     if((constraint == null) || constraint.checkCode(getMatch()))
00459       return handles[matched_from];
00460       }
00461     } catch(REException e) {
00462       System.err.println(e);
00463     }
00464 
00465     return null;
00466   }  
00467   /**
00468    * Defines a new instruction list. Automatically calls
00469    * <a href="#reread()">reread()</a> to update the object.
00470    *
00471    * @param il the new instuction list
00472    */
00473   public final void setInstructionList(InstructionList il) {
00474     this.il = il;
00475     reread();
00476   }  
00477 }

Generated at Thu Feb 7 06:45:45 2002 for Bandera by doxygen1.2.10 written by Dimitri van Heesch, © 1997-2001