/* * JacORB - a free Java ORB * * Copyright (C) 1997-2003 Gerald Brose. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.jacorb.idl; import java_cup.runtime.*; import java.util.*; /** * This class implements a scanner (aka lexical analyzer or * lexer) for IDL. The scanner reads characters from a global input * stream and returns integers corresponding to the terminal number * of the next token. Once the end of input is reached the EOF token * is returned on every subsequent call.
* * All symbol constants are defined in sym.java which is generated by * JavaCup from parser.cup.
* * In addition to the scanner proper (called first via init() then * with next_token() to get each token) this class provides simple * error and warning routines and keeps a count of errors and * warnings that is publicly accessible. It also provides basic * preprocessing facilties, i.e. it does handle preprocessor * directives such as #define, #undef, #include, etc. although it * does not provide full C++ preprocessing * * This class is "static" (i.e., it has only static members and methods). * * @version $Id: lexer.java,v 1.43 2003/10/30 08:49:41 brose Exp $ * @author Gerald Brose * */ public class lexer { private static org.apache.log.Logger logger = parser.getLogger(); /** First and second character of lookahead. */ protected static int next_char; protected static int next_char2; /** EOF constant. */ protected static final int EOF_CHAR = -1; /** * Table of keywords. Keywords are initially treated as * identifiers. Just before they are returned we look them up in * this table to see if they match one of the keywords. The * string of the name is the key here, which indexes Integer * objects holding the symbol number. */ protected static Hashtable keywords = new Hashtable(); /** Table of keywords, stored in lower case. Keys are the * lower case version of the keywords used as keys for the keywords * hash above, and the values are the case sensitive versions of * the keywords. This table is used for detecting collisions of * identifiers with keywords. */ protected static Hashtable keywords_lower_case = new Hashtable(); /** Table of Java reserved names. */ protected static Hashtable java_keywords = new Hashtable(); /** Table of single character symbols. For ease of implementation, we * store all unambiguous single character tokens in this table of Integer * objects keyed by Integer objects with the numerical value of the * appropriate char (currently Character objects have a bug which precludes * their use in tables). 
*/ protected static Hashtable char_symbols = new Hashtable( 25 ); /** Defined symbols (preprocessor) */ protected static Hashtable defines = new Hashtable(); protected static boolean conditionalCompilation = true; /** nested #ifdefs are pushed on this stack by the "preprocessor" */ private static java.util.Stack ifStack = new Stack(); private static java.util.Stack tokenStack = new Stack(); /** Current line number for use in error messages. */ protected static int current_line = 1; /** Current line for use in error messages. */ protected static StringBuffer line = new StringBuffer(); /** Character position in current line. */ protected static int current_position = 1; /** Have we already read a '"' ? */ protected static boolean in_string = false; /** Are we processing a wide char or string ? */ protected static boolean wide = false; /** Count of total errors detected so far. */ static int error_count = 0; /** Count of warnings issued so far */ public static int warning_count = 0; /** currently active pragma prefix */ public static String currentPragmaPrefix = ""; /** current file name */ public static String currentFile = ""; /** reset the scanner state */ public static void reset() { current_position = 1; error_count = 0; warning_count = 0; currentPragmaPrefix = ""; line = new StringBuffer(); ifStack.removeAllElements(); tokenStack.removeAllElements(); defines.clear(); } /** * Initialize the scanner. This sets up the keywords and char_symbols * tables and reads the first two characters of lookahead. * * "Object" is listed as reserved in the OMG spec. * "int" is not, but I reserved it to bar its usage as a legal integer * type. 
*/ public static void init() throws java.io.IOException { /* set up standard symbols */ defines.put( "JACORB_IDL_1_4", "" ); /* set up the keyword table */ keywords.put( "abstract", new Integer( sym.ABSTRACT ) ); keywords.put( "any", new Integer( sym.ANY ) ); keywords.put( "attribute", new Integer( sym.ATTRIBUTE ) ); keywords.put( "boolean", new Integer( sym.BOOLEAN ) ); keywords.put( "case", new Integer( sym.CASE ) ); keywords.put( "char", new Integer( sym.CHAR ) ); keywords.put( "const", new Integer( sym.CONST ) ); keywords.put( "context", new Integer( sym.CONTEXT ) ); keywords.put( "custom", new Integer( sym.CUSTOM ) ); keywords.put( "default", new Integer( sym.DEFAULT ) ); keywords.put( "double", new Integer( sym.DOUBLE ) ); keywords.put( "enum", new Integer( sym.ENUM ) ); keywords.put( "exception", new Integer( sym.EXCEPTION ) ); keywords.put( "factory", new Integer( sym.FACTORY ) ); keywords.put( "FALSE", new Integer( sym.FALSE ) ); keywords.put( "fixed", new Integer( sym.FIXED ) ); keywords.put( "float", new Integer( sym.FLOAT ) ); keywords.put( "in", new Integer( sym.IN ) ); keywords.put( "inout", new Integer( sym.INOUT ) ); keywords.put( "interface", new Integer( sym.INTERFACE ) ); keywords.put( "local", new Integer( sym.LOCAL ) ); keywords.put( "long", new Integer( sym.LONG ) ); keywords.put( "module", new Integer( sym.MODULE ) ); keywords.put( "native", new Integer( sym.NATIVE ) ); keywords.put( "Object", new Integer( sym.OBJECT ) ); keywords.put( "octet", new Integer( sym.OCTET ) ); keywords.put( "oneway", new Integer( sym.ONEWAY ) ); keywords.put( "out", new Integer( sym.OUT ) ); keywords.put( "private", new Integer( sym.PRIVATE ) ); keywords.put( "public", new Integer( sym.PUBLIC ) ); keywords.put( "pseudo", new Integer( sym.PSEUDO ) ); keywords.put( "raises", new Integer( sym.RAISES ) ); keywords.put( "readonly", new Integer( sym.READONLY ) ); keywords.put( "sequence", new Integer( sym.SEQUENCE ) ); keywords.put( "short", new Integer( sym.SHORT ) ); 
keywords.put( "string", new Integer( sym.STRING ) ); keywords.put( "struct", new Integer( sym.STRUCT ) ); keywords.put( "supports", new Integer( sym.SUPPORTS ) ); keywords.put( "switch", new Integer( sym.SWITCH ) ); keywords.put( "TRUE", new Integer( sym.TRUE ) ); keywords.put( "truncatable", new Integer( sym.TRUNCATABLE ) ); keywords.put( "typedef", new Integer( sym.TYPEDEF ) ); keywords.put( "unsigned", new Integer( sym.UNSIGNED ) ); keywords.put( "union", new Integer( sym.UNION ) ); keywords.put( "ValueBase", new Integer( sym.VALUEBASE ) ); keywords.put( "valuetype", new Integer( sym.VALUETYPE ) ); keywords.put( "void", new Integer( sym.VOID ) ); keywords.put( "wchar", new Integer( sym.WCHAR ) ); keywords.put( "wstring", new Integer( sym.WSTRING ) ); keywords.put( "::", new Integer( sym.DBLCOLON ) ); keywords.put( "<<", new Integer( sym.LSHIFT ) ); keywords.put( ">>", new Integer( sym.RSHIFT ) ); keywords.put( "L\"", new Integer( sym.LDBLQUOTE ) ); // setup the mapping of lower case keywords to case sensitive // keywords for( java.util.Enumeration e = keywords.keys(); e.hasMoreElements(); ) { String keyword = (String)e.nextElement(); String keyword_lower_case = keyword.toLowerCase(); keywords_lower_case.put( keyword_lower_case, keyword ); } /* set up the table of single character symbols */ char_symbols.put( new Integer( ';' ), new Integer( sym.SEMI ) ); char_symbols.put( new Integer( ',' ), new Integer( sym.COMMA ) ); char_symbols.put( new Integer( '*' ), new Integer( sym.STAR ) ); char_symbols.put( new Integer( '.' 
), new Integer( sym.DOT ) ); char_symbols.put( new Integer( ':' ), new Integer( sym.COLON ) ); char_symbols.put( new Integer( '=' ), new Integer( sym.EQUALS ) ); char_symbols.put( new Integer( '+' ), new Integer( sym.PLUS ) ); char_symbols.put( new Integer( '-' ), new Integer( sym.MINUS ) ); char_symbols.put( new Integer( '{' ), new Integer( sym.LCBRACE ) ); char_symbols.put( new Integer( '}' ), new Integer( sym.RCBRACE ) ); char_symbols.put( new Integer( '(' ), new Integer( sym.LPAREN ) ); char_symbols.put( new Integer( ')' ), new Integer( sym.RPAREN ) ); char_symbols.put( new Integer( '[' ), new Integer( sym.LSBRACE ) ); char_symbols.put( new Integer( ']' ), new Integer( sym.RSBRACE ) ); char_symbols.put( new Integer( '<' ), new Integer( sym.LESSTHAN ) ); char_symbols.put( new Integer( '>' ), new Integer( sym.GREATERTHAN ) ); char_symbols.put( new Integer( '\'' ), new Integer( sym.QUOTE ) ); char_symbols.put( new Integer( '\"' ), new Integer( sym.DBLQUOTE ) ); char_symbols.put( new Integer( '\\' ), new Integer( sym.BSLASH ) ); char_symbols.put( new Integer( '^' ), new Integer( sym.CIRCUM ) ); char_symbols.put( new Integer( '&' ), new Integer( sym.AMPERSAND ) ); char_symbols.put( new Integer( '/' ), new Integer( sym.SLASH ) ); char_symbols.put( new Integer( '%' ), new Integer( sym.PERCENT ) ); char_symbols.put( new Integer( '~' ), new Integer( sym.TILDE ) ); char_symbols.put( new Integer( '|' ), new Integer( sym.BAR ) ); char_symbols.put( new Integer( ' ' ), new Integer( sym.SPACE ) ); /* set up reserved Java names */ java_keywords.put( "abstract", "" ); java_keywords.put( "boolean", "" ); java_keywords.put( "break", "" ); java_keywords.put( "byte", "" ); java_keywords.put( "case", "" ); java_keywords.put( "catch", "" ); java_keywords.put( "char", "" ); java_keywords.put( "class", "" ); java_keywords.put( "const", "" ); java_keywords.put( "continue", "" ); java_keywords.put( "default", "" ); java_keywords.put( "do", "" ); java_keywords.put( "double", "" ); 
java_keywords.put( "else", "" ); java_keywords.put( "extends", "" ); java_keywords.put( "false", "" ); java_keywords.put( "final", "" ); java_keywords.put( "finally", "" ); java_keywords.put( "float", "" ); java_keywords.put( "for", "" ); java_keywords.put( "goto", "" ); java_keywords.put( "if", "" ); java_keywords.put( "implements", "" ); java_keywords.put( "import", "" ); java_keywords.put( "instanceof", "" ); java_keywords.put( "int", "" ); java_keywords.put( "interface", "" ); java_keywords.put( "long", "" ); java_keywords.put( "native", "" ); java_keywords.put( "new", "" ); java_keywords.put( "null", "" ); java_keywords.put( "package", "" ); java_keywords.put( "private", "" ); java_keywords.put( "protected", "" ); java_keywords.put( "public", "" ); java_keywords.put( "return", "" ); java_keywords.put( "short", "" ); java_keywords.put( "static", "" ); java_keywords.put( "super", "" ); java_keywords.put( "switch", "" ); java_keywords.put( "synchronized", "" ); java_keywords.put( "true", "" ); java_keywords.put( "this", "" ); java_keywords.put( "throw", "" ); java_keywords.put( "throws", "" ); java_keywords.put( "transient", "" ); java_keywords.put( "try", "" ); java_keywords.put( "void", "" ); java_keywords.put( "volatile", "" ); java_keywords.put( "while", "" ); java_keywords.put( "clone", "" ); java_keywords.put( "equals", "" ); java_keywords.put( "finalize", "" ); java_keywords.put( "getClass", "" ); java_keywords.put( "hashCode", "" ); java_keywords.put( "notify", "" ); java_keywords.put( "notifyAll", "" ); java_keywords.put( "toString", "" ); java_keywords.put( "wait", "" ); /* stack needs a topmost value */ ifStack.push( new Boolean( true ) ); /* read two characters of lookahead */ try { next_char = GlobalInputStream.read(); } catch( Exception e ) { org.jacorb.idl.parser.fatal_error( "Cannot read from file " + GlobalInputStream.currentFile().getAbsolutePath() + ", please check file name.", null ); } if( next_char == EOF_CHAR ) next_char2 = EOF_CHAR; else 
next_char2 = GlobalInputStream.read(); } public static void define( String symbol, String value ) { if( logger.isDebugEnabled() ) logger.debug( "Defining: " + symbol + " as " + value ); defines.put( symbol, value ); } public static void undefine( String symbol ) { if( logger.isDebugEnabled() ) logger.debug( "Un-defining: " + symbol ); defines.remove( symbol ); } public static String defined( String symbol ) { return (String)defines.get( symbol ); } /** * record information about the last lexical scope so that it can be * restored later */ public static int currentLine() { return current_line; } /** * return the current reading position */ public static PositionInfo getPosition() { return new PositionInfo( current_line, current_position, currentPragmaPrefix, line.toString(), GlobalInputStream.currentFile() ); } public static void restorePosition( PositionInfo p ) { current_line = p.line_no; currentPragmaPrefix = p.pragma_prefix; current_position = 0; } /** * Advance the scanner one character in the input stream. This moves * next_char2 to next_char and then reads a new next_char2. */ protected static void advance() throws java.io.IOException { int old_char; old_char = next_char; next_char = next_char2; next_char2 = GlobalInputStream.read(); line.append( (char)old_char ); /* count this */ current_position++; if( old_char == '\n' ) { current_line++; current_position = 1; line = new StringBuffer(); } // if( logger.isDebugEnabled() ) // logger.debug("Lexer.advance() next_char is " + next_char + " (" + (char)next_char + ")"); } /** * Emit an error message. The message will be marked with both the * current line number and the position in the line. Error messages * are printed on standard error (System.err). * @param message the message to print. 
* @param p_info an optional PositionInfo object */ public static void emit_error( String message ) { System.err.println( GlobalInputStream.currentFile().getAbsolutePath() + ", line: " + current_line + "(" + current_position + "): " + message ); System.err.println( "\t" + line.toString() ); error_count++; } public static void emit_error( String message, str_token t ) { if( t == null ) { emit_error( message ); } else { System.err.println( "Error in " + t.fileName + ", line:" + t.line_no + "(" + t.char_pos + "): " + message ); System.err.println( "\t" + t.line_val ); error_count++; } } /** * Emit a warning message. The message will be marked with both the * current line number and the position in the line. Messages are * printed on standard error (System.err). * * @param message the message to print. */ public static void emit_warn( String message ) { System.err.println( "Warning: " + message + " at " + current_line + "(" + current_position + "): \"" + line.toString() + "\"" ); warning_count++; } public static void emit_warn( String message, str_token t ) { if( t == null ) { emit_warn( message ); } else { System.err.println( "Warning at " + t.fileName + ", line:" + t.line_no + "(" + t.char_pos + "): " + message ); System.err.println( "\t" + t.line_val ); warning_count++; } } /** * Determine if a character is ok to start an id. * @param ch the character in question. */ protected static boolean id_start_char( int ch ) { return ( ch >= 'a' && ch <= 'z' ) || ( ch >= 'A' && ch <= 'Z' ) || ( ch == '_' ); } /** * Determine if a character is ok for the middle of an id. * @param ch the character in question. */ protected static boolean id_char( int ch ) { return id_start_char( ch ) || ( ch == '_' ) || ( ch >= '0' && ch <= '9' ); } /** * Try to look up a single character symbol, returns -1 for not found. * @param ch the character in question. 
*/ protected static int find_single_char( int ch ) { Integer result; result = (Integer)char_symbols.get( new Integer( (char)ch ) ); if( result == null ) return -1; else return result.intValue(); } /** * Handle swallowing up a comment. Both old style C and new style C++ * comments are handled. */ protected static void swallow_comment() throws java.io.IOException { /* next_char == '/' at this point */ /* is it a traditional comment */ if( next_char2 == '*' ) { /* swallow the opener */ advance(); advance(); /* swallow the comment until end of comment or EOF */ for( ; ; ) { /* if its EOF we have an error */ if( next_char == EOF_CHAR ) { emit_error( "Specification file ends inside a comment", null ); return; } /* if we can see the closer we are done */ if( next_char == '*' && next_char2 == '/' ) { advance(); advance(); return; } /* otherwise swallow char and move on */ advance(); } } /* is its a new style comment */ if( next_char2 == '/' ) { /* swallow the opener */ advance(); advance(); /* swallow to '\n', '\f', or EOF */ while( next_char != '\n' && next_char != '\f' && next_char != '\r' && next_char != EOF_CHAR ) { advance(); } return; } /* shouldn't get here, but... if we get here we have an error */ emit_error( "Malformed comment in specification -- ignored", null ); advance(); } /** * Preprocessor directives are handled here. 
*/ protected static void preprocess() throws java.io.IOException { for( ; ; ) { /* if its EOF we have an error */ if( next_char == EOF_CHAR ) { emit_error( "Specification file ends inside a preprocessor directive", null ); return; } else if( next_char != '#' ) { emit_error( "expected #, got " + (char)next_char + " instead!", null ); } else advance(); // skip '#' // the following is done to allow for # ifdef sloppiness while( ( ' ' == next_char ) || ( '\t' == next_char ) ) advance(); String dir = get_string(); if( dir.equals( "include" ) ) { if( !conditionalCompilation ) return; advance(); // skip ' ' boolean useIncludePath = ( next_char == '<' ); advance(); // skip `\"' or '<' String fname = get_string(); if( useIncludePath && ( next_char != '>' ) ) emit_error( "Syntax error in #include directive, expecting '>'" ); else if( !useIncludePath && ( next_char != '\"' ) ) emit_error( "Syntax error in #include directive, expecting \"" ); /* swallow to '\n', '\f', or EOF */ while( next_char != '\n' && next_char != '\f' && next_char != '\r' && next_char != EOF_CHAR ) { advance(); } GlobalInputStream.include( fname, next_char2, useIncludePath ); current_line = 0; advance(); advance(); // System.out.println("returning from include, next_char is: " + (char)next_char); return; } else if( dir.equals( "define" ) ) { if( !conditionalCompilation ) return; // advance(); // skip ' ' swallow_whitespace(); String name = get_string(); StringBuffer text = new StringBuffer(); if( next_char == ' ' ) { advance(); } while( next_char != '\n' ) { if( next_char == '\\' ) { advance(); advance(); } text.append( (char)next_char ); advance(); } // System.out.println("#Defined symbol " + name + " as: " + text.toString() ); define( name, text.toString() ); } else if( dir.equals( "error" ) ) { if( !conditionalCompilation ) return; advance(); // skip ' ' String name = get_string(); emit_error( name ); } else if( dir.equals( "undef" ) ) { if( !conditionalCompilation ) return; swallow_whitespace(); // 
advance(); // skip ' ' String name = get_string(); undefine( name ); // System.out.println("#Undefined symbol " + name ); } else if( dir.equals( "if" ) || dir.equals( "elif" ) ) { if (! dir.equals( "elif" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; } swallow_whitespace(); // the following snippet distinguishes between #if defined // and #if !defined boolean straightDefined = true; if( '!' == next_char ) { advance(); straightDefined = false; } String defineStr = get_string_no_paren(); if (defineStr.equals ("defined")) { swallow_whitespace(); boolean brackets = ( '(' == next_char ); if( brackets ) { advance(); // skip '(' swallow_whitespace(); // any whitespace after ( ? skip it } String name = get_string_no_paren(); if( brackets ) { swallow_whitespace(); if( logger.isDebugEnabled() ) logger.debug( "next char: " + next_char ); if( ')' != next_char ) { emit_error( "Expected ) terminating #if defined", null ); return; } advance(); } if( straightDefined ) conditionalCompilation = ( null != defined( name ) ); else conditionalCompilation = ( null == defined( name ) ); } else if (defineStr.equals("0")) { conditionalCompilation = false; } else if (defineStr.equals("1")) { conditionalCompilation = true; } else { emit_error( "Expected \"defined\" following #if: " + dir, null ); return; } } else if( dir.equals( "ifdef" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; swallow_whitespace(); //advance(); // skip ' ' String name = get_string(); conditionalCompilation = ( defined( name ) != null ); } else if( dir.equals( "ifndef" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; swallow_whitespace();//advance(); // skip ' ' String name = get_string(); conditionalCompilation = ( defined( name ) == null ); } else if( dir.equals( "else" ) ) { if( ( (Boolean)ifStack.peek() ).booleanValue() ) conditionalCompilation = 
!conditionalCompilation; } else if( dir.equals( "endif" ) ) { boolean b = ( (Boolean)ifStack.pop() ).booleanValue(); conditionalCompilation = b; } else if( dir.equals( "pragma" ) ) { if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); if( name.equals( "prefix" ) ) { advance(); currentPragmaPrefix = get_string(); } else if( name.equals( "version" ) ) { advance(); // skip ' ' String vname = get_string(); advance(); // skip ' ' String version = get_string(); String existingVersion = (String) parser.currentScopeData().versionMap.get (vname); if (existingVersion == null) { // Set version parser.currentScopeData().versionMap.put (vname, version); } else { // Check for version change if (! existingVersion.equals (version)) { emit_error ( "Version re-declaration with different value: #pragma version " + version, null ); } } String iname = (String)parser.currentScopeData().idMap.get (vname); if (iname != null) { if (version.equals (iname.substring (1 + iname.lastIndexOf (':'))) == false) { emit_error ("Declaring version with different version to already declared ID for " + name, null); } } } else if( name.equals( "ID" ) ) { advance(); // skip ' ' String iname = get_string(); advance(); // skip ' ' String id = get_string(); String existingID = (String) parser.currentScopeData().idMap.get (iname); if (existingID == null) { // Set id parser.currentScopeData().idMap.put (iname, id); } else { // Check for id change if (! 
existingID.equals (id)) { emit_error ( "ID re-declaration with different value: #pragma id " + id, null ); } } if( parser.currentScopeData().versionMap.get( iname ) != null ) { if( ((String)parser.currentScopeData().versionMap.get( iname )).equals ( id.substring (1 + id.lastIndexOf (':'))) == false ) { emit_error ("Declaring ID with different version to already declared version for " + name, null); } } } else if( name.equals( "inhibit_code_generation" ) ) { /* proprietary pragma of the JacORB IDL compiler */ parser.setInhibitionState( true ); // do something with it } else { emit_warn( "Unknown pragma, ignoring: #pragma " + name, null ); } } else { emit_error( "Unrecognized preprocessor directive " + dir, null ); } /* swallow to '\n', '\f', or EOF */ while( next_char != '\n' && next_char != '\f' && next_char != '\r' && next_char != EOF_CHAR ) { // System.out.println("Advancing after directive, next_char is: " + (char)next_char); advance(); } return; } } // the following is used for parsing the #if defined(...) 
construct private static String get_string_no_paren() throws java.io.IOException { StringBuffer sb = new StringBuffer(); char c = (char)next_char; while( c != ' ' && c != '\t' && c != '\r' && c != '\n' && c != '\f' && c != EOF_CHAR && c != '\"' && c != '<' && c != '>' && c != '(' && c != ')' ) { sb.append( c ); advance(); c = (char)next_char; } // System.out.println("get string returns " + sb.toString()); return sb.toString(); } private static String get_string() throws java.io.IOException { StringBuffer sb = new StringBuffer( "" ); if( next_char == '\"' ) { advance(); while( next_char != '\"' ) { if( next_char == EOF_CHAR ) emit_error( "Unexpected EOF in string" ); sb.append( (char)next_char ); advance(); } } else { while( next_char != ' ' && next_char != '\t' && next_char != '\r' && next_char != '\n' && next_char != '\f' && next_char != EOF_CHAR && next_char != '\"' && next_char != '<' && next_char != '>' ) { sb.append( (char)next_char ); advance(); } } return sb.toString(); } /** * Process an identifier. *
* Identifiers begin with a letter or an underscore, which is
* followed by zero or more letters, digits or underscores
* (see id_start_char() and id_char()). This routine returns a
* token suitable for return by the scanner: a plain token for an
* IDL keyword, a str_token for any other identifier. It returns
* null if checkIdentifier() rejects the identifier, or if the
* string that was read is a symbol that was #defined. In the
* latter case, the symbol is expanded in place
*/
protected static token do_symbol()
    throws java.io.IOException
{
    /* On entry next_char holds the first character of the symbol. It is
       taken unconditionally; further characters are taken only for as
       long as id_char() accepts them. */
    StringBuffer collected = new StringBuffer();
    do
    {
        collected.append( (char)next_char );
        advance();
    }
    while( id_char( next_char ) );

    String name = collected.toString();

    /* A #defined symbol is not returned as a token. Its replacement text,
       followed by the two lookahead characters currently held in next_char
       and next_char2, is handed to GlobalInputStream.insert(), and the
       lookahead is then refilled with two calls to advance(). */
    String replacement = defined( name );
    if( replacement != null )
    {
        String lookahead =
            String.valueOf( new char[]{ (char)next_char, (char)next_char2 } );
        GlobalInputStream.insert( replacement + lookahead );
        advance();
        advance();
        return null;
    }

    /* An IDL keyword is returned as a plain token; the keywords accepted
       by isScope() additionally open a new parser scope. */
    Integer keywordSymbol = (Integer)keywords.get( name );
    if( keywordSymbol != null )
    {
        if( isScope( name ) )
        {
            parser.openScope();
        }
        return new token( keywordSymbol.intValue() );
    }

    /* Anything else is an identifier. checkIdentifier() applies the
       keyword-collision and Java-escape rules and yields null when the
       identifier is rejected. */
    String identifier = checkIdentifier( name );
    if( identifier == null )
    {
        return null;
    }
    return new str_token( sym.ID, identifier, getPosition(),
                          GlobalInputStream.currentFile().getName() );
}
/**
 * Tells whether the given IDL keyword is one of module, interface,
 * struct, exception or union. do_symbol() opens a new parser scope
 * for these keywords. Note that valuetype is not in the list.
 *
 * @param keyword the keyword to test, must not be null
 * @return true if the keyword is one of the five listed above
 */
private static boolean isScope( String keyword )
{
    final String[] scopeKeywords =
        { "module", "interface", "struct", "exception", "union" };

    for( int i = 0; i < scopeKeywords.length; i++ )
    {
        if( keyword.equals( scopeKeywords[ i ] ) )
        {
            return true;
        }
    }
    return false;
}
/**
 * Checks whether the identifier str is legal and returns the name
 * to be used for it.
 * <ul>
 * <li>If the identifier is escaped with a leading underscore, that
 *     underscore is removed and no keyword check is made.</li>
 * <li>Otherwise the identifier is checked against the IDL keywords:
 *     case-insensitively if parser.strict_names is set,
 *     case-sensitively if not. On a collision an error is emitted
 *     via emit_error() and null is returned.</li>
 * <li>If the resulting name is listed in the table of Java reserved
 *     names, an underscore is prepended.</li>
 * </ul>
 *
 * @param str the IDL identifier, must not be empty
 * @return the (possibly un-escaped or Java-escaped) identifier, or
 *         null if it collides with an IDL keyword
 */
public static String checkIdentifier( String str )
{
    if( logger.isInfoEnabled() )
        logger.info( "checking identifier " + str );

    if( str.charAt( 0 ) == '_' )
    {
        /* escaped identifier: strip the escape character, the rest is
           accepted without looking it up as a keyword */
        str = str.substring( 1 );
    }
    else
    {
        String colliding_keyword = null;

        if( org.jacorb.idl.parser.strict_names )
        {
            // check for name clashes strictly (i.e. case insensitive);
            // keywords_lower_case maps the lower case spelling to the
            // case sensitive keyword
            colliding_keyword =
                (String)keywords_lower_case.get( str.toLowerCase() );
        }
        else if( keywords.containsKey( str ) )
        {
            // check for name clashes only loosely (i.e. case sensitive).
            // The values of the keywords table are Integer symbol
            // numbers, not Strings, so the value must not be cast to
            // String: the colliding keyword is str itself.
            colliding_keyword = str;
        }

        if( colliding_keyword != null )
        {
            emit_error( "Identifier " + str + " collides with keyword " +
                        colliding_keyword + "." );
            return null;
        }
    }

    /* clashes with a Java reserved word? */
    if( needsJavaEscape( str ) )
    {
        str = "_" + str;
    }
    return str;
}
/**
 * Tells whether s is a key of the java_keywords table.
 * This catches only the most general name clashes with Java
 * keywords; identifiers have to be checked again at other places
 * in the compiler.
 *
 * @param s the name to look up
 * @return true if s is listed in java_keywords
 */
private static boolean needsJavaEscape( String s )
{
    boolean reserved = java_keywords.containsKey( s );
    return reserved;
}
/**
 * Tells whether a name ends with, but is not identical to, one of
 * the suffixes Helper, Holder, Operations, Package, POA or POATie.
 * According to the original comment this is called during the parse
 * phase to catch clashes with Java reserved words.
 * NOTE(review): presumably these are the suffixes of generated Java
 * class names; confirm against the code generator.
 *
 * When the check succeeds, a notice is printed on System.out and a
 * stack trace identifying the caller is printed on System.err.
 *
 * @param s the name to check, must not be null
 * @return true if s carries one of the suffixes listed above
 */
public static boolean strictJavaEscapeCheck( String s )
{
    final String[] suffixes =
        { "Helper", "Holder", "Operations", "Package", "POA", "POATie" };

    boolean result = false;
    for( int i = 0; i < suffixes.length && !result; i++ )
    {
        // the bare suffix itself is not a clash, only a longer name
        // that ends with it
        result = !s.equals( suffixes[ i ] ) && s.endsWith( suffixes[ i ] );
    }

    if( result )
    {
        System.out.println( "strictJavaEscapeCheck found String " + s + " " +
                            "needs escape" );
        System.out.println( "Dumping the stack to see who calls this:" );
        // a Throwable records the current call stack on construction, so
        // there is no need to provoke and catch a NullPointerException
        new Throwable( "strictJavaEscapeCheck call trace" ).printStackTrace();
    }
    return result;
}
/**
 * Applies strictJavaEscapeCheck() to the pack_name of a module.
 *
 * @param m the module whose pack_name is checked
 * @return the result of strictJavaEscapeCheck() for m.pack_name
 */
public static boolean needsJavaEscape( Module m )
{
    final String packageName = m.pack_name;

    if( logger.isDebugEnabled() )
    {
        logger.debug( "checking module name " + packageName );
    }

    return strictJavaEscapeCheck( packageName );
}
/**
 * Returns the next token. This is the main external interface to
 * the scanner. Before scanning, the parser is told the current
 * include state of the global input stream; the token itself is
 * produced by real_next_token().
 *
 * @return the token delivered by real_next_token()
 * @throws java.io.IOException if real_next_token() throws it
 */
public static token next_token()
    throws java.io.IOException
{
    parser.set_included( GlobalInputStream.includeState() );
    return real_next_token();
}
/**
 * Skips white space: calls advance() for as long as next_char is a
 * blank, tab, newline, form feed or carriage return, and returns as
 * soon as next_char is any other value.
 *
 * @throws java.io.IOException if advance() throws it
 */
private static void swallow_whitespace()
    throws java.io.IOException
{
    for( ; ; )
    {
        switch( next_char )
        {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
                /* white space: step over it and look at the next character */
                advance();
                break;

            default:
                return;
        }
    }
}
/**
 * The actual routine to return one token.
 *
 * Tokens that an earlier call queued on tokenStack are handed out
 * first. Otherwise the input is scanned until a token can be
 * returned: white space, preprocessor directives and comments are
 * skipped, as is everything while conditionalCompilation is false.
 * An opening double quote switches the scanner into string mode
 * (in_string); the following call then collects the string body
 * and returns it as a str_token, and the closing quote is returned
 * by the call after that. A character literal is returned as the
 * three tokens QUOTE, CH, QUOTE, two of which are queued on
 * tokenStack.
 *
 * @return the next token; null after some lexical errors that
 *         have been reported through emit_error
 * @throws java.io.IOException
 */
protected static token real_next_token()
    throws java.io.IOException
{
    int sym_num;

    /* if we found more than a single token last time, these
       tokens were remembered on the tokenStack - return the first
       one here */
    if( !tokenStack.empty() )
        return (token)tokenStack.pop();

    /* else */
    for( ; ; )
    {
        /* scan input until we return something */
        if( !in_string )
        {
            swallow_whitespace();

            /* look for preprocessor directives */
            if( (char)next_char == '#' )
            {
                preprocess();
                continue;
            }

            /* look for a comment */
            if( next_char == '/' && ( next_char2 == '*' || next_char2 == '/' ) )
            {
                /* swallow then continue the scan */
                swallow_comment();
                continue;
            }

            /* while conditional compilation is switched off, input is
               discarded one character at a time (directives and
               comments are still handled above) */
            if( !conditionalCompilation )
            {
                advance();
                if( next_char == EOF_CHAR )
                {
                    emit_error( "EOF in conditional compilation!", null );
                    return null;
                }
                else
                    continue;
            }

            /* look for COLON or DBLCOLON */
            if( next_char == ':' )
            {
                if( next_char2 == ':' )
                {
                    advance();
                    advance();
                    return new token( sym.DBLCOLON );
                }
                else
                {
                    advance();
                    return new token( sym.COLON );
                }
            }

            /* leading L for wide strings */
            if( next_char == 'L' && ( next_char2 =='\"' || next_char2 =='\'') )
            {
                wide = true;
                advance();
                // The 'L' has been consumed, so the quote that followed
                // it is now in next_char (not next_char2).
                if( next_char == '\"' )
                {
                    advance();
                    in_string = true;
                    return new token( sym.LDBLQUOTE );
                }
                // wide char literal may follow, but detecting that
                // is done below.
            }

            /* look for Shifts */
            if( next_char == '<' )
            {
                if( next_char2 == '<' )
                {
                    advance();
                    advance();
                    return new token( sym.LSHIFT );
                }
                else
                {
                    advance();
                    return new token( sym.LESSTHAN );
                }
            }
            if( next_char == '>' )
            {
                if( next_char2 == '>' )
                {
                    advance();
                    advance();
                    return new token( sym.RSHIFT );
                }
                else
                {
                    advance();
                    return new token( sym.GREATERTHAN );
                }
            }

            /* leading 0: */
            /* Try to scan octal/hexadecimal numbers, might even find a float */
            if( next_char == '0' )
            {
                int radix = 8;
                int digit = 0;

                advance();
                if( next_char == '.' )
                {
                    StringBuffer f_string = new StringBuffer( "0." );
                    advance();
                    while( next_char >= '0' && next_char <= '9' )
                    {
                        f_string.append( (char)next_char );
                        advance();
                    }
                    float f_val = Float.parseFloat( f_string.toString() );
                    return new float_token( sym.FLOAT_NUMBER, f_val );
                }
                else
                {
                    // See if hexadecimal value
                    if( next_char == 'x' || next_char == 'X' )
                    {
                        advance();
                        radix = 16;
                    }

                    // the leading "0" is kept so that a lone 0 (or a
                    // bare "0x") still parses as zero
                    StringBuffer val = new StringBuffer( "0" );
                    digit = Character.digit( (char)next_char, radix );
                    while( digit != -1 )
                    {
                        val.append( (char)next_char );
                        advance();
                        digit = Character.digit( (char)next_char, radix );
                    }

                    String str = val.toString();
                    try
                    {
                        return new int_token( sym.NUMBER,
                                              Integer.parseInt( str, radix ) );
                    }
                    catch( NumberFormatException ex )
                    {
                        // too large for an int, try long
                        try
                        {
                            return new long_token( sym.LONG_NUMBER,
                                                   Long.parseLong( str, radix ) );
                        }
                        catch( NumberFormatException ex2 )
                        {
                            ex2.printStackTrace ();
                            emit_error( "Invalid octal/hex value: " + str );
                        }
                    }
                    return null;
                }
            }

            /* Try to scan integer, floating point or fixed point literals */
            if (isDigit (((char)next_char)) ||
                next_char == '.' ||
                (next_char == '-' && isDigit (((char)next_char2))))
            {
                StringBuffer value = new StringBuffer();
                StringBuffer fraction = null;

                if ( next_char == '-' )
                {
                    value.append( (char)next_char );
                    advance();
                }

                /* read integer part */
                while( next_char >= '0' && next_char <= '9' )
                {
                    value.append( (char)next_char );
                    advance();
                }

                /* read fraction */
                if( next_char == '.' )
                {
                    fraction = new StringBuffer();
                    advance();
                    while( next_char >= '0' && next_char <= '9' )
                    {
                        fraction.append( (char)next_char );
                        advance();
                    }
                }

                /* read exponent; it is appended to the fraction buffer */
                if( next_char == 'e' || next_char == 'E' )
                {
                    if( fraction == null )
                        fraction = new StringBuffer();

                    fraction.append( 'e' );
                    advance();
                    if( next_char == '-' || next_char == '+' )
                    {
                        fraction.append( (char)next_char );
                        advance();
                    }

                    // Count the exponent digits themselves: the length of
                    // the fraction buffer says nothing about them once
                    // fraction digits or a sign have been appended.
                    int exp_digits = 0;
                    while( next_char >= '0' && next_char <= '9' )
                    {
                        fraction.append( (char)next_char );
                        advance();
                        exp_digits++;
                    }

                    if( exp_digits == 0 )
                    {
                        emit_error( "Empty exponent in float/double." );
                        continue;
                    }

                    try
                    {
                        return new float_token( sym.FLOAT_NUMBER,
                                                Float.valueOf( value.toString() +
                                                               "." +
                                                               fraction.toString() ).floatValue() );
                    }
                    catch( NumberFormatException nf )
                    {
                        // e.g. a literal with neither integer nor fraction digits
                        emit_error( "Unexpected symbol: " +
                                    value.toString() + "." +
                                    fraction.toString() );
                        continue;
                    }
                }

                /* trailing d/D marks a fixed point literal */
                if( next_char == 'd' || next_char == 'D' )
                {
                    advance();
                    if( fraction == null )
                        fraction = new StringBuffer();

                    java.math.BigDecimal bi =
                        new java.math.BigDecimal( value.toString() + "." +
                                                  fraction.toString() );
                    return new fixed_token( sym.FIXED_NUMBER, bi );
                }

                if( fraction == null )
                {
                    /* integer or long */
                    token tok = null;
                    String str = value.toString();

                    try
                    {
                        tok = new int_token( sym.NUMBER, Integer.parseInt( str ) );
                    }
                    catch( NumberFormatException ex )
                    {
                        try
                        {
                            tok = new long_token
                                ( sym.LONG_NUMBER, Long.parseLong( str ) );
                        }
                        catch( NumberFormatException ex2 )
                        {
                            try
                            {
                                // Not quite critical yet - lets try stuffing it into
                                // a bigdecimal for later checking.
                                tok = new fixed_token
                                    (sym.FIXED_NUMBER, new java.math.BigDecimal (str));
                            }
                            catch (NumberFormatException ex3)
                            {
                                ex3.printStackTrace ();
                                emit_error( "Invalid long value: " + str );
                            }
                        }
                    }
                    return tok;
                }
                else
                {
                    try
                    {
                        float f =
                            Float.valueOf( value.toString() + "." +
                                           fraction.toString() ).floatValue();
                        return new float_token( sym.FLOAT_NUMBER, f );
                    }
                    catch( NumberFormatException nf )
                    {
                        // error reported; scanning carries on below with
                        // the current lookahead character
                        emit_error( "Unexpected symbol: " +
                                    value.toString() + "." +
                                    fraction.toString() );
                    }
                }
            }

            /* look for a single character symbol */
            sym_num = find_single_char( next_char );

            /* upon an opening double quote, return the
               sym.DBLQUOTE token and continue scanning in the
               in_string branch */
            if( (char)next_char == '\"' )
            {
                in_string = true;
                advance();
                return new token( sym.DBLQUOTE );
            }

            /* character literal */
            if( (char)next_char == '\'' )
            {
                advance();

                token t = null;

                if( next_char == '\\' )
                {
                    // Now need to process escaped character.
                    advance();

                    if( isDigit( (char)next_char ) )
                    {
                        // Octal character: up to three digits, shifted in
                        // from the right
                        char octal1 = '0';
                        char octal2 = '0';
                        char octal3 = (char)next_char;

                        if( isDigit( (char)next_char2 ) )
                        {
                            advance();
                            octal2 = octal3;
                            octal3 = (char)next_char;

                            if( isDigit( (char)next_char2 ) )
                            {
                                advance();
                                octal1 = octal2;
                                octal2 = octal3;
                                octal3 = (char)next_char;
                            }
                        }

                        t = new char_token
                            (
                                sym.CH,
                                (char)Integer.parseInt
                                ( new String
                                  ( new char[]{octal1, octal2, octal3} ),
                                  8
                                )
                            );
                    }
                    else if( (char)next_char == 'x' )
                    {
                        // Hexadecimal character: one or two digits
                        advance();

                        char hex1 = '0';
                        char hex2 = (char)next_char;

                        if( isHexLetterOrDigit( (char)next_char2 ) )
                        {
                            advance();
                            hex1 = hex2;
                            hex2 = (char)next_char;
                        }
                        else if( (char)next_char2 != '\'' )
                        {
                            emit_error( "Illegal hex character" );
                            return null;
                        }

                        t = new char_token
                            (
                                sym.CH,
                                (char)Integer.parseInt
                                ( new String
                                  ( new char[]{hex1, hex2} ),
                                  16
                                )
                            );
                    }
                    else if( (char)next_char == 'u' )
                    {
                        if( wide == false )
                        {
                            emit_error( "Unicode characters are only legal with wide character" );
                            return null;
                        }
                        else
                        {
                            // Unicode escape: one to four hexadecimal digits,
                            // shifted in from the right
                            advance();

                            char uni1 = '0';
                            char uni2 = '0';
                            char uni3 = '0';
                            char uni4 = (char)next_char;

                            if( isHexLetterOrDigit( (char)next_char2 ) )
                            {
                                advance();
                                uni3 = uni4;
                                uni4 = (char)next_char;

                                if( isHexLetterOrDigit( (char)next_char2 ) )
                                {
                                    advance();
                                    uni2 = uni3;
                                    uni3 = uni4;
                                    uni4 = (char)next_char;

                                    if( isHexLetterOrDigit( (char)next_char2 ) )
                                    {
                                        advance();
                                        uni1 = uni2;
                                        uni2 = uni3;
                                        uni3 = uni4;
                                        uni4 = (char)next_char;
                                    }
                                    else if( (char)next_char2 != '\'' )
                                    {
                                        emit_error( "Illegal unicode character" );
                                        return null;
                                    }
                                }
                                else if( (char)next_char2 != '\'' )
                                {
                                    emit_error( "Illegal unicode character" );
                                    return null;
                                }
                            }
                            else if( (char)next_char2 != '\'' )
                            {
                                emit_error( "Illegal unicode character" );
                                return null;
                            }

                            t = new char_token
                                (
                                    sym.CH,
                                    (char)Integer.parseInt
                                    ( new String
                                      ( new char[]{uni1, uni2, uni3, uni4} ),
                                      16
                                    )
                                );
                        }
                    }
                    else
                    {
                        // single letter escapes
                        switch( next_char )
                        {
                            case 'n':
                            {
                                t = new char_token( sym.CH, '\n' );
                                break;
                            }
                            case 't':
                            {
                                t = new char_token( sym.CH, '\t' );
                                break;
                            }
                            case 'v':
                            {
                                t = new char_token( sym.CH, '\013' );
                                break;
                            }
                            case 'b':
                            {
                                t = new char_token( sym.CH, '\b' );
                                break;
                            }
                            case 'r':
                            {
                                t = new char_token( sym.CH, '\r' );
                                break;
                            }
                            case 'f':
                            {
                                t = new char_token( sym.CH, '\f' );
                                break;
                            }
                            case 'a':
                            {
                                t = new char_token( sym.CH, '\007' );
                                break;
                            }
                            case '\\':
                            {
                                t = new char_token( sym.CH, '\\' );
                                break;
                            }
                            case '?':
                            {
                                t = new char_token( sym.CH, '?' );
                                break;
                            }
                            case '0':
                            {
                                // digits are already taken by the octal
                                // branch above, so this case is not reached
                                t = new char_token( sym.CH, '\0' );
                                break;
                            }
                            case '\'':
                            {
                                t = new char_token( sym.CH, '\'' );
                                break;
                            }
                            case '\"':
                            {
                                t = new char_token( sym.CH, '\"' );
                                break;
                            }
                            default:
                            {
                                emit_error( "Invalid escape symbol \'" );
                                return null;
                            }
                        }
                    }
                }
                else
                {
                    t = new char_token( sym.CH, (char)next_char );
                }

                advance();

                if( (char)next_char == '\'' )
                {
                    // tokenStack is popped in reverse order of pushing:
                    // the character token comes out first, then the
                    // closing QUOTE
                    tokenStack.push( new token( sym.QUOTE ) );
                    tokenStack.push( t );
                    advance();
                }
                else
                {
                    emit_error( "Expecting closing \'" );
                    return null;
                }
                wide = false;

                /* the opening quote */
                return new token( sym.QUOTE );
            }

            if( sym_num != -1 )
            {
                /* found one -- advance past it and return a token for it */
                advance();
                return new token( sym_num );
            }

            /* look for an id or keyword */
            if( id_start_char( next_char ) )
            {
                token t = do_symbol();
                if( t != null )
                    return t;
                else
                    continue;
            }

            /* look for EOF */
            if( next_char == EOF_CHAR )
            {
                return new token( sym.EOF );
            }
        }
        else // in_string
        {
            /* empty string ? */
            if( (char)next_char == '\"' )
            {
                in_string = false;
                // an empty wide string ends here as well, so the wide
                // flag must not leak into the next literal
                wide = false;
                advance();
                return new token( sym.DBLQUOTE );
            }

            StringBuffer result = new StringBuffer();
            char previous = ' ';

            /* collect up characters until the closing double quote */
            while( true )
            {
                if( next_char == EOF_CHAR )
                {
                    // Unterminated literal: without this check the loop
                    // would keep appending the EOF marker forever.
                    in_string = false;
                    wide = false;
                    emit_error( "EOF in string literal!" );
                    return new token( sym.EOF );
                }

                if( next_char == '\\' )
                {
                    // Remap those characters that have no equivalent in java
                    switch( next_char2 )
                    {
                        case 'a':
                        {
                            result.append( "\\007" );
                            previous = 'a';
                            advance();
                            break;
                        }
                        case 'v':
                        {
                            result.append( "\\013" );
                            previous = 'v';
                            advance();
                            break;
                        }
                        case '?':
                        {
                            result.append( "?" );
                            previous = '?';
                            advance();
                            break;
                        }
                        case '\\':
                        {
                            // Escaped backslash: consume the pair as a unit
                            // so that the second backslash can neither start
                            // another escape nor hide the closing quote.
                            result.append( "\\\\" );
                            previous = ' ';
                            advance();
                            break;
                        }
                        // Replace \xA0 by octal equivalent
                        case 'x':
                        {
                            advance();
                            advance();
                            // Now next_char will be A and next_char2 will be 0
                            // (exactly two hexadecimal digits are assumed)
                            String octal = Integer.toOctalString
                                (
                                    Integer.parseInt
                                    (
                                        new String
                                        (
                                            new char[]{
                                                (char)next_char,
                                                (char)next_char2}
                                        ),
                                        16
                                    )
                                );
                            // left-pad with zeros to three octal digits
                            if( octal.length() != 3 )
                            {
                                if( octal.length() == 1 )
                                {
                                    octal = "0" + octal;
                                }
                                octal = "0" + octal;
                            }
                            result.append( "\\" + octal );
                            previous = (char)next_char2;
                            advance();
                            break;
                        }
                        case 'u':
                        {
                            if( wide == false )
                            {
                                emit_error( "Unicode characters are only legal with wide strings" );
                                return null;
                            }
                            else
                            {
                                // copy the backslash and the 'u' verbatim
                                result.append( (char)next_char );
                                result.append( (char)next_char2 );
                                advance();
                                advance();

                                // exactly four hexadecimal digits are required here
                                char uni1 = (char)next_char;
                                char uni2 = '0';
                                char uni3 = '0';
                                char uni4 = '0';

                                if( isHexLetterOrDigit( (char)next_char2 ) )
                                {
                                    advance();
                                    uni2 = (char)next_char;

                                    if( isHexLetterOrDigit( (char)next_char2 ) )
                                    {
                                        advance();
                                        uni3 = (char)next_char;

                                        if( isHexLetterOrDigit( (char)next_char2 ) )
                                        {
                                            advance();
                                            uni4 = (char)next_char;
                                        }
                                        else
                                        {
                                            emit_error( "Illegal unicode character" );
                                            return null;
                                        }
                                    }
                                    else
                                    {
                                        emit_error( "Illegal unicode character" );
                                        return null;
                                    }
                                }
                                else
                                {
                                    emit_error( "Illegal unicode character" );
                                    return null;
                                }

                                previous = uni4;
                                result.append( uni1 );
                                result.append( uni2 );
                                result.append( uni3 );
                                result.append( uni4 );
                            }
                            break;
                        }
                        default:
                        {
                            // any other escape: keep the backslash, the
                            // escaped character is copied on the next turn
                            previous = (char)next_char;
                            result.append( (char)next_char );
                        }
                    }
                }
                else
                {
                    previous = (char)next_char;
                    result.append( (char)next_char );
                }

                advance();

                // Handle backslash quote but exit if just quote
                if( ( (char)next_char ) == '\"' && previous != '\\' )
                {
                    break;
                }
            }
            wide = false;

            String s = result.toString();

            /* build and return an id token with an attached string */
            return new org.jacorb.idl.str_token( sym.ID, s,
                                                 getPosition(),
                                                 GlobalInputStream.currentFile().getName() );
        }

        /* if we get here, we have an unrecognized character */
        emit_warn( "Unrecognized character '" +
                   new Character( (char)next_char ) + "'(" + next_char + ") -- ignored" );

        /* advance past it */
        advance();
    }
}
/**
 * Tests whether a character is one of the US ASCII decimal
 * digits '0' (U+0030) through '9' (U+0039).
 *
 * @param c the character to test
 * @return true if c lies in the range '0'..'9', false otherwise
 */
static boolean isDigit( char c )
{
    return '0' <= c && c <= '9';
}
/**
 * Tests whether a character is a US ASCII hexadecimal digit,
 * i.e. one of 0-9 (U+0030..U+0039), A-F (U+0041..U+0046) or
 * a-f (U+0061..U+0066).
 *
 * @param c the character to test
 * @return true if c is a hexadecimal digit, false otherwise
 */
private static boolean isHexLetterOrDigit( char c )
{
    final boolean decimal = '0' <= c && c <= '9';
    final boolean upperHex = 'A' <= c && c <= 'F';
    final boolean lowerHex = 'a' <= c && c <= 'f';

    return decimal || upperHex || lowerHex;
}
}