001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.imaging.common; 018 019import java.io.ByteArrayInputStream; 020import java.io.ByteArrayOutputStream; 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.PushbackInputStream; 024import java.util.Map; 025 026import org.apache.commons.imaging.ImageReadException; 027 028/** 029 * A rudimentary preprocessor and parser for the C programming 030 * language. 031 * 032 * FIXME replace this by a parser generated via ANTLR (if we really need it?!) 033 */ 034public class BasicCParser { 035 private final PushbackInputStream is; 036 037 public BasicCParser(final ByteArrayInputStream is) { 038 this.is = new PushbackInputStream(is); 039 } 040 041 public String nextToken() throws IOException, ImageReadException { 042 // I don't know how complete the C parsing in an XPM file 043 // is meant to be, this is just the very basics... 044 045 boolean inString = false; 046 boolean inIdentifier = false; 047 boolean hadBackSlash = false; 048 final StringBuilder token = new StringBuilder(); 049 for (int c = is.read(); c != -1; c = is.read()) { 050 if (inString) { 051 switch (c) { 052 case '\\': 053 token.append('\\'); 054 hadBackSlash = !hadBackSlash; 055 break; 056 case '"': 057 token.append('"'); 058 if (!hadBackSlash) { 059 return token.toString(); 060 } 061 hadBackSlash = false; 062 break; 063 case '\r': 064 case '\n': 065 throw new ImageReadException( 066 "Unterminated string in XPM file"); 067 default: 068 token.append((char) c); 069 hadBackSlash = false; 070 break; 071 } 072 } else if (inIdentifier) { 073 if (!Character.isLetterOrDigit(c) && (c != '_')) { 074 is.unread(c); 075 return token.toString(); 076 } 077 token.append((char) c); 078 } else { 079 if (c == '"') { 080 token.append('"'); 081 inString = true; 082 } else if (Character.isLetterOrDigit(c) || c == '_') { 083 token.append((char) c); 084 inIdentifier = true; 085 } else if (c == '{' || c == '}' || c == '[' || c == ']' 086 || c == '*' || c == ';' || c == '=' || c == ',') { 087 token.append((char) c); 088 return token.toString(); 089 } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { 090 // ignore 091 } else { 092 throw new ImageReadException( 093 "Unhandled/invalid character '" + ((char) c) 094 + "' found in XPM file"); 095 } 096 } 097 } 098 099 if (inIdentifier) { 100 return token.toString(); 101 } 102 if (inString) { 103 throw new ImageReadException("Unterminated string ends XMP file"); 104 } 105 return null; 106 } 107 108 public static ByteArrayOutputStream preprocess(final InputStream is, 109 final StringBuilder firstComment, final Map<String, String> defines) 110 throws IOException, ImageReadException { 111 boolean inSingleQuotes = false; 112 boolean inString = false; 113 boolean inComment = false; 114 boolean inDirective = false; 115 boolean hadSlash = false; 116 boolean hadStar = false; 117 boolean hadBackSlash = false; 118 final ByteArrayOutputStream out = new ByteArrayOutputStream(); 119 boolean seenFirstComment = (firstComment == null); 120 final StringBuilder directiveBuffer = new StringBuilder(); 121 for (int c = is.read(); c != -1; c = is.read()) { 122 if (inComment) { 123 if (c == '*') { 124 if (hadStar && !seenFirstComment) { 125 firstComment.append('*'); 126 } 127 hadStar = true; 128 } else if (c == '/') { 129 if (hadStar) { 130 hadStar = false; 131 inComment = false; 132 seenFirstComment = true; 133 } else { 134 if (!seenFirstComment) { 135 firstComment.append((char) c); 136 } 137 } 138 } else { 139 if (hadStar && !seenFirstComment) { 140 firstComment.append('*'); 141 } 142 hadStar = false; 143 if (!seenFirstComment) { 144 firstComment.append((char) c); 145 } 146 } 147 } else if (inSingleQuotes) { 148 switch (c) { 149 case '\\': 150 if (hadBackSlash) { 151 out.write('\\'); 152 out.write('\\'); 153 hadBackSlash = false; 154 } else { 155 hadBackSlash = true; 156 } 157 break; 158 case '\'': 159 if (hadBackSlash) { 160 out.write('\\'); 161 hadBackSlash = false; 162 } else { 163 inSingleQuotes = false; 164 } 165 out.write('\''); 166 break; 167 case '\r': 168 case '\n': 169 throw new ImageReadException("Unterminated single quote in file"); 170 default: 171 if (hadBackSlash) { 172 out.write('\\'); 173 hadBackSlash = false; 174 } 175 out.write(c); 176 break; 177 } 178 } else if (inString) { 179 switch (c) { 180 case '\\': 181 if (hadBackSlash) { 182 out.write('\\'); 183 out.write('\\'); 184 hadBackSlash = false; 185 } else { 186 hadBackSlash = true; 187 } 188 break; 189 case '"': 190 if (hadBackSlash) { 191 out.write('\\'); 192 hadBackSlash = false; 193 } else { 194 inString = false; 195 } 196 out.write('"'); 197 break; 198 case '\r': 199 case '\n': 200 throw new ImageReadException("Unterminated string in file"); 201 default: 202 if (hadBackSlash) { 203 out.write('\\'); 204 hadBackSlash = false; 205 } 206 out.write(c); 207 break; 208 } 209 } else if (inDirective) { 210 if (c == '\r' || c == '\n') { 211 inDirective = false; 212 final String[] tokens = tokenizeRow(directiveBuffer.toString()); 213 if (tokens.length < 2 || tokens.length > 3) { 214 throw new ImageReadException("Bad preprocessor directive"); 215 } 216 if (!tokens[0].equals("define")) { 217 throw new ImageReadException("Invalid/unsupported " 218 + "preprocessor directive '" + tokens[0] + "'"); 219 } 220 defines.put(tokens[1], (tokens.length == 3) ? tokens[2] 221 : null); 222 directiveBuffer.setLength(0); 223 } else { 224 directiveBuffer.append((char) c); 225 } 226 } else { 227 switch (c) { 228 case '/': 229 if (hadSlash) { 230 out.write('/'); 231 } 232 hadSlash = true; 233 break; 234 case '*': 235 if (hadSlash) { 236 inComment = true; 237 hadSlash = false; 238 } else { 239 out.write(c); 240 } 241 break; 242 case '\'': 243 if (hadSlash) { 244 out.write('/'); 245 } 246 hadSlash = false; 247 out.write(c); 248 inSingleQuotes = true; 249 break; 250 case '"': 251 if (hadSlash) { 252 out.write('/'); 253 } 254 hadSlash = false; 255 out.write(c); 256 inString = true; 257 break; 258 case '#': 259 if (defines == null) { 260 throw new ImageReadException("Unexpected preprocessor directive"); 261 } 262 inDirective = true; 263 break; 264 default: 265 if (hadSlash) { 266 out.write('/'); 267 } 268 hadSlash = false; 269 out.write(c); 270 // Only whitespace allowed before first comment: 271 if (c != ' ' && c != '\t' && c != '\r' && c != '\n') { 272 seenFirstComment = true; 273 } 274 break; 275 } 276 } 277 } 278 if (hadSlash) { 279 out.write('/'); 280 } 281 if (hadStar) { 282 out.write('*'); 283 } 284 if (inString) { 285 throw new ImageReadException("Unterminated string at the end of file"); 286 } 287 if (inComment) { 288 throw new ImageReadException("Unterminated comment at the end of file"); 289 } 290 return out; 291 } 292 293 public static String[] tokenizeRow(final String row) { 294 final String[] tokens = row.split("[ \t]"); 295 int numLiveTokens = 0; 296 for (final String token : tokens) { 297 if (token != null && !token.isEmpty()) { 298 ++numLiveTokens; 299 } 300 } 301 final String[] liveTokens = new String[numLiveTokens]; 302 int next = 0; 303 for (final String token : tokens) { 304 if (token != null && !token.isEmpty()) { 305 liveTokens[next++] = token; 306 } 307 } 308 return liveTokens; 309 } 310 311 public static void unescapeString(final StringBuilder stringBuilder, final String string) 312 throws ImageReadException { 313 if (string.length() < 2) { 314 throw new ImageReadException("Parsing XPM file failed, " 315 + "string is too short"); 316 } 317 if (string.charAt(0) != '"' 318 || string.charAt(string.length() - 1) != '"') { 319 throw new ImageReadException("Parsing XPM file failed, " 320 + "string not surrounded by '\"'"); 321 } 322 boolean hadBackSlash = false; 323 for (int i = 1; i < (string.length() - 1); i++) { 324 final char c = string.charAt(i); 325 if (hadBackSlash) { 326 if (c == '\\') { 327 stringBuilder.append('\\'); 328 } else if (c == '"') { 329 stringBuilder.append('"'); 330 } else if (c == '\'') { 331 stringBuilder.append('\''); 332 } else if (c == 'x') { 333 if (i + 2 >= string.length()) { 334 throw new ImageReadException( 335 "Parsing XPM file failed, " 336 + "hex constant in string too short"); 337 } 338 final char hex1 = string.charAt(i + 1); 339 final char hex2 = string.charAt(i + 2); 340 i += 2; 341 int constant; 342 try { 343 constant = Integer.parseInt(hex1 + Character.toString(hex2), 16); 344 } catch (final NumberFormatException nfe) { 345 throw new ImageReadException( 346 "Parsing XPM file failed, " 347 + "hex constant invalid", nfe); 348 } 349 stringBuilder.append((char) constant); 350 } else { 351 switch (c) { 352 case '0': 353 case '1': 354 case '2': 355 case '3': 356 case '4': 357 case '5': 358 case '6': 359 case '7': 360 int length = 1; 361 if (i + 1 < string.length() && '0' <= string.charAt(i + 1) 362 && string.charAt(i + 1) <= '7') { 363 ++length; 364 } 365 if (i + 2 < string.length() && '0' <= string.charAt(i + 2) 366 && string.charAt(i + 2) <= '7') { 367 ++length; 368 } 369 int constant = 0; 370 for (int j = 0; j < length; j++) { 371 constant *= 8; 372 constant += (string.charAt(i + j) - '0'); 373 } 374 i += length - 1; 375 stringBuilder.append((char) constant); 376 break; 377 case 'a': 378 stringBuilder.append((char) 0x07); 379 break; 380 case 'b': 381 stringBuilder.append((char) 0x08); 382 break; 383 case 'f': 384 stringBuilder.append((char) 0x0c); 385 break; 386 case 'n': 387 stringBuilder.append((char) 0x0a); 388 break; 389 case 'r': 390 stringBuilder.append((char) 0x0d); 391 break; 392 case 't': 393 stringBuilder.append((char) 0x09); 394 break; 395 case 'v': 396 stringBuilder.append((char) 0x0b); 397 break; 398 default: 399 throw new ImageReadException("Parsing XPM file failed, " 400 + "invalid escape sequence"); 401 } 402 } 403 hadBackSlash = false; 404 } else { 405 if (c == '\\') { 406 hadBackSlash = true; 407 } else if (c == '"') { 408 throw new ImageReadException("Parsing XPM file failed, " 409 + "extra '\"' found in string"); 410 } else { 411 stringBuilder.append(c); 412 } 413 } 414 } 415 if (hadBackSlash) { 416 throw new ImageReadException("Parsing XPM file failed, " 417 + "unterminated escape sequence found in string"); 418 } 419 } 420}