001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.imaging.common;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Map;
025
026import org.apache.commons.imaging.ImageReadException;
027
028/**
029 * A rudimentary preprocessor and parser for the C programming
030 * language.
031 *
032 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
033 */
034public class BasicCParser {
035    private final PushbackInputStream is;
036
037    public BasicCParser(final ByteArrayInputStream is) {
038        this.is = new PushbackInputStream(is);
039    }
040
041    public String nextToken() throws IOException, ImageReadException {
042        // I don't know how complete the C parsing in an XPM file
043        // is meant to be, this is just the very basics...
044
045        boolean inString = false;
046        boolean inIdentifier = false;
047        boolean hadBackSlash = false;
048        final StringBuilder token = new StringBuilder();
049        for (int c = is.read(); c != -1; c = is.read()) {
050            if (inString) {
051                switch (c) {
052                case '\\':
053                    token.append('\\');
054                    hadBackSlash = !hadBackSlash;
055                    break;
056                case '"':
057                    token.append('"');
058                    if (!hadBackSlash) {
059                        return token.toString();
060                    }
061                    hadBackSlash = false;
062                    break;
063                case '\r':
064                case '\n':
065                    throw new ImageReadException(
066                            "Unterminated string in XPM file");
067                default:
068                    token.append((char) c);
069                    hadBackSlash = false;
070                    break;
071                }
072            } else if (inIdentifier) {
073                if (!Character.isLetterOrDigit(c) && (c != '_')) {
074                    is.unread(c);
075                    return token.toString();
076                }
077                token.append((char) c);
078            } else {
079                if (c == '"') {
080                    token.append('"');
081                    inString = true;
082                } else if (Character.isLetterOrDigit(c) || c == '_') {
083                    token.append((char) c);
084                    inIdentifier = true;
085                } else if (c == '{' || c == '}' || c == '[' || c == ']'
086                        || c == '*' || c == ';' || c == '=' || c == ',') {
087                    token.append((char) c);
088                    return token.toString();
089                } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
090                    // ignore
091                } else {
092                    throw new ImageReadException(
093                            "Unhandled/invalid character '" + ((char) c)
094                                    + "' found in XPM file");
095                }
096            }
097        }
098
099        if (inIdentifier) {
100            return token.toString();
101        }
102        if (inString) {
103            throw new ImageReadException("Unterminated string ends XMP file");
104        }
105        return null;
106    }
107
108    public static ByteArrayOutputStream preprocess(final InputStream is,
109            final StringBuilder firstComment, final Map<String, String> defines)
110            throws IOException, ImageReadException {
111        boolean inSingleQuotes = false;
112        boolean inString = false;
113        boolean inComment = false;
114        boolean inDirective = false;
115        boolean hadSlash = false;
116        boolean hadStar = false;
117        boolean hadBackSlash = false;
118        final ByteArrayOutputStream out = new ByteArrayOutputStream();
119        boolean seenFirstComment = (firstComment == null);
120        final StringBuilder directiveBuffer = new StringBuilder();
121        for (int c = is.read(); c != -1; c = is.read()) {
122            if (inComment) {
123                if (c == '*') {
124                    if (hadStar && !seenFirstComment) {
125                        firstComment.append('*');
126                    }
127                    hadStar = true;
128                } else if (c == '/') {
129                    if (hadStar) {
130                        hadStar = false;
131                        inComment = false;
132                        seenFirstComment = true;
133                    } else {
134                        if (!seenFirstComment) {
135                            firstComment.append((char) c);
136                        }
137                    }
138                } else {
139                    if (hadStar && !seenFirstComment) {
140                        firstComment.append('*');
141                    }
142                    hadStar = false;
143                    if (!seenFirstComment) {
144                        firstComment.append((char) c);
145                    }
146                }
147            } else if (inSingleQuotes) {
148                switch (c) {
149                case '\\':
150                    if (hadBackSlash) {
151                        out.write('\\');
152                        out.write('\\');
153                        hadBackSlash = false;
154                    } else {
155                        hadBackSlash = true;
156                    }
157                    break;
158                case '\'':
159                    if (hadBackSlash) {
160                        out.write('\\');
161                        hadBackSlash = false;
162                    } else {
163                        inSingleQuotes = false;
164                    }
165                    out.write('\'');
166                    break;
167                case '\r':
168                case '\n':
169                    throw new ImageReadException("Unterminated single quote in file");
170                default:
171                    if (hadBackSlash) {
172                        out.write('\\');
173                        hadBackSlash = false;
174                    }
175                    out.write(c);
176                    break;
177                }
178            } else if (inString) {
179                switch (c) {
180                case '\\':
181                    if (hadBackSlash) {
182                        out.write('\\');
183                        out.write('\\');
184                        hadBackSlash = false;
185                    } else {
186                        hadBackSlash = true;
187                    }
188                    break;
189                case '"':
190                    if (hadBackSlash) {
191                        out.write('\\');
192                        hadBackSlash = false;
193                    } else {
194                        inString = false;
195                    }
196                    out.write('"');
197                    break;
198                case '\r':
199                case '\n':
200                    throw new ImageReadException("Unterminated string in file");
201                default:
202                    if (hadBackSlash) {
203                        out.write('\\');
204                        hadBackSlash = false;
205                    }
206                    out.write(c);
207                    break;
208                }
209            } else if (inDirective) {
210                if (c == '\r' || c == '\n') {
211                    inDirective = false;
212                    final String[] tokens = tokenizeRow(directiveBuffer.toString());
213                    if (tokens.length < 2 || tokens.length > 3) {
214                        throw new ImageReadException("Bad preprocessor directive");
215                    }
216                    if (!tokens[0].equals("define")) {
217                        throw new ImageReadException("Invalid/unsupported "
218                                + "preprocessor directive '" + tokens[0] + "'");
219                    }
220                    defines.put(tokens[1], (tokens.length == 3) ? tokens[2]
221                            : null);
222                    directiveBuffer.setLength(0);
223                } else {
224                    directiveBuffer.append((char) c);
225                }
226            } else {
227                switch (c) {
228                case '/':
229                    if (hadSlash) {
230                        out.write('/');
231                    }
232                    hadSlash = true;
233                    break;
234                case '*':
235                    if (hadSlash) {
236                        inComment = true;
237                        hadSlash = false;
238                    } else {
239                        out.write(c);
240                    }
241                    break;
242                case '\'':
243                    if (hadSlash) {
244                        out.write('/');
245                    }
246                    hadSlash = false;
247                    out.write(c);
248                    inSingleQuotes = true;
249                    break;
250                case '"':
251                    if (hadSlash) {
252                        out.write('/');
253                    }
254                    hadSlash = false;
255                    out.write(c);
256                    inString = true;
257                    break;
258                case '#':
259                    if (defines == null) {
260                        throw new ImageReadException("Unexpected preprocessor directive");
261                    }
262                    inDirective = true;
263                    break;
264                default:
265                    if (hadSlash) {
266                        out.write('/');
267                    }
268                    hadSlash = false;
269                    out.write(c);
270                    // Only whitespace allowed before first comment:
271                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
272                        seenFirstComment = true;
273                    }
274                    break;
275                }
276            }
277        }
278        if (hadSlash) {
279            out.write('/');
280        }
281        if (hadStar) {
282            out.write('*');
283        }
284        if (inString) {
285            throw new ImageReadException("Unterminated string at the end of file");
286        }
287        if (inComment) {
288            throw new ImageReadException("Unterminated comment at the end of file");
289        }
290        return out;
291    }
292
293    public static String[] tokenizeRow(final String row) {
294        final String[] tokens = row.split("[ \t]");
295        int numLiveTokens = 0;
296        for (final String token : tokens) {
297            if (token != null && !token.isEmpty()) {
298                ++numLiveTokens;
299            }
300        }
301        final String[] liveTokens = new String[numLiveTokens];
302        int next = 0;
303        for (final String token : tokens) {
304            if (token != null && !token.isEmpty()) {
305                liveTokens[next++] = token;
306            }
307        }
308        return liveTokens;
309    }
310
311    public static void unescapeString(final StringBuilder stringBuilder, final String string)
312            throws ImageReadException {
313        if (string.length() < 2) {
314            throw new ImageReadException("Parsing XPM file failed, "
315                    + "string is too short");
316        }
317        if (string.charAt(0) != '"'
318                || string.charAt(string.length() - 1) != '"') {
319            throw new ImageReadException("Parsing XPM file failed, "
320                    + "string not surrounded by '\"'");
321        }
322        boolean hadBackSlash = false;
323        for (int i = 1; i < (string.length() - 1); i++) {
324            final char c = string.charAt(i);
325            if (hadBackSlash) {
326                if (c == '\\') {
327                    stringBuilder.append('\\');
328                } else if (c == '"') {
329                    stringBuilder.append('"');
330                } else if (c == '\'') {
331                    stringBuilder.append('\'');
332                } else if (c == 'x') {
333                    if (i + 2 >= string.length()) {
334                        throw new ImageReadException(
335                                "Parsing XPM file failed, "
336                                        + "hex constant in string too short");
337                    }
338                    final char hex1 = string.charAt(i + 1);
339                    final char hex2 = string.charAt(i + 2);
340                    i += 2;
341                    int constant;
342                    try {
343                        constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
344                    } catch (final NumberFormatException nfe) {
345                        throw new ImageReadException(
346                                "Parsing XPM file failed, "
347                                        + "hex constant invalid", nfe);
348                    }
349                    stringBuilder.append((char) constant);
350                } else {
351                    switch (c) {
352                    case '0':
353                    case '1':
354                    case '2':
355                    case '3':
356                    case '4':
357                    case '5':
358                    case '6':
359                    case '7':
360                        int length = 1;
361                        if (i + 1 < string.length() && '0' <= string.charAt(i + 1)
362                                && string.charAt(i + 1) <= '7') {
363                            ++length;
364                        }
365                        if (i + 2 < string.length() && '0' <= string.charAt(i + 2)
366                                && string.charAt(i + 2) <= '7') {
367                            ++length;
368                        }
369                        int constant = 0;
370                        for (int j = 0; j < length; j++) {
371                            constant *= 8;
372                            constant += (string.charAt(i + j) - '0');
373                        }
374                        i += length - 1;
375                        stringBuilder.append((char) constant);
376                        break;
377                    case 'a':
378                        stringBuilder.append((char) 0x07);
379                        break;
380                    case 'b':
381                        stringBuilder.append((char) 0x08);
382                        break;
383                    case 'f':
384                        stringBuilder.append((char) 0x0c);
385                        break;
386                    case 'n':
387                        stringBuilder.append((char) 0x0a);
388                        break;
389                    case 'r':
390                        stringBuilder.append((char) 0x0d);
391                        break;
392                    case 't':
393                        stringBuilder.append((char) 0x09);
394                        break;
395                    case 'v':
396                        stringBuilder.append((char) 0x0b);
397                        break;
398                    default:
399                        throw new ImageReadException("Parsing XPM file failed, "
400                                + "invalid escape sequence");
401                    }
402                }
403                hadBackSlash = false;
404            } else {
405                if (c == '\\') {
406                    hadBackSlash = true;
407                } else if (c == '"') {
408                    throw new ImageReadException("Parsing XPM file failed, "
409                            + "extra '\"' found in string");
410                } else {
411                    stringBuilder.append(c);
412                }
413            }
414        }
415        if (hadBackSlash) {
416            throw new ImageReadException("Parsing XPM file failed, "
417                    + "unterminated escape sequence found in string");
418        }
419    }
420}