001    /**
002     * ========================================
003     * JFreeReport : a free Java report library
004     * ========================================
005     *
006     * Project Info:  http://reporting.pentaho.org/
007     *
008     * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors.
009     *
010     * This library is free software; you can redistribute it and/or modify it under the terms
011     * of the GNU Lesser General Public License as published by the Free Software Foundation;
012     * either version 2.1 of the License, or (at your option) any later version.
013     *
014     * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
015     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
016     * See the GNU Lesser General Public License for more details.
017     *
018     * You should have received a copy of the GNU Lesser General Public License along with this
019     * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
020     * Boston, MA 02111-1307, USA.
021     *
022     * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
023     * in the United States and other countries.]
024     *
025     * ------------
026     * $Id: CharacterEntityParser.java 3048 2007-07-28 18:02:42Z tmorgner $
027     * ------------
028     * (C) Copyright 2000-2005, by Object Refinery Limited.
029     * (C) Copyright 2005-2007, by Pentaho Corporation.
030     */
031    package org.jfree.report.util;
032    
033    import java.util.Enumeration;
034    import java.util.Properties;
035    
/**
 * Translates between plain text and text in which special characters are written as
 * character entity references of the form <code>&amp;entityname;</code>.
 * <p/>
 * Encoding replaces every character that has a known entity name with its reference.
 * Decoding replaces named references, decimal references (<code>&amp;#65;</code>) and
 * hexadecimal references (<code>&amp;#x41;</code>) with the characters they denote.
 * Anything that cannot be resolved is copied to the output unchanged.
 * <p/>
 * The entity tables are copied at construction time and never modified afterwards.
 *
 * @author Thomas Morgner
 */
public class CharacterEntityParser
{
  /**
   * The entities, keyed by entity name. This is a private copy of the table given to
   * the constructor.
   */
  private final Properties entities;

  /**
   * The reverse lookup entities, keyed by the replacement text (a character).
   */
  private final Properties reverse;

  /**
   * Creates a new CharacterEntityParser and initializes the parser with the given set of
   * entities.
   * <p/>
   * The given table is copied, including the entries it inherits from its default
   * property list; later changes to <code>characterEntities</code> do not affect this
   * parser. Entries whose value is not a string are ignored. If several entity names map
   * to the same character, it is unspecified which of them is used for encoding.
   *
   * @param characterEntities the entities used for the parser, mapping the entity name
   *                          to the replacement text.
   * @throws NullPointerException if <code>characterEntities</code> is null.
   */
  public CharacterEntityParser (final Properties characterEntities)
  {
    if (characterEntities == null)
    {
      throw new NullPointerException("characterEntities must not be null");
    }

    entities = new Properties();
    reverse = new Properties();

    // propertyNames() also reports the keys of the default list, which getProperty()
    // resolves as well. That keeps the forward and the reverse table in sync.
    final Enumeration names = characterEntities.propertyNames();
    while (names.hasMoreElements())
    {
      final String name = (String) names.nextElement();
      final String replacement = characterEntities.getProperty(name);
      if (replacement == null)
      {
        // the value is not a string; such an entry can neither be encoded nor decoded.
        continue;
      }
      entities.setProperty(name, replacement);
      reverse.setProperty(replacement, name);
    }
  }

  /**
   * Creates a new character entity parser and initializes it with the five entities
   * predefined by the XML standard: <code>amp</code>, <code>quot</code>,
   * <code>lt</code>, <code>gt</code> and <code>apos</code>.
   *
   * @return the CharacterEntityParser initialized with XML entities.
   */
  public static CharacterEntityParser createXMLEntityParser ()
  {
    final Properties entities = new Properties();
    entities.setProperty("amp", "&");
    entities.setProperty("quot", "\"");
    entities.setProperty("lt", "<");
    entities.setProperty("gt", ">");
    entities.setProperty("apos", "\u0027");
    return new CharacterEntityParser(entities);
  }

  /**
   * Looks up the replacement text for the entity name specified in <code>key</code>.
   *
   * @param key the entity name
   * @return the replacement text, or null if the entity name is not known.
   */
  private String lookupCharacter (final String key)
  {
    return entities.getProperty(key);
  }

  /**
   * Performs a reverse lookup, to retrieve the entity reference for a given character.
   *
   * @param character the character that should be translated into the entity
   * @return the complete reference (<code>&amp;name;</code>) for the character, or null
   *         if there is no entity for the character.
   */
  private String lookupEntity (final String character)
  {
    final String name = reverse.getProperty(character);
    if (name == null)
    {
      return null;
    }
    return "&" + name + ";";
  }

  /**
   * Encodes the given string, so that every character for which an entity is known is
   * replaced by its entity reference. All other characters are copied unchanged.
   *
   * @param value the original string, may be null.
   * @return the encoded string, or null if <code>value</code> is null.
   */
  public String encodeEntities (final String value)
  {
    if (value == null)
    {
      return null;
    }

    final StringBuffer writer = new StringBuffer(value.length());
    for (int i = 0; i < value.length(); i++)
    {
      final char character = value.charAt(i);
      final String reference = lookupEntity(String.valueOf(character));
      if (reference == null)
      {
        writer.append(character);
      }
      else
      {
        writer.append(reference);
      }
    }
    return writer.toString();
  }

  /**
   * Decodes the string; all known entity references are replaced by their resolved
   * characters.
   * <p/>
   * A reference starts with an ampersand and ends at the next semicolon; the text in
   * between must not contain another ampersand. Unknown names, malformed or out-of-range
   * numeric references and ampersands that do not start a reference are copied to the
   * result unchanged. The input is scanned only once, so the text produced by a
   * replacement is not combined with the following input (<code>&amp;amp;lt;</code>
   * yields <code>&amp;lt;</code>).
   *
   * @param value the string that should be decoded, may be null.
   * @return the decoded string, or null if <code>value</code> is null. If nothing had to
   *         be replaced, <code>value</code> itself is returned.
   */
  public String decodeEntities (final String value)
  {
    if (value == null)
    {
      return null;
    }

    int ampersand = value.indexOf('&');
    if (ampersand == -1)
    {
      return value;
    }

    final StringBuffer result = new StringBuffer(value.length());
    // everything before this index has already been written to the result.
    int copied = 0;
    // position of the first semicolon behind the current ampersand; it is only searched
    // again once the scan has moved past it, which keeps the scan linear.
    int semicolon = -1;

    while (ampersand != -1)
    {
      if (semicolon <= ampersand)
      {
        semicolon = value.indexOf(';', ampersand + 1);
        if (semicolon == -1)
        {
          // no terminator left, so there cannot be any further reference.
          break;
        }
      }

      final int nextAmpersand = value.indexOf('&', ampersand + 1);
      if (nextAmpersand != -1 && nextAmpersand < semicolon)
      {
        // a stray ampersand: the semicolon belongs to a later candidate. Leave the text
        // untouched and continue with that candidate.
        ampersand = nextAmpersand;
        continue;
      }

      final String replacement = resolveReference(value.substring(ampersand + 1, semicolon));
      if (replacement != null)
      {
        result.append(value.substring(copied, ampersand));
        result.append(replacement);
        copied = semicolon + 1;
      }
      // an unresolved reference stays part of the pending text and is copied verbatim
      // (including its semicolon) together with the text that follows it.
      ampersand = nextAmpersand;
    }

    if (copied == 0)
    {
      return value;
    }
    result.append(value.substring(copied));
    return result.toString();
  }

  /**
   * Resolves the text found between the ampersand and the semicolon of a reference.
   * <p/>
   * The replacement text of a named entity is decoded itself, so an entity definition
   * may refer to other entities. Definitions that refer to themselves, directly or
   * indirectly, would recurse without end and must not be used.
   *
   * @param reference the reference without the enclosing ampersand and semicolon.
   * @return the replacement text or null, if the reference cannot be resolved.
   */
  private String resolveReference (final String reference)
  {
    if (reference.length() > 0 && reference.charAt(0) == '#')
    {
      return decodeNumericReference(reference.substring(1));
    }

    final String replacement = lookupCharacter(reference);
    if (replacement == null)
    {
      return null;
    }
    return decodeEntities(replacement);
  }

  /**
   * Translates the numeric part of a character reference into the character it denotes.
   * The number is decimal, or hexadecimal if it is prefixed with <code>x</code> (an
   * upper-case <code>X</code> is tolerated as well).
   * <p/>
   * Only the values 1 to 65535 are accepted, as the result is a single
   * <code>char</code>; references to larger code points are reported as unresolvable and
   * therefore stay in the text as they are.
   *
   * @param number the text between <code>&amp;#</code> and the semicolon.
   * @return a string of length one, or null if the number is malformed or out of range.
   */
  private static String decodeNumericReference (final String number)
  {
    int radix = 10;
    String digits = number;
    if (digits.startsWith("x") || digits.startsWith("X"))
    {
      radix = 16;
      digits = digits.substring(1);
    }

    // Integer.parseInt accepts a leading sign, a character reference does not.
    if (digits.length() == 0 || Character.digit(digits.charAt(0), radix) == -1)
    {
      return null;
    }

    final int codePoint;
    try
    {
      codePoint = Integer.parseInt(digits, radix);
    }
    catch (NumberFormatException ignored)
    {
      // not a number, so this is not a character reference.
      return null;
    }

    if (codePoint < 1 || codePoint > Character.MAX_VALUE)
    {
      return null;
    }
    return String.valueOf((char) codePoint);
  }
}
245