Added: websites/production/commons/content/proper/commons-configuration/javadocs/v1.10/userguide/howto_xml.html ============================================================================== --- websites/production/commons/content/proper/commons-configuration/javadocs/v1.10/userguide/howto_xml.html (added) +++ websites/production/commons/content/proper/commons-configuration/javadocs/v1.10/userguide/howto_xml.html Wed Sep 24 20:29:01 2014 @@ -0,0 +1,1565 @@ +<!DOCTYPE html> +<!-- + | Generated by Apache Maven Doxia at 24 September 2014 + | Rendered using Apache Maven Fluido Skin 1.3.0 +--> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <meta charset="iso-8859-1" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta name="author" content="Oliver Heger" /> + <meta name="Date-Revision-yyyymmdd" content="20140924" /> + <meta http-equiv="Content-Language" content="en" /> + <title>Commons Configuration - + Hierarchical configurations and XML Howto</title> + + <link rel="stylesheet" href="../css/bootstrap.min.css" type="text/css" /> + <link rel="stylesheet" href="../css/site.css" type="text/css" /> + <link rel="stylesheet" href="../css/print.css" media="print" /> + + <script type="text/javascript" src="../js/jquery.min.js"></script> + <script type="text/javascript" src="../js/bootstrap.min.js"></script> + <script type="text/javascript" src="../js/prettify.min.js"></script> + <script type="text/javascript" src="../js/site.js"></script> + + +<link rel="stylesheet" type="text/css" media="all" href="../css/prettify.css"/> +<script src="../js/prettify.js" type="text/javascript"></script> +<script type="text/javascript">window.onload=function() { + prettyPrint(); + }</script> + </head> + + <body class="composite"> + <a href="http://commons.apache.org/" id="bannerLeft" title="Apache Commons logo"> + <img class="logo-left" src="../images/commons-logo.png" alt="Apache Commons logo"/> + </a> + <a href="../index.html" 
id="bannerRight"> + <img class="logo-right" src="../images/logo.png" alt="Commons Configuration"/> + </a> + <div class="clear"></div> + + <div class="navbar"> + <div class="navbar-inner"> + <div class="container-fluid"> + <a class="brand" href="http://commons.apache.org/proper/commons-configuration/">Apache Commons Configuration ™</a> + <ul class="nav"> + + <li id="publishDate">Last Published: 24 September 2014</li> + <li class="divider">|</li> <li id="projectVersion">Version: 1.10</li> + </ul> + <div class="pull-right"> <ul class="nav"> + <li> + <a href="http://www.apachecon.com/" class="externalLink" title="ApacheCon"> + ApacheCon</a> + </li> + <li> + <a href="http://www.apache.org" class="externalLink" title="Apache"> + Apache</a> + </li> + <li> + <a href="../../../" title="Commons"> + Commons</a> + </li> + </ul> +</div> + </div> + </div> + </div> + + <div class="container-fluid"> + <table class="layout-table"> + <tr> + <td class="sidebar"> + <div class="well sidebar-nav"> + <ul class="nav nav-list"> + <li class="nav-header">Configuration</li> + <li class="none"> + <a href="../index.html" title="Home"> + Home</a> + </li> + <li class="none"> + <a href="../../../configuration/download_configuration.cgi" title="Download"> + Download</a> + </li> + <li class="none"> + <a href="../changes-report.html" title="Release History"> + Release History</a> + </li> + <li class="none"> + <a href="../userguide/user_guide.html" title="User's Guide"> + User's Guide</a> + </li> + <li class="none"> + <a href="../dependencies.html" title="Runtime Dependencies"> + Runtime Dependencies</a> + </li> + <li class="none"> + <a href="../apidocs/index.html" title="Javadoc"> + Javadoc</a> + </li> + </ul> + <ul class="nav nav-list"> + <li class="nav-header"><i class="icon-cog"></i>Development</li> + <li class="none"> + <a href="../building.html" title="Building"> + Building</a> + </li> + <li class="none"> + <a href="../issue-tracking.html" title="Issue Tracking"> + Issue Tracking</a> + </li> + 
</ul> + <ul class="nav nav-list"> + <li class="nav-header"><i class="icon-info-sign"></i>Project Documentation</li> + <li class="collapsed"> + <a href="../project-info.html" title="Project Information"> + Project Information</a> + </li> + <li class="collapsed"> + <a href="../project-reports.html" title="Project Reports"> + Project Reports</a> + </li> + </ul> + <ul class="nav nav-list"> + <li class="nav-header">Commons</li> + <li class="none"> + <a href="../../../" title="Home"> + Home</a> + </li> + <li class="none"> + <a href="http://www.apache.org/licenses/" class="externalLink" title="License"> + License</a> + </li> + <li class="collapsed"> + <a href="../../../components.html" title="Components"> + Components</a> + </li> + <li class="collapsed"> + <a href="../../../sandbox/index.html" title="Sandbox"> + Sandbox</a> + </li> + <li class="collapsed"> + <a href="../../../dormant/index.html" title="Dormant"> + Dormant</a> + </li> + </ul> + <ul class="nav nav-list"> + <li class="nav-header">General Information</li> + <li class="none"> + <a href="../../../volunteering.html" title="Volunteering"> + Volunteering</a> + </li> + <li class="none"> + <a href="../../../patches.html" title="Contributing Patches"> + Contributing Patches</a> + </li> + <li class="none"> + <a href="../../../building.html" title="Building Components"> + Building Components</a> + </li> + <li class="none"> + <a href="../../../releases/index.html" title="Releasing Components"> + Releasing Components</a> + </li> + <li class="none"> + <a href="http://wiki.apache.org/commons/FrontPage" class="externalLink" title="Wiki"> + Wiki</a> + </li> + </ul> + <ul class="nav nav-list"> + <li class="nav-header">ASF</li> + <li class="none"> + <a href="http://www.apache.org/foundation/how-it-works.html" class="externalLink" title="How the ASF works"> + How the ASF works</a> + </li> + <li class="none"> + <a href="http://www.apache.org/foundation/getinvolved.html" class="externalLink" title="Get Involved"> + Get 
Involved</a> + </li> + <li class="none"> + <a href="http://www.apache.org/dev/" class="externalLink" title="Developer Resources"> + Developer Resources</a> + </li> + <li class="none"> + <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsorship"> + Sponsorship</a> + </li> + <li class="none"> + <a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks"> + Thanks</a> + </li> + </ul> + </div> + <div id="poweredBy"> + <a href="http://www.apache.org/events/current-event.html" title="ApacheCon" class="builtBy"> + <img class="builtBy" alt="ApacheCon" src="http://www.apache.org/events/current-event-125x125.png" /> + </a> + <a href="http://maven.apache.org/" title="Maven" class="builtBy"> + <img class="builtBy" alt="Maven" src="http://maven.apache.org/images/logos/maven-feather.png" /> + </a> + </div> + </td> + <td class="content"> + <!-- Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. --> + + + <div class="section"> +<h2>Using Hierarchical Configurations<a name="Using_Hierarchical_Configurations"></a></h2> + +<p> + This section explains how to use hierarchical + and structured XML datasets. 
+ </p> + </div> + + +<div class="section"> +<h2>Hierarchical properties<a name="Hierarchical_properties"></a></h2> + +<p> + Many sources of configuration data have a hierarchical or tree-like + nature. They can represent data that is structured in many ways. + Such configuration sources are represented by classes derived from + <a href="../apidocs/org/apache/commons/configuration/HierarchicalConfiguration.html"> + <tt>HierarchicalConfiguration</tt></a>. + </p> + +<p> + Prominent examples of hierarchical configuration sources are XML + documents. They can be read and written using the + <a href="../apidocs/org/apache/commons/configuration/XMLConfiguration.html"> + <tt>XMLConfiguration</tt></a> class. This section explains how + to deal with such structured data and demonstrates the enhanced query + facilities supported by <tt>HierarchicalConfiguration</tt>. We + use XML documents as examples for structured configuration sources, + but the information provided here (especially the rules for accessing + properties) applies to other hierarchical configurations as well. + Examples for other hierarchical configuration classes are + </p> +<ul> + +<li><a href="../apidocs/org/apache/commons/configuration/CombinedConfiguration.html"> + <tt>CombinedConfiguration</tt></a></li> + +<li><a href="../apidocs/org/apache/commons/configuration/HierarchicalINIConfiguration.html"> + <tt>HierarchicalINIConfiguration</tt></a></li> + +<li><a href="../apidocs/org/apache/commons/configuration/plist/PropertyListConfiguration.html"> + <tt>PropertyListConfiguration</tt></a></li> + </ul> + + +<div class="section"> +<h3>Accessing properties in hierarchical configurations<a name="Accessing_properties_in_hierarchical_configurations"></a></h3> + +<p> + We will start with a simple XML document to show some basics + about accessing properties. 
The following file named + <tt>gui.xml</tt> is used as an example document: + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1" ?&gt; +&lt;gui-definition&gt; + &lt;colors&gt; + &lt;background&gt;#808080&lt;/background&gt; + &lt;text&gt;#000000&lt;/text&gt; + &lt;header&gt;#008000&lt;/header&gt; + &lt;link normal="#000080" visited="#800080"/&gt; + &lt;default&gt;${colors.header}&lt;/default&gt; + &lt;/colors&gt; + &lt;rowsPerPage&gt;15&lt;/rowsPerPage&gt; + &lt;buttons&gt; + &lt;name&gt;OK,Cancel,Help&lt;/name&gt; + &lt;/buttons&gt; + &lt;numberFormat pattern="###\,###.##"/&gt; +&lt;/gui-definition&gt; +</pre></div> + +<p> + (As becomes obvious, this tutorial does not bother with good + design of XML documents; the example file should rather + demonstrate the different ways of accessing properties.) + To access the data stored in this document it must be loaded + by <tt>XMLConfiguration</tt>. Like other + <a href="howto_filebased.html">file based</a> + configuration classes <tt>XMLConfiguration</tt> supports + many ways of specifying the file to process. One way is to + pass the file name to the constructor as shown in the following + code fragment: + </p> + +<div class="source"> +<pre> +try +{ + XMLConfiguration config = new XMLConfiguration("gui.xml"); + // do something with config +} +catch(ConfigurationException cex) +{ + // something went wrong, e.g. the file was not found +} +</pre></div> + +<p> + If no exception was thrown, the properties defined in the + XML document are now available in the configuration object. + Other hierarchical configuration classes that operate on files + have corresponding constructors and methods for loading their data. 
+ The following fragment shows how the properties can be accessed: + </p> + +<div class="source"> +<pre> +String backColor = config.getString("colors.background"); +String textColor = config.getString("colors.text"); +String linkNormal = config.getString("colors.link[@normal]"); +String defColor = config.getString("colors.default"); +int rowsPerPage = config.getInt("rowsPerPage"); +List&lt;Object&gt; buttons = config.getList("buttons.name"); +</pre></div> + +<p> + This listing demonstrates some important points about constructing + keys for accessing properties in hierarchical configuration sources and about + features of <tt>HierarchicalConfiguration</tt> in general: + </p> +<ul> + +<li> + Nested elements are accessed using a dot notation. In + the example document there is an element + <tt>&lt;text&gt;</tt> in the body of the + <tt>&lt;colors&gt;</tt> element. The corresponding + key is <tt>colors.text</tt>. + </li> + +<li> + The root element is ignored when constructing keys. In + the example you do not write + <tt>gui-definition.colors.text</tt>, but only + <tt>colors.text</tt>. + </li> + +<li> + Attributes of XML elements are accessed in an XPath-like + notation. + </li> + +<li> + Interpolation can be used as in <tt>PropertiesConfiguration</tt>. + Here the <tt>&lt;default&gt;</tt> element in the + <tt>colors</tt> section refers to another color. + </li> + +<li> + Lists of properties can be defined in a short form using + the delimiter character (which is the comma by default). + In this example the <tt>buttons.name</tt> property + has the three values <i>OK</i>, <i>Cancel</i>, and + <i>Help</i>, so it is queried using the <tt>getList()</tt> + method. This works in attributes, too. Using the static + <tt>setDefaultListDelimiter()</tt> method of + <tt>AbstractConfiguration</tt> you can globally + define a different delimiter character or - + by setting the delimiter to 0 - disable this mechanism + completely. Placing a backslash before a delimiter + character will escape it. 
This is demonstrated in the + <tt>pattern</tt> attribute of the <tt>numberFormat</tt> + element. + </li> + </ul> + + +<p> + The next section will show how data in a more complex XML + document can be processed. + </p> + </div> + +<div class="section"> +<h3>Complex hierarchical structures<a name="Complex_hierarchical_structures"></a></h3> + +<p> + Consider the following scenario: An application operates on + database tables and wants to load a definition of the database + schema from its configuration. An XML document provides this + information. It could look as follows: + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1" ?&gt; + +&lt;database&gt; + &lt;tables&gt; + &lt;table tableType="system"&gt; + &lt;name&gt;users&lt;/name&gt; + &lt;fields&gt; + &lt;field&gt; + &lt;name&gt;uid&lt;/name&gt; + &lt;type&gt;long&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;uname&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;firstName&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;lastName&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;email&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;/fields&gt; + &lt;/table&gt; + &lt;table tableType="application"&gt; + &lt;name&gt;documents&lt;/name&gt; + &lt;fields&gt; + &lt;field&gt; + &lt;name&gt;docid&lt;/name&gt; + &lt;type&gt;long&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;name&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;creationDate&lt;/name&gt; + &lt;type&gt;java.util.Date&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;authorID&lt;/name&gt; + &lt;type&gt;long&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;version&lt;/name&gt; + &lt;type&gt;int&lt;/type&gt; + &lt;/field&gt; + &lt;/fields&gt; + &lt;/table&gt; + &lt;/tables&gt; +&lt;/database&gt; +</pre></div> + +<p> + This XML is quite self-explanatory; there is an arbitrary number + of table elements, each of which has a name and a list of fields. + A field in turn consists of a name and a data type. This + XML document (let's call it <tt>tables.xml</tt>) can be + loaded in exactly the same way as the simple document in the + section before. 
+ </p> + +<p> + When we now want to access some of the properties we face a + problem: the syntax for constructing configuration keys we + learned so far is not powerful enough to access all of the data + stored in the tables document. + </p> + +<p> + Because the document contains a list of tables some properties + are defined more than once. E.g. the configuration key + <tt>tables.table.name</tt> refers to a <tt>name</tt> + element inside a <tt>table</tt> element inside a + <tt>tables</tt> element. This constellation happens to + occur twice in the tables document. + </p> + +<p> + Multiple definitions of a property do not cause problems and are + supported by all classes of Configuration. If such a property + is queried using <tt>getProperty()</tt>, the method + recognizes that there are multiple values for that property and + returns a collection with all these values. So we could write + </p> + +<div class="source"> +<pre> +Object prop = config.getProperty("tables.table.name"); +if(prop instanceof Collection) +{ + System.out.println("Number of tables: " + ((Collection&lt;?&gt;) prop).size()); +} +</pre></div> + +<p> + An alternative to this code would be the <tt>getList()</tt> + method of <tt>Configuration</tt>. If a property is known to + have multiple values (as is the table name property in this example), + <tt>getList()</tt> allows retrieving all values at once. + <b>Note:</b> it is legal to call <tt>getString()</tt> + or one of the other getter methods on a property with multiple + values; it returns the first element of the list. + </p> + </div> + +<div class="section"> +<h3>Accessing structured properties<a name="Accessing_structured_properties"></a></h3> + +<p> + Okay, we can obtain a list with the names of all defined + tables. In the same way we can retrieve a list with the names + of all table fields: just pass the key + <tt>tables.table.fields.field.name</tt> to the + <tt>getList()</tt> method. 
In our example this list + would contain 10 elements, the names of all fields of all tables. + This is fine, but how do we know, which field belongs to + which table? + </p> + +<p> + When working with such hierarchical structures the configuration keys + used to query properties can have an extended syntax. All components + of a key can be followed by a numerical value in parentheses that + determines the index of the affected property. So if we have two + <tt>table</tt> elements we can exactly specify, which one we + want to address by appending the corresponding index. This is + explained best by some examples: + </p> + +<p> + We will now provide some configuration keys and show the results + of a <tt>getProperty()</tt> call with these keys as arguments. + </p> +<dl> + +<dt><tt>tables.table(0).name</tt></dt> + +<dd> + Returns the name of the first table (all indices are 0 based), + in this example the string <i>users</i>. + </dd> + +<dt><tt>tables.table(0)[@tableType]</tt></dt> + +<dd> + Returns the value of the tableType attribute of the first + table (<i>system</i>). + </dd> + +<dt><tt>tables.table(1).name</tt></dt> + +<dd> + Analogous to the first example returns the name of the + second table (<i>documents</i>). + </dd> + +<dt><tt>tables.table(2).name</tt></dt> + +<dd> + Here the name of a third table is queried, but because there + are only two tables the result is <b>null</b>. The fact that a + <b>null</b> value is returned for invalid indices can be used + to find out how many values are defined for a certain property: + just increment the index in a loop as long as valid objects + are returned. + </dd> + +<dt><tt>tables.table(1).fields.field.name</tt></dt> + +<dd> + Returns a collection with the names of all fields that + belong to the second table. With such kind of keys it is + now possible to find out, which fields belong to which table. 
+ </dd> + +<dt><tt>tables.table(1).fields.field(2).name</tt></dt> + +<dd> + The additional index after field selects a certain field. + This expression represents the name of the third field in + the second table (<i>creationDate</i>). + </dd> + +<dt><tt>tables.table.fields.field(0).type</tt></dt> + +<dd> + This key may be a bit unusual but nevertheless completely + valid. It selects the data types of the first fields in all + tables. So here a collection would be returned with the + values [<i>long, long</i>]. + </dd> + </dl> + + +<p> + These examples should make the usage of indices quite clear. + Because each configuration key can contain an arbitrary number + of indices it is possible to navigate through complex structures of + hierarchical configurations; each property can be uniquely identified. + </p> + +<p> + Sometimes dealing with long property keys may become inconvenient, + especially if the same properties are always accessed. For this + case <tt>HierarchicalConfiguration</tt> provides a short cut + with the <tt>configurationAt()</tt> method. This method can + be passed a key that selects exactly one node of the hierarchy + of nodes contained in a hierarchical configuration. Then a new + hierarchical configuration will be returned whose root node is + the selected node. So all property keys passed into that + configuration should be relative to the new root node. For + instance, if we are only interested in information about the + first database table, we could do something like this: + </p> + +<div class="source"> +<pre> +HierarchicalConfiguration sub = config.configurationAt("tables.table(0)"); +String tableName = sub.getString("name"); // only need to provide relative path +List&lt;Object&gt; fieldNames = sub.getList("fields.field.name"); +</pre></div> + +<p> + For dealing with complex list-like structures there is another + short cut. Often it will be necessary to iterate over all items + in the list and access their (sub) properties. 
A good example are + the fields of the tables in our demo configuration. When you want + to process all fields of a table (e.g. for constructing a + <tt>CREATE TABLE</tt> statement), you will need all information + stored for them in the configuration. An option would be to use + the <tt>getList()</tt> method to fetch the required data one + by one: + </p> + +<div class="source"> +<pre> +List&lt;Object&gt; fieldNames = config.getList("tables.table(0).fields.field.name"); +List&lt;Object&gt; fieldTypes = config.getList("tables.table(0).fields.field.type"); +List&lt;Object&gt; ... // further calls for other data that might be stored in the config +</pre></div> + +<p> + But this is not very readable and will fail if not all field + elements contain the same set of data (for instance the + <tt>type</tt> property may be optional, then the list for + the types can contain fewer elements than the other lists). A + solution to these problems is the <tt>configurationsAt()</tt> + method, a close relative to the <tt>configurationAt()</tt> + method covered above. This method evaluates the passed in key and + collects all configuration nodes that match this criterion. Then + for each node a <tt>HierarchicalConfiguration</tt> object is + created with this node as root node. A list with these configuration + objects is returned. As the following example shows this comes in + very handy when processing list-like structures: + </p> + +<div class="source"> +<pre> +List&lt;HierarchicalConfiguration&gt; fields = + config.configurationsAt("tables.table(0).fields.field"); +for(HierarchicalConfiguration sub : fields) +{ + // sub contains all data about a single field + String fieldName = sub.getString("name"); + String fieldType = sub.getString("type"); + ... 
+</pre></div> + +<p> + The configurations returned by the <tt>configurationAt()</tt> and + <tt>configurationsAt()</tt> methods are in fact instances of the + <a href="../apidocs/org/apache/commons/configuration/SubnodeConfiguration.html"> + <tt>SubnodeConfiguration</tt></a> class. The API documentation of + this class contains more information about its features and + limitations. + </p> + </div> + +<div class="section"> +<h3>Adding new properties<a name="Adding_new_properties"></a></h3> + +<p> + So far we have learned how to use indices to avoid ambiguities when + querying properties. The same problem occurs when adding new + properties to a structured configuration. As an example let's + assume we want to add a new field to the second table. New properties + can be added to a configuration using the <tt>addProperty()</tt> + method. Of course, we have to exactly specify where in the tree-like structure new + data is to be inserted. A statement like + </p> + +<div class="source"> +<pre> +// Warning: This might cause trouble! +config.addProperty("tables.table.fields.field.name", "size"); +</pre></div> + +<p> + would not be sufficient because it does not contain all needed + information. How is such a statement processed by the + <tt>addProperty()</tt> method? + </p> + +<p> + <tt>addProperty()</tt> splits the provided key into its + single parts and navigates through the properties tree along the + corresponding element names. In this example it will start at the + root element and then find the <tt>tables</tt> element. The + next key part to be processed is <tt>table</tt>, but here a + problem occurs: the configuration contains two <tt>table</tt> + properties below the <tt>tables</tt> element. To get rid of + this ambiguity an index can be specified at this position in the + key that makes clear, which of the two properties should be + followed. <tt>tables.table(1).fields.field.name</tt> e.g. + would select the second <tt>table</tt> property. 
If an index + is missing, <tt>addProperty()</tt> always follows the last + available element. In our example this would be the second + <tt>table</tt>, too. + </p> + +<p> + The following parts of the key are processed in exactly the same + manner. Under the selected <tt>table</tt> property there is + exactly one <tt>fields</tt> property, so this step is not + problematic at all. In the next step the <tt>field</tt> part + has to be processed. At the current position in the properties tree + there are multiple <tt>field</tt> (sub) properties. So here we + have the same situation as for the <tt>table</tt> part. + Because no explicit index is defined the last <tt>field</tt> + property is selected. The last part of the key passed to + <tt>addProperty()</tt> (<tt>name</tt> in this example) + will always be added as new property at the position that has + been reached in the former processing steps. So in our example + the last <tt>field</tt> property of the second table would + be given a new <tt>name</tt> sub property and the resulting + structure would look like the following listing: + </p> + +<div class="source"> +<pre> + ... + &lt;table tableType="application"&gt; + &lt;name&gt;documents&lt;/name&gt; + &lt;fields&gt; + &lt;field&gt; + &lt;name&gt;docid&lt;/name&gt; + &lt;type&gt;long&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;name&lt;/name&gt; + &lt;type&gt;java.lang.String&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;creationDate&lt;/name&gt; + &lt;type&gt;java.util.Date&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;authorID&lt;/name&gt; + &lt;type&gt;long&lt;/type&gt; + &lt;/field&gt; + &lt;field&gt; + &lt;name&gt;version&lt;/name&gt; + &lt;name&gt;size&lt;/name&gt; &lt;== Newly added property + &lt;type&gt;int&lt;/type&gt; + &lt;/field&gt; + &lt;/fields&gt; + &lt;/table&gt; + &lt;/tables&gt; +&lt;/database&gt; +</pre></div> + +<p> + This result is obviously not what was desired, but it demonstrates + how <tt>addProperty()</tt> works: the method follows an + existing branch in the properties tree and adds new leaves to it. + (If the passed in key does not match a branch in the existing tree, + a new branch will be added. E.g. 
if we pass the key + <tt>tables.table.data.first.test</tt>, the existing tree can be + navigated until the <tt>data</tt> part of the key. From here a + new branch is started with the remaining parts <tt>data</tt>, + <tt>first</tt> and <tt>test</tt>.) + </p> + +<p> + If we want a different behavior, we must explicitly tell + <tt>addProperty()</tt> what to do. In our example with the + new field our intention was to create a new branch for the + <tt>field</tt> part in the key, so that a new <tt>field</tt> + property is added to the structure rather than adding sub properties + to the last existing <tt>field</tt> property. This can be + achieved by specifying the special index <tt>(-1)</tt> at the + corresponding position in the key as shown below: + </p> + +<div class="source"> +<pre> +config.addProperty("tables.table(1).fields.field(-1).name", "size"); +config.addProperty("tables.table(1).fields.field.type", "int"); +</pre></div> + +<p> + The first line in this fragment specifies that a new branch is + to be created for the <tt>field</tt> property (index -1). + In the second line no index is specified for the field, so the + last one is used - which happens to be the field that has just + been created. So these two statements add a fully defined field + to the second table. This is the default pattern for adding new + properties or whole hierarchies of properties: first create a new + branch in the properties tree and then populate its sub properties. 
+ As an additional example let's add a complete new table definition + to our example configuration: + </p> + +<div class="source"> +<pre> +// Add a new table element and define the name +config.addProperty("tables.table(-1).name", "versions"); + +// Add a new field to the new table +// (an index for the table is not necessary because the latest is used) +config.addProperty("tables.table.fields.field(-1).name", "id"); +config.addProperty("tables.table.fields.field.type", "int"); + +// Add another field to the new table +config.addProperty("tables.table.fields.field(-1).name", "date"); +config.addProperty("tables.table.fields.field.type", "java.sql.Date"); +... +</pre></div> + +<p> + For more information about adding properties to a hierarchical + configuration also have a look at the javadocs for + <tt>HierarchicalConfiguration</tt>. + </p> + </div> + +<div class="section"> +<h3>Escaping special characters<a name="Escaping_special_characters"></a></h3> + +<p> + Some characters in property keys or values require a special + treatment. + </p> + +<p> + Per default the dot character is used as delimiter by most + configuration classes (we will learn how to change this for + hierarchical configurations in a later section). In some + configuration formats however, dots can be contained in the + names of properties. For instance, in XML the dot is a legal + character that can occur in any tag. The same is true for the names + of properties in windows ini files. 
So the following XML + document is completely valid: + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1" ?&gt; + +&lt;configuration&gt; + &lt;test.value&gt;42&lt;/test.value&gt; + &lt;test.complex&gt; + &lt;test.sub.element&gt;many dots&lt;/test.sub.element&gt; + &lt;/test.complex&gt; +&lt;/configuration&gt; +</pre></div> + +<p> + This XML document can be loaded by <tt>XMLConfiguration</tt> + without trouble, but when we want to access certain properties + we face a problem: The configuration claims that it does not + store any values for the properties with the keys + <tt>test.value</tt> or <tt>test.complex.test.sub.element</tt>! + </p> + +<p> + Of course, it is the dot character contained in the property + names, which causes this problem. A dot is always interpreted + as a delimiter between elements. So given the property key + <tt>test.value</tt> the configuration would look for an + element named <tt>test</tt> and then for a sub element + with the name <tt>value</tt>. To change this behavior it is + possible to escape a dot character, thus telling the configuration + that it is really part of an element name. This is simply done + by duplicating the dot. So the following statements will return + the desired property values: + </p> + +<div class="source"> +<pre> +int testVal = config.getInt("test..value"); +String complex = config.getString("test..complex.test..sub..element"); +</pre></div> + +<p> + Note the duplicated dots wherever the dot does not act as + delimiter. This way it is possible to access properties containing + dots in arbitrary combination. However, as you can see, the + escaping can be confusing sometimes. So if you have a choice, + you should avoid dots in the tag names of your XML configuration + files or other configuration sources. + </p> + +<p> + Another source of problems is related to list delimiter characters + in the values of properties. 
Like other configuration classes + <tt>XMLConfiguration</tt> implements + <a href="howto_basicfeatures.html#List_handling">list handling</a>. + This means that the values of XML elements and attributes are + checked whether they contain a list delimiter character. If this + is the case, the value is split, and a list property is created. + Per default this feature is enabled. Have a look at the + following example: + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1" ?&gt; + +&lt;configuration&gt; + &lt;pi&gt;3,1415&lt;/pi&gt; +&lt;/configuration&gt; +</pre></div> + +<p> + Here we use the comma as delimiter for fraction digits (as is + standard for some languages). However, the configuration will + interpret the comma as list delimiter character and assign the + property <i>pi</i> the two values 3 and 1415. This was not + desired. + </p> + +<p> + XML has a natural way of defining list properties by simply + repeating elements. So defining multiple values of a property in + a single element or attribute is a rather untypical use case. + Unfortunately, early versions of Commons Configuration had list + delimiter splitting enabled per default. Later it became obvious + that this feature can cause serious problems related to the + interpretation of property values and the escaping of delimiter + characters. For reasons of backwards compatibility we have to + stick to this approach in the 1.x series though. + </p> + +<p> + In the next major release the handling of lists will probably be + reworked. Therefore it is recommended not to use this feature. + You are safe if you disable it immediately after the creation of + an <tt>XMLConfiguration</tt> object (and before a file is + loaded). 
This can be achieved as follows: + </p> + +<div class="source"> +<pre> +XMLConfiguration config = new XMLConfiguration(); +config.setDelimiterParsingDisabled(true); +config.setAttributeSplittingDisabled(true); +config.load("config.xml"); +</pre></div> + </div> + </div> + + +<div class="section"> +<h2>Expression engines<a name="Expression_engines"></a></h2> + +<p> + In the previous chapters we saw many examples of how properties + in an <tt>XMLConfiguration</tt> object (or more generally in a + <tt>HierarchicalConfiguration</tt> object, because this is the + base class, which implements this functionality) can be queried or + modified using a special syntax for the property keys. Well, this + was not the full truth. Actually, property keys are not processed + by the configuration object itself, but are delegated to a helper + object, a so-called <i>Expression engine</i>. + </p> + +<p> + The separation of the task of interpreting property keys into a + helper object is a typical application of the <i>Strategy</i> + design pattern. In this case it also has the advantage that it + becomes possible to plug different expression engines into a + <tt>HierarchicalConfiguration</tt> object. So by providing + different implementations of the + <a href="../apidocs/org/apache/commons/configuration/tree/ExpressionEngine.html"> + <tt>ExpressionEngine</tt></a> + interface hierarchical configurations can support alternative + expression languages for accessing their data. + </p> + +<p> + Before we discuss the available expression engines that ship + with Commons Configuration, it should be explained how an + expression engine can be associated with a configuration object. + <a href="../apidocs/org/apache/commons/configuration/HierarchicalConfiguration.html"> + <tt>HierarchicalConfiguration</tt></a> and all derived classes + provide a <tt>setExpressionEngine()</tt> method, which expects + an implementation of the <tt>ExpressionEngine</tt> interface as + argument.
After this method was called, the configuration object will + use the passed expression engine, which means that all property keys + passed to methods like <tt>getProperty()</tt>, + <tt>getString()</tt>, or <tt>addProperty()</tt> must + conform to the syntax supported by this engine. Property keys + returned by the <tt>getKeys()</tt> method will follow this + syntax, too. + </p> + +<p> + In addition to instance specific expression engines that change the + behavior of single configuration objects it is also possible to set + a global expression engine. This engine is shared between all + hierarchical configuration objects, for which no specific expression + engine was set. The global expression engine can be set using the + static <tt>setDefaultExpressionEngine()</tt> method of + <tt>HierarchicalConfiguration</tt>. By invoking this method with + a custom expression engine the syntax of all hierarchical configuration + objects can be altered at once. + </p> + + +<div class="section"> +<h3>The default expression engine<a name="The_default_expression_engine"></a></h3> + +<p> + The syntax described so far for property keys of hierarchical + configurations is implemented by a specific implementation of the + <a href="../apidocs/org/apache/commons/configuration/tree/ExpressionEngine.html"> + <tt>ExpressionEngine</tt></a> interface called + <a href="../apidocs/org/apache/commons/configuration/tree/DefaultExpressionEngine.html"> + <tt>DefaultExpressionEngine</tt></a>. An instance of this class + is installed as the global expression engine in + <tt>HierarchicalConfiguration</tt>. So all newly created + instances of this class will make use of this engine (which is + the reason that our examples above worked). + </p> + +<p> + After reading the examples of property keys provided so far in + this document you should have a sound understanding regarding + the features and the syntax supported by the + <tt>DefaultExpressionEngine</tt> class. 
But it can do a + little bit more for you: it defines a bunch of properties, + which can be used to customize most tokens that can appear in a + valid property key. You prefer curly brackets over parentheses + as index markers? You find the duplicated dot as an escaped + property delimiter counter-intuitive? Well, simply go ahead and + change it! The following example shows how the syntax of a + <tt>DefaultExpressionEngine</tt> object is modified. Then + this object is set as the global expression engine, so that from + now on all hierarchical configuration objects will take up this + new syntax: + </p> + +<div class="source"> +<pre> +DefaultExpressionEngine engine = new DefaultExpressionEngine(); + +// Use a slash as property delimiter +engine.setPropertyDelimiter("/"); +// Indices should be provided in curly brackets +engine.setIndexStart("{"); +engine.setIndexEnd("}"); +// For attributes use simply a @ +engine.setAttributeStart("@"); +engine.setAttributeEnd(null); +// A Backslash is used for escaping property delimiters +engine.setEscapedDelimiter("\\/"); + +// Now install this engine as the global engine +HierarchicalConfiguration.setDefaultExpressionEngine(engine); + +// Access properties using the new syntax +HierarchicalConfiguration config = ... +String tableName = config.getString("tables/table{0}/name"); +String tableType = config.getString("tables/table{0}@type"); + </pre></div> + +<p> + <i>Tip:</i> Sometimes when processing an XML document you + don't want to distinguish between attributes and "normal" + child nodes. You can achieve this by setting the + <tt>AttributeEnd</tt> property to <b>null</b> and the + <tt>AttributeStart</tt> property to the same value as the + <tt>PropertyDelimiter</tt> property.
Then the syntax for + accessing attributes is the same as the syntax for other + properties: + </p> + +<div class="source"> +<pre> +DefaultExpressionEngine engine = new DefaultExpressionEngine(); +engine.setAttributeEnd(null); +engine.setAttributeStart(engine.getPropertyDelimiter()); +... +Object value = config.getProperty("tables.table(0).name"); +// name can either be a child node of table or an attribute + </pre></div> + </div> + + +<div class="section"> +<h3>The XPATH expression engine<a name="The_XPATH_expression_engine"></a></h3> + +<p> + The expression language provided by the <tt>DefaultExpressionEngine</tt> + class is powerful enough to address all properties in a + hierarchical configuration, but it is not always convenient to + use. Especially if list structures are involved, it is often + necessary to iterate through the whole list to find a certain + element. + </p> + +<p> + Think about our example configuration that stores information about + database tables. A use case could be to load all fields that belong + to the "users" table. If you knew the index of this + table, you could simply build a property key like + <tt>tables.table(&lt;index&gt;).fields.field.name</tt>, + but how do you find out the correct index? When using the + default expression engine, the only solution to this problem is + to iterate over all tables until you find the "users" + table. + </p> + +<p> + Life would be much easier if an expression language could be used, + which would directly support queries of this kind. In the XML + world, the XPATH syntax has grown popular as a powerful means + of querying structured data. In XPATH a query that selects all + field names of the "users" table would look something + like <tt>tables/table[@name='users']/fields/name</tt> (here + we assume that the table's name is modelled as an attribute).
+ This is not only much simpler than an iteration over all tables, + but also much more readable: it is quite obvious which fields + are selected by this query. + </p> + +<p> + Given the power of XPATH it is no wonder that we got many + user requests to add XPATH support to Commons Configuration. + Well, here it is! + </p> + +<p> + For enabling XPATH syntax for property keys you need the + <a href="../apidocs/org/apache/commons/configuration/tree/xpath/XPathExpressionEngine.html"> + <tt>XPathExpressionEngine</tt></a> class. This class + implements the <tt>ExpressionEngine</tt> interface and can + be plugged into a <tt>HierarchicalConfiguration</tt> object + using the <tt>setExpressionEngine()</tt> method. It is also + possible to set an instance of this class as the global + expression engine, so that all hierarchical configuration + objects make use of XPATH syntax. The following code fragment + shows how XPATH support can be enabled for a configuration + object: + </p> + +<div class="source"> +<pre> +HierarchicalConfiguration config = ... +config.setExpressionEngine(new XPathExpressionEngine()); + +// Now we can use XPATH queries: +List&lt;Object&gt; fields = config.getList("tables/table[1]/fields/name"); + </pre></div> + +<p> + XPATH expressions are not only used for selecting properties + (i.e. for the several getter methods), but also for adding new + properties. For this purpose the keys passed into the + <tt>addProperty()</tt> method must conform to a special + syntax. They consist of two parts: the first part is an + arbitrary XPATH expression that selects the node to which the new + property is to be added; the second part defines the new + element to be added. Both parts are separated by whitespace. + </p> + +<p> + Okay, let's look at an example. Say, we want to add a <tt>type</tt> + property under the first table (as a sibling to the <tt>name</tt> + element).
Then the first part of our key will have to select + the first table element, the second part will simply be + <tt>type</tt>, i.e. the name of the new property: + </p> + +<div class="source"> +<pre> +config.addProperty("tables/table[1] type", "system"); + </pre></div> + +<p> + (Note that indices in XPATH are 1-based, while in the default + expression language they are 0-based.) In this example the part + <tt>tables/table[1]</tt> selects the target element of the + add operation. This element must exist and must be unique, otherwise an exception + will be thrown. <tt>type</tt> is the name of the new element + that will be added. If instead of a normal element an attribute + should be added, the example becomes + </p> + +<div class="source"> +<pre> +config.addProperty("tables/table[1] @type", "system"); + </pre></div> + +<p> + It is possible to add complete paths at once. Then the single + elements in the new path are separated by "/" + characters. The following example shows how data about a new + table can be added to the configuration. Here we use full paths: + </p> + +<div class="source"> +<pre> +// Add new table "tasks" with name element and type attribute +config.addProperty("tables table/name", "tasks"); +// last() selects the last element of this name, +// which is the newest table element +config.addProperty("tables/table[last()] @type", "system"); + +// Now add fields +config.addProperty("tables/table[last()] fields/field/name", "taskid"); +config.addProperty("tables/table[last()]/fields/field[last()] type", "int"); +config.addProperty("tables/table[last()]/fields field/name", "name"); +config.addProperty("tables/table[last()]/fields field/name", "startDate"); +... + </pre></div> + +<p> + The first line of this example adds the path <tt>table/name</tt> + to the <tt>tables</tt> element, i.e. a new <tt>table</tt> + element will be created and added as last child to the + <tt>tables</tt> element. 
Then a new <tt>name</tt> element + is added as a child to the new <tt>table</tt> element. To this + element the value "tasks" is assigned. The next line + adds a <tt>type</tt> attribute to the new table element. To + obtain the correct <tt>table</tt> element, to which the + attribute must be added, the XPATH function <tt>last()</tt> + is used; this function selects the last element with a given + name, which in this case is the new <tt>table</tt> element. + The following lines all use the same approach to construct a new + element hierarchy: First, complete new branches are added + (<tt>fields/field/name</tt>), then to the newly created + elements further children are added. + </p> + +<p> + There is one gotcha with the keys described so far: they do + not work with the <tt>setProperty()</tt> method! This is + because <tt>setProperty()</tt> has to check whether the + passed-in key already exists; therefore it needs a key which can + be interpreted by query methods. If you want to use + <tt>setProperty()</tt>, you can pass in regular keys (i.e. + without a whitespace separator). The method then tries to figure + out which part of the key already exists in the configuration + and adds new nodes as necessary. In principle such regular keys + can also be used with <tt>addProperty()</tt>. However, they + do not contain sufficient information to decide where new nodes + should be added. + </p> + +<p> + To make this clearer let's go back to the example with the + tables. Consider that there is a configuration which already + contains information about some database tables. In order to add + a new table element in the configuration + <tt>addProperty()</tt> could be used as follows: + </p> + +<div class="source"> +<pre> +config.addProperty("tables/table/name", "documents"); + </pre></div> + +<p> + In the configuration a <tt>&lt;tables&gt;</tt> element + already exists, also <tt>&lt;table&gt;</tt> and + <tt>&lt;name&gt;</tt> elements.
How should the expression + engine know where new node structures are to be added? The + solution to this problem is to provide this information in the + key by stating: + </p> + +<div class="source"> +<pre> +config.addProperty("tables table/name", "documents"); + </pre></div> + +<p> + Now it is clear that new nodes should be added as children of + the <tt>&lt;tables&gt;</tt> element. More information about + keys and how they play together with <tt>addProperty()</tt> + and <tt>setProperty()</tt> can be found in the Javadocs for + <a href="../apidocs/org/apache/commons/configuration/tree/xpath/XPathExpressionEngine.html"> + <tt>XPathExpressionEngine</tt></a>. + </p> + +<p> + <i>Note:</i> XPATH support is implemented through + <a class="externalLink" href="http://commons.apache.org/jxpath">Commons JXPath</a>. + So when making use of this feature, be sure you include the + commons-jxpath jar in your classpath. + </p> + +<p> + In this tutorial we don't want to describe XPATH syntax and + expressions in detail. Please refer to the corresponding documentation. + It is important to mention that by embedding Commons JXPath the + full extent of the XPATH 1.0 standard can be used for constructing + property keys. + </p> + </div> + </div> + + +<div class="section"> +<h2>Validation of XML configuration files<a name="Validation_of_XML_configuration_files"></a></h2> + +<p> + XML parsers provide support for validation of XML documents to ensure that they + conform to a certain DTD or XML Schema. This feature can be useful for + configuration files, too. <tt>XMLConfiguration</tt> allows this feature + to be enabled when files are loaded.
+ </p> + +<div class="section"> +<h3>Validation using a DTD<a name="Validation_using_a_DTD"></a></h3> + +<p> + The easiest way to turn on validation is to simply set the + <tt>validating</tt> property to true as shown in the + following example: + </p> + +<div class="source"> +<pre> +XMLConfiguration config = new XMLConfiguration(); +config.setFileName("myconfig.xml"); +config.setValidating(true); + +// This will throw a ConfigurationException if the XML document does not +// conform to its DTD. +config.load(); +</pre></div> + +<p> + Setting the <tt>validating</tt> flag to true will cause + <tt>XMLConfiguration</tt> to use a validating XML parser. At this parser + a custom <tt>ErrorHandler</tt> will be registered, which throws + exceptions on simple and fatal parsing errors. + </p> + </div> + +<div class="section"> +<h3>Validation using a Schema<a name="Validation_using_a_Schema"></a></h3> + +<p> + XML Parsers also provide support for validating XML documents using an + XML Schema. XMLConfiguration provides a simple mechanism for enabling + this by setting the <tt>schemaValidation</tt> flag to true. This + will also set the <tt>validating</tt> flag to true so both do not + need to be set. The XML Parser will then use the schema defined in the + XML document to validate it. Enabling schema validation will also + enable the parser's namespace support. + </p> + +<p> + </p> +<div class="source"> +<pre> +XMLConfiguration config = new XMLConfiguration(); +config.setFileName("myconfig.xml"); +config.setSchemaValidation(true); + +// This will throw a ConfigurationException if the XML document does not +// conform to its Schema. +config.load(); +</pre></div> + + </div> + +<div class="section"> +<h3>Default Entity Resolution<a name="Default_Entity_Resolution"></a></h3> + +<p> + There is also some support for dealing with DTD files. Often the + DTD of an XML document is stored locally so that it can be quickly + accessed. 
However, the <tt>DOCTYPE</tt> declaration of the document + points to a location on the web as in the following example: + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1"?&gt; +&lt;!DOCTYPE web-app + PUBLIC "-//Sun Microsystems, Inc.//DTD Web Application 2.2//EN" + "http://java.sun.com/j2ee/dtds/web-app_2.2.dtd"&gt; +</pre></div> + +<p> + When working with XML documents directly you would use an + <tt>EntityResolver</tt> in such a case. The task of such an + entity resolver is to point the XML parser to the location of the + file referred to by the declaration. So in our example the entity + resolver would load the DTD file from a local cache instead of + retrieving it from the internet. + </p> + +<p> + <tt>XMLConfiguration</tt> provides a simple default implementation of + an <tt>EntityResolver</tt>. This implementation is initialized + by calling the <tt>registerEntityId()</tt> method with the + public IDs of the entities to be retrieved and their corresponding + local URLs. This method has to be called before the configuration + is loaded. To continue our example, consider that the DTD file for + our example document is stored on the class path. We can register it + with <tt>XMLConfiguration</tt> using the following code: + </p> + +<div class="source"> +<pre> +XMLConfiguration config = new XMLConfiguration(); +// load the URL to the DTD file from class path +URL dtdURL = getClass().getResource("web-app_2.2.dtd"); +// register it at the configuration +config.registerEntityId("-//Sun Microsystems, Inc.//DTD Web Application 2.2//EN", + dtdURL); +config.setValidating(true); // enable validation +config.setFileName("web.xml"); +config.load(); +</pre></div> + +<p> + This basically tells the XML configuration to use the specified + URL when it encounters the given public ID. Note that the call to + <tt>registerEntityId()</tt> has to be performed before the + configuration is loaded.
So you cannot use one of the constructors + that directly load the configuration. + </p> + </div> + +<div class="section"> +<h3>Enhanced Entity Resolution<a name="Enhanced_Entity_Resolution"></a></h3> + +<p> + While the default entity resolver can be used under certain circumstances, + it does not work well when using the DefaultConfigurationBuilder. + Furthermore, in many circumstances the programmatic nature of + registering entities will tie the application tightly to the + XML content. In addition, because it only works with the public id it + cannot support XML documents using an XML Schema. + </p> + +<p> + <a class="externalLink" href="http://xml.apache.org/commons/components/resolver/resolver-article.html#s.whats.wrong">XML + Entity and URI Resolvers</a> describes using a set of catalog files to + resolve entities. Commons Configuration provides support for + this Catalog Resolver through its own CatalogResolver class. + </p> + +<div class="source"> +<pre> +&lt;?xml version="1.0" encoding="ISO-8859-1"?&gt; +&lt;Employees xmlns="http://commons.apache.org/employee" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://commons.apache.org/employee http://commons.apache.org/sample.xsd"&gt; + &lt;Employee&gt; + &lt;SSN&gt;555121211&lt;/SSN&gt; + &lt;Name&gt;John Doe&lt;/Name&gt; + &lt;DateOfBirth&gt;1975-05-15&lt;/DateOfBirth&gt; + &lt;EmployeeType&gt;Exempt&lt;/EmployeeType&gt; + &lt;Salary&gt;100000&lt;/Salary&gt; + &lt;/Employee&gt; +&lt;/Employees&gt;</pre></div> + +<p> + The XML sample above is an XML document using a default namespace of + http://commons.apache.org/employee. The schemaLocation attribute lists a set + of namespaces together with hints to the location of their corresponding + schemas. When processing the document the parser will pass the hint, + in this case http://commons.apache.org/sample.xsd, to the entity resolver + as the system id. More information on using schema locations can be found + at <a class="externalLink" href="http://www.w3.org/TR/xmlschema-0/#schemaLocation">schemaLocation</a>.
+ </p> + +<p> + The example that follows shows how to use the CatalogResolver when + processing an XMLConfiguration. It should be noted that by using the + setEntityResolver method any EntityResolver may be used, not just those + provided by Commons Configuration. + </p> + +<div class="source"> +<pre> +CatalogResolver resolver = new CatalogResolver(); +resolver.setCatalogFiles("local/catalog.xml","http://test.org/catalogs/catalog1.xml"); +XMLConfiguration config = new XMLConfiguration(); +config.setEntityResolver(resolver); +config.setSchemaValidation(true); // enable schema validation +config.setFileName("config.xml"); +config.load(); +</pre></div> + </div> + +<div class="section"> +<h3>Extending Validation and Entity Resolution<a name="Extending_Validation_and_Entity_Resolution"></a></h3> + +<p> + The mechanisms provided with Commons Configuration will hopefully be + sufficient in most cases; however, there will certainly be circumstances + where they are not. XMLConfiguration provides two extension mechanisms + that should provide applications with all the flexibility they may + need. The first, registering a custom Entity Resolver, has already been + discussed in the preceding section. The second is that XMLConfiguration + provides a generic way of setting up the XML parser to use: A preconfigured + <tt>DocumentBuilder</tt> object can be passed to the + <tt>setDocumentBuilder()</tt> method. + </p> + +<p> + So an application can create a <tt>DocumentBuilder</tt> object + and initialize it according to its special needs. Then this + object must be passed to the <tt>XMLConfiguration</tt> instance + before invocation of the <tt>load()</tt> method. When loading + a configuration file, the passed-in <tt>DocumentBuilder</tt> will + be used instead of the default one. <i>Note:</i> If a custom + <tt>DocumentBuilder</tt> is used, the default implementation of + the <tt>EntityResolver</tt> interface is disabled.
This means + that the <tt>registerEntityId()</tt> method has no effect in + this mode. + </p> + </div> + </div> + + + + </td> + </tr> + </table> + </div> + + <div class="footer"> + <p>Copyright © 2001-2014 + <a href="http://www.apache.org/">The Apache Software Foundation</a>. + All Rights Reserved.</p> + +<div class="center">Apache Commons, Apache Commons Configuration, Apache, the Apache feather logo, and the Apache Commons project logos are trademarks of The Apache Software Foundation. + All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div> + </div> + </body> + +</html>