This won't help you unless you move to Solr 4.0, but here's an update
processor script from the book that takes the first character of a string
field and stores its character code as an integer value in another field:
<!-- Update chain that runs add-char-code.js on incoming documents. -->
<updateRequestProcessorChain name="script-add-char-code">
  <processor class="solr.StatelessScriptUpdateProcessorFactory">
    <!-- Script file name; the script lives in the collection's "conf" directory. -->
    <str name="script">add-char-code.js</str>
    <!-- Values the script reads through params.get(...). -->
    <lst name="params">
      <!-- Field whose first character is inspected. -->
      <str name="fieldName">content</str>
      <!-- Field that receives the character code. -->
      <str name="codeFieldName">content_code_i</str>
    </lst>
  </processor>
  <processor class="solr.LogUpdateProcessorFactory" />
  <processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
Here is the JavaScript script that should be placed in the
"add-char-code.js" file in the "conf" directory for
the Solr collection:
/**
 * Script hook for document adds: takes the first character of the first value
 * of a source field and adds its character code to a second field of the same
 * document.
 *
 * Field names are read from the global `params` object when it exists
 * ("fieldName" and "codeFieldName"); otherwise they default to "content" and
 * "content_code_i".
 *
 * Nothing is added when the source field is missing, its first value is null,
 * or that value is an empty string.
 *
 * @param cmd Add command; must expose getSolrInputDocument().
 */
function processAdd(cmd) {
  var fieldName;
  var codeFieldName;

  // `params` may not be defined at all, so probe it with typeof first.
  if (typeof params !== "undefined") {
    fieldName = params.get("fieldName");
    codeFieldName = params.get("codeFieldName");
  }
  if (fieldName == null) {
    fieldName = "content";
  }
  if (codeFieldName == null) {
    codeFieldName = "content_code_i";
  }

  var doc = cmd.getSolrInputDocument();

  // Get the named field; no-op if the document does not have it.
  var field = doc.getField(fieldName);
  if (field == null) {
    return;
  }

  var firstValue = field.getFirstValue();
  if (firstValue == null) {
    return;
  }

  // Coerce to a JavaScript string. The raw value may be a host (Java) string,
  // where length is a method and charCodeAt is not guaranteed to exist, or a
  // non-string value; String() gives uniform .length / .charCodeAt behavior.
  var str = String(firstValue);

  // No-op if the string is empty.
  if (str.length === 0) {
    return;
  }

  // Code of the first character (a UTF-16 code unit).
  var code = str.charCodeAt(0);
  logger.info("String: \"" + str + "\" len: " + str.length + " code: " + code);

  // Set the character code output field value.
  doc.addField(codeFieldName, code);
}
/**
 * Placeholder with no behavior.
 * Add logic here only if the script needs it.
 */
function processDelete() {
}
/**
 * Placeholder with no behavior.
 * Add logic here only if the script needs it.
 */
function processCommit() {
}
/**
 * Placeholder with no behavior.
 * Add logic here only if the script needs it.
 */
function processRollback() {
}
/**
 * Placeholder with no behavior.
 * Add logic here only if the script needs it.
 */
function processMergeIndexes() {
}
/**
 * Placeholder with no behavior.
 * Add logic here only if the script needs it.
 */
function finish() {
}
Test it:
# Post six sample documents through the "script-add-char-code" update chain
# and commit. The documents cover: normal strings, an empty string, a missing
# field, a control character, and a multi-valued field.
curl "http://localhost:8983/solr/update?commit=true&update.chain=script-add-char-code" \
  -H 'Content-type:application/json' -d '
[{"id": "doc-1", "content": "abc"},
 {"id": "doc-2", "content": "1"},
 {"id": "doc-3", "content": ""},
 {"id": "doc-4"},
 {"id": "doc-5", "content": "\u0002 abc"},
 {"id": "doc-6", "content": ["And, this", "is the end", "of this test."]}]'
Results:
"id":"doc-1",
"content":["abc"],
"content_code_i":97,
"id":"doc-2",
"content":["1"],
"content_code_i":49,
"id":"doc-3",
"content":[""],
"id":"doc-4",
"id":"doc-5",
"content":["\u0002 abc"],
"content_code_i":2,
"id":"doc-6",
"content":["And, this",
"is the end",
"of this test."],
"content_code_i":65,
-- Jack Krupansky
-----Original Message-----
From: geeky2
Sent: Friday, June 07, 2013 6:27 PM
To: solr-user@lucene.apache.org
Subject: translating a character code to an ordinal?
hello all,
environment: solr 3.5, centos
problem statement: i have several character codes that i want to translate
to ordinal (integer) values (for sorting), while retaining the original code
field in the document.
i was thinking that i could use a copyField from my "code" field to my "ord"
field - then employ a pattern replace filter factory during indexing.
but won't the copyfield fail because the two field types are different?
ps: i also read the wiki section on the script transformer and regex
transformer (http://wiki.apache.org/solr/DataImportHandler#Transformer) -
but was hoping to avoid these if i could.
thx
mark
--
View this message in context:
http://lucene.472066.n3.nabble.com/translating-a-character-code-to-an-ordinal-tp4068966.html
Sent from the Solr - User mailing list archive at Nabble.com.