svn commit: r1084677 [2/2] - in /commons/sandbox/runtime/trunk/src/main: java/org/apache/commons/runtime/exception/ native/ native/include/acr/ native/shared/

mturk Wed, 23 Mar 2011 12:07:30 -0700

Added: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
URL: 
http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=1084677&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (added)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Wed Mar 23 
19:07:01 2011
@@ -0,0 +1,945 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "acr/string.h"
+#include "acr/memory.h"
+#include "acr/clazz.h"
+
+extern int acr_native_codepage;
+
+J_DECLARE_CLAZZ = {
+    NULL,
+    NULL,
+    "java/lang/String"      /* JNI (slash separated) name of java.lang.String */
+};
+
+J_DECLARE_M_ID(0000) = {
+    NULL,
+    "<init>",               /* JNI name used for constructors */
+    "([B)V"                 /* signature: one byte[] argument, void result */
+};
+
+J_DECLARE_M_ID(0001) = {
+    NULL,
+    "getBytes",             /* String.getBytes() */
+    "()[B"                  /* signature: no arguments, byte[] result */
+};
+
+ACR_CLASS_LOADER(String)
+{
+    int rv;
+
+    if ((rv = AcrLoadClass(_E, &_clazzn, 0)) != ACR_SUCCESS)
+        return rv;          /* class could not be loaded; hand the status back */
+    J_LOAD_METHOD(0000);    /* descriptor 0000: "<init>" ([B)V - presumably resolves the method id */
+    J_LOAD_METHOD(0001);    /* descriptor 0001: "getBytes" ()[B - presumably resolves the method id */
+
+    return ACR_SUCCESS;
+}
+
+ACR_CLASS_UNLOADER(String)
+{
+    AcrUnloadClass(_E, &_clazzn);   /* counterpart of the AcrLoadClass call made by the loader */
+}
+
+/* Charset names recognised by AcrGetNativeCodePage().
+ * The names are compared case insensitively and every table is
+ * terminated by a NULL entry.
+ */
+static const char *iso_8859_1_aliases[] = {
+    "iso-8859-1", "iso_8859-1", "iso_8859_1", "8859-1", "8859_1",
+    "iso8859-1", "iso8859_1", "latin1", "ibm-819", "ibm819",
+    "cp819", "819", "28591", "windows-28591", NULL
+};
+
+static const char *utf_8_aliases[] = {
+    "utf8", "utf-8", "cp1208", "65001", "windows-65001", NULL
+};
+
+static const char *us_ascii_aliases[] = {
+    "us-ascii", "ascii", "ascii7", "iso646-us", "us", "ibm367",
+    "cp367", "ansi_x3.4-1968", "646", "646us", "windows-20127", NULL
+};
+
+/* Translate a charset name into one of the ACR_CP_* identifiers.
+ *
+ * The name is compared, ignoring case, against the ISO-8859-1, UTF-8 and
+ * US-ASCII alias tables in that order.  US-ASCII names are reported as
+ * ACR_CP_ISO8859_1.  A NULL, empty or unknown name yields ACR_CP_DEFAULT.
+ */
+int
+AcrGetNativeCodePage(const char *cs)
+{
+    const char **alias;
+
+    if (cs == NULL || *cs == '\0')
+        return ACR_CP_DEFAULT;
+
+    for (alias = iso_8859_1_aliases; *alias != NULL; alias++) {
+        if (!strcasecmp(cs, *alias))
+            return ACR_CP_ISO8859_1;
+    }
+    for (alias = utf_8_aliases; *alias != NULL; alias++) {
+        if (!strcasecmp(cs, *alias))
+            return ACR_CP_UTF_8;
+    }
+    for (alias = us_ascii_aliases; *alias != NULL; alias++) {
+        if (!strcasecmp(cs, *alias))
+            return ACR_CP_ISO8859_1;
+    }
+    return ACR_CP_DEFAULT;
+}
+
+/* Copy a Java string into a NUL terminated char buffer, keeping only the
+ * low eight bits of every UTF-16 code unit.
+ *
+ * The caller supplied buffer 'b' is used when it is not NULL and the
+ * string length is below ACR_MBUFF_LEN; otherwise the result is allocated
+ * with ACR_MALLOC.  Returns NULL when the allocation or the access to the
+ * string characters fails.
+ */
+static char *get_string_iso_8859_1(JNIEnv *_E, jstring str, char *b)
+{
+    const jchar *chars;
+    char  *dst;
+    jsize  len;
+    jsize  n;
+
+    len = (*_E)->GetStringLength(_E, str);
+    if (b != NULL && len < ACR_MBUFF_LEN) {
+        dst = b;
+    }
+    else {
+        /* Allocate before entering the critical region.
+         * ACR_MALLOC reports its own failure.
+         */
+        dst = ACR_MALLOC(char, len + 1);
+        if (dst == NULL)
+            return NULL;
+    }
+    chars = (*_E)->GetStringCritical(_E, str, NULL);
+    if (chars == NULL) {
+        if (dst != b)
+            AcrFree(dst);
+        return NULL;
+    }
+    for (n = 0; n < len; n++)
+        dst[n] = (char)(chars[n] & 0xFF);
+    dst[len] = '\0';
+    (*_E)->ReleaseStringCritical(_E, str, chars);
+    return dst;
+}
+
+/* Implementation of RFC 3629, "UTF-8, a transformation format of ISO 10646"
+ * with particular attention to canonical translation forms (see section 10
+ * "Security Considerations" of the RFC for more info).
+ *
+ * Since several architectures including Windows support unicode, with UCS2
+ * used as the actual storage convention by that architecture, these functions
+ * exist to transform or validate UCS2 strings into APR's 'char' type
+ * convention.  It is left up to the operating system to determine the
+ * validity of the string, e.g. normative forms, in the context of
+ * its native language support.  Other file systems which support filename
+ * characters of 0x80-0xff but have no explicit requirement for Unicode
+ * will find this function useful only for validating the character sequences
+ * and rejecting poorly encoded UTF8 sequences.
+ *
+ * Len UCS-4 range (hex) UTF-8 octet sequence (binary)
+ * 1:2 00000000-0000007F 0xxxxxxx
+ * 2:2 00000080-000007FF 110XXXXx 10xxxxxx
+ * 3:2 00000800-0000FFFF 1110XXXX 10Xxxxxx 10xxxxxx
+ * 4:4 00010000-001FFFFF 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx
+ *     00200000-03FFFFFF 111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *     04000000-7FFFFFFF 1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * One of the X bits must be 1 to avoid overlong representation of ucs2 values.
+ *
+ * For conversion into ucs2, the 4th form is limited in range to 0010 FFFF,
+ * and the final two forms are used only by full ucs4, per RFC 3629;
+ *
+ *   "Pairs of UCS-2 values between D800 and DFFF (surrogate pairs in
+ *   Unicode parlance), being actually UCS-4 characters transformed
+ *   through UTF-16, need special treatment: the UTF-16 transformation
+ *   must be undone, yielding a UCS-4 character that is then transformed
+ *   as above."
+ *
+ * From RFC2781 UTF-16: the compressed ISO 10646 encoding bitmask
+ *
+ *  U' = U - 0x10000
+ *  U' = 00000000 0000yyyy yyyyyyxx xxxxxxxx
+ *                    W1 = 110110yy yyyyyyyy
+ *                    W2 = 110111xx xxxxxxxx
+ *  Max U' = 0000 00001111 11111111 11111111
+ *  Max U  = 0000 00010000 11111111 11111111
+ *
+ * Len in the table above is a mapping of bytes used for utf8:ucs2 values,
+ * which results in these conclusions of maximum allocations;
+ *
+ *  conv_utf8_to_ucs2 out bytes:sizeof(in) * 1 <= Req <= sizeof(in) * 2
+ *  conv_ucs2_to_utf8 out words:sizeof(in) / 2 <= Req <= sizeof(in) * 3 / 2
+ *
+ * conv_utf8_to_ucs2 converts at most 'inbytes' bytes read from 'in' into
+ * UTF-16 code units stored at 'out'.  On entry *outwords holds the capacity
+ * of 'out' in jchar units; it is decremented by one for every unit written.
+ * Returns ACR_EINVAL for a misplaced continuation byte, an overlong form,
+ * an encoded surrogate or a value above 10FFFF, ACR_INCOMPLETE when the
+ * input ends inside a multibyte sequence, and ACR_SUCCESS otherwise
+ * (also when the conversion stopped because 'out' is full).
+ */
+static int conv_utf8_to_ucs2(const char *in, jsize inbytes,
+                             jchar *out, jsize *outwords)
+{
+    acr_i64_t newch, mask;
+    jsize expect, eating;
+    int ch;
+
+    while (inbytes && *outwords) {
+        ch = (unsigned char)(*in++);
+        if (!(ch & 0200)) {
+            /* US-ASCII-7 plain text
+             */
+            --inbytes;
+            --*outwords;
+            *(out++) = ch;
+        }
+        else {
+            if ((ch & 0300) != 0300) {
+                /* Multibyte Continuation is out of place
+                 */
+                return ACR_EINVAL;
+            }
+            else {
+                /* Multibyte Sequence Lead Character
+                 *
+                 * Compute the expected number of continuation bytes while
+                 * adjusting the lead byte and leading zeros mask.
+                 */
+                mask = 0340;
+                expect = 1;
+                while ((ch & mask) == mask) {
+                    mask |= mask >> 1;
+                    if (++expect > 3) /* (truly 5 for ucs-4) */
+                        return ACR_EINVAL;
+                }
+                newch = ch & ~mask;
+                eating = expect + 1; /* lead byte plus continuation bytes */
+                if (inbytes <= expect)
+                    return ACR_INCOMPLETE;
+                /* Reject values of excessive leading 0 bits
+                 * utf-8 _demands_ the shortest possible byte length
+                 */
+                if (expect == 1) {
+                    if (!(newch & 0036))
+                        return ACR_EINVAL;
+                }
+                else {
+                    /* Reject values of excessive leading 0 bits
+                     */
+                    if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+                        return ACR_EINVAL;
+                    if (expect == 2) {
+                        /* Reject values D800-DFFF when not utf16 encoded
+                         * (may not be an appropriate restriction for ucs-4)
+                         */
+                        if (newch == 0015 && ((unsigned char)*in & 0040))
+                            return ACR_EINVAL;
+                    }
+                    else if (expect == 3) {
+                        /* Short circuit values > 110000
+                         */
+                        if (newch > 4)
+                            return ACR_EINVAL;
+                        if (newch == 4 && ((unsigned char)*in & 0060))
+                            return ACR_EINVAL;
+                    }
+                }
+                /* Where the boolean (expect > 2) is true, we will need
+                 * an extra word for the output.
+                 */
+                if (*outwords < (jsize)(expect > 2) + 1)
+                    break; /* buffer full */
+                while (expect--) {
+                    /* Multibyte Continuation must be legal */
+                    if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+                        return ACR_EINVAL;
+                    newch <<= 6;
+                    newch |= (ch & 0077);
+                }
+                inbytes -= eating;
+                /* newch is now a true ucs-4 character
+                 *
+                 * now we need to fold to ucs-2
+                 */
+                if (newch < 0x10000) {
+                    --*outwords;
+                    *(out++) = (jchar) newch;
+                }
+                else {
+                    /* Emit a surrogate pair: two output words */
+                    *outwords -= 2;
+                    newch -= 0x10000;
+                    *(out++) = (jchar) (0xD800 | (newch >> 10));
+                    *(out++) = (jchar) (0xDC00 | (newch & 0x03FF));
+                }
+            }
+        }
+    }
+    /* Buffer full 'errors' aren't errors, the client must inspect the
+     * remaining *outwords value (inbytes is passed by value here, so the
+     * amount of unconsumed input is not reported back).
+     */
+    return ACR_SUCCESS;
+}
+
+/* Convert UTF-8 to a wchar_t string.
+ *
+ * Same validation rules as conv_utf8_to_ucs2.  When wchar_t is 16 bits
+ * wide (CC_SIZEOF_WCHAR_T == 2) values above FFFF are written as a
+ * surrogate pair, otherwise every decoded character takes exactly one
+ * output word.
+ *
+ * At most 'inbytes' bytes are read from 'in'.  On entry *outwords holds the
+ * capacity of 'out' in wchar_t units; it is decremented for every unit
+ * written.  Returns ACR_EINVAL for malformed input, ACR_INCOMPLETE when the
+ * input ends inside a multibyte sequence, and ACR_SUCCESS otherwise
+ * (also when the conversion stopped because 'out' is full).
+ */
+static int conv_utf8_to_wcs(const char *in, size_t inbytes,
+                            wchar_t *out, size_t *outwords)
+{
+    acr_i64_t newch, mask;
+    size_t expect, eating;
+    int ch;
+
+    while (inbytes && *outwords) {
+        ch = (unsigned char)(*in++);
+        if (!(ch & 0200)) {
+            /* US-ASCII-7 plain text
+             */
+            --inbytes;
+            --*outwords;
+            *(out++) = ch;
+        }
+        else {
+            if ((ch & 0300) != 0300) {
+                /* Multibyte Continuation is out of place
+                 */
+                return ACR_EINVAL;
+            }
+            else {
+                /* Multibyte Sequence Lead Character
+                 *
+                 * Compute the expected number of continuation bytes while
+                 * adjusting the lead byte and leading zeros mask.
+                 */
+                mask = 0340;
+                expect = 1;
+                while ((ch & mask) == mask) {
+                    mask |= mask >> 1;
+                    if (++expect > 3) /* (truly 5 for ucs-4) */
+                        return ACR_EINVAL;
+                }
+                newch = ch & ~mask;
+                eating = expect + 1; /* lead byte plus continuation bytes */
+                if (inbytes <= expect)
+                    return ACR_INCOMPLETE;
+                /* Reject values of excessive leading 0 bits
+                 * utf-8 _demands_ the shortest possible byte length
+                 */
+                if (expect == 1) {
+                    if (!(newch & 0036))
+                        return ACR_EINVAL;
+                }
+                else {
+                    /* Reject values of excessive leading 0 bits
+                     */
+                    if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+                        return ACR_EINVAL;
+                    if (expect == 2) {
+                        /* Reject values D800-DFFF when not utf16 encoded
+                         * (may not be an appropriate restriction for ucs-4)
+                         */
+                        if (newch == 0015 && ((unsigned char)*in & 0040))
+                            return ACR_EINVAL;
+                    }
+                    else if (expect == 3) {
+                        /* Short circuit values > 110000
+                         */
+                        if (newch > 4)
+                            return ACR_EINVAL;
+                        if (newch == 4 && ((unsigned char)*in & 0060))
+                            return ACR_EINVAL;
+                    }
+                }
+#if CC_SIZEOF_WCHAR_T == 2
+                /* Where the boolean (expect > 2) is true, we will need
+                 * an extra word for the surrogate pair.
+                 */
+                if (*outwords < (size_t)(expect > 2) + 1)
+                    break; /* buffer full */
+#endif
+                /* With a wider wchar_t a single word is always enough and
+                 * the loop condition already guarantees one free word, so
+                 * no additional space check is needed in that case.
+                 */
+                while (expect--) {
+                    /* Multibyte Continuation must be legal */
+                    if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+                        return ACR_EINVAL;
+                    newch <<= 6;
+                    newch |= (ch & 0077);
+                }
+                inbytes -= eating;
+#if CC_SIZEOF_WCHAR_T == 2
+                /* newch is now a true ucs-4 character
+                 *
+                 * now we need to fold to ucs-2
+                 */
+                if (newch < 0x10000) {
+                    --*outwords;
+                    *(out++) = (wchar_t) newch;
+                }
+                else {
+                    *outwords -= 2;
+                    newch -= 0x10000;
+                    *(out++) = (wchar_t) (0xD800 | (newch >> 10));
+                    *(out++) = (wchar_t) (0xDC00 | (newch & 0x03FF));
+                }
+#else
+                --*outwords;
+                *(out++) = (wchar_t) newch;
+#endif
+            }
+        }
+    }
+    /* Buffer full 'errors' aren't errors, the client must inspect the
+     * remaining *outwords value.
+     */
+    return ACR_SUCCESS;
+}
+
+/* Java implementation of GetStringUTF is bogus.
+ * It breaks on embedded NUL in strings.
+ * Use the APR implementation instead.
+ *
+ * Converts at most 'inwords' UTF-16 code units read from 'in' into UTF-8
+ * bytes stored at 'out'.  On entry *outbytes holds the capacity of 'out';
+ * it is decremented by the number of bytes written.  No terminating NUL is
+ * appended by this function.  Returns ACR_EINVAL for a trailing surrogate
+ * that is out of place or a leading surrogate that is not followed by a
+ * trailing one, ACR_INCOMPLETE when the input ends right after a leading
+ * surrogate, and ACR_SUCCESS otherwise (also when the conversion stopped
+ * because 'out' is full).
+ */
+static int conv_ucs2_to_utf8(const jchar *in, jsize inwords,
+                             char *out, jsize *outbytes)
+{
+    acr_i64_t newch, require;
+    jsize need;
+    char *invout;
+    int ch;
+
+    while (inwords && *outbytes) {
+        ch = (unsigned short)(*in++);
+        if (ch < 0x80) {
+            --inwords;
+            --*outbytes;
+            *(out++) = (unsigned char) ch;
+        }
+        else  {
+            if ((ch & 0xFC00) == 0xDC00) {
+                /* Invalid Leading ucs-2 Multiword Continuation Character
+                 */
+                return ACR_EINVAL;
+            }
+            if ((ch & 0xFC00) == 0xD800) {
+                /* Leading ucs-2 Multiword Character
+                 */
+                if (inwords < 2) {
+                    /* Missing ucs-2 Multiword Continuation Character
+                     */
+                    return ACR_INCOMPLETE;
+                }
+                if (((unsigned short)(*in) & 0xFC00) != 0xDC00) {
+                    /* Invalid ucs-2 Multiword Continuation Character
+                     */
+                    return ACR_EINVAL;
+                }
+                newch = (ch & 0x03FF) << 10 | ((unsigned short)(*in++) & 
0x03FF);
+                newch += 0x10000;
+            }
+            else {
+                /* ucs-2 Single Word Character
+                 */
+                newch = ch;
+            }
+            /* Determine the absolute minimum utf-8 bytes required.
+             * 'need' counts the continuation bytes only, the complete
+             * sequence occupies need + 1 bytes.
+             */
+            require = newch >> 11;
+            need = 1;
+            while (require)
+                require >>= 5, ++need;
+            if (need >= *outbytes)
+                break; /* Insufficient buffer */
+            inwords   -= (need > 2) + 1; /* a surrogate pair used two words */
+            *outbytes -=  need + 1;
+            /* Compute the utf-8 characters in last to first order,
+             * calculating the lead character length bits along the way.
+             */
+            ch = 0200;
+            out += need + 1;
+            invout = out;
+            while (need--) {
+                ch |= ch >> 1;
+                *(--invout) = (unsigned char)(0200 | (newch & 0077));
+                newch >>= 6;
+            }
+            /* Compute the lead utf-8 character and move the dest offset
+             */
+            *(--invout) = (unsigned char)(ch | newch);
+        }
+    }
+    /* Buffer full 'errors' aren't errors, the client must inspect the
+     * remaining *outbytes value (inwords is passed by value here, so the
+     * amount of unconsumed input is not reported back).
+     */
+    return ACR_SUCCESS;
+}
+
+/* Number of bytes java_ucs2_to_utf8() writes for 'inwords' code units,
+ * including the terminating NUL byte.
+ *
+ * U+0000 counts as two bytes (0xC0 0x80), 0001-007F as one byte,
+ * 0080-07FF as two bytes and everything else, surrogates included,
+ * as three bytes.
+ */
+static jsize java_ucs2_to_utf8_len(const jchar *in, jsize inwords)
+{
+    jsize need = 1; /* terminating NUL */
+    int ch;
+
+    while (inwords) {
+        ch = (unsigned short)(*in++);
+        if (ch == 0)
+            need += 2;
+        else if (ch < 0x80)
+            need += 1;
+        else if (ch < 0x0800)
+            need += 2;
+        else
+            need += 3;
+        --inwords;
+    }
+    return need;
+}
+
+/* Modified UTF-8 according to the java.io.DataInput
+ * specification
+ *
+ * Encodes 'inwords' UTF-16 code units from 'in' into 'out'.  U+0000 is
+ * written as the two byte sequence 0xC0 0x80 so the result never contains
+ * an embedded NUL, and each surrogate is encoded on its own as a three
+ * byte sequence.  On entry *outbytes holds the capacity of 'out'; it is
+ * decremented by the number of bytes written.  A terminating NUL is
+ * appended when at least one byte is left after the conversion.
+ *
+ * Returns ACR_INCOMPLETE when a multibyte sequence does not fit in the
+ * remaining space, ACR_SUCCESS otherwise.
+ */
+static int java_ucs2_to_utf8(const jchar *in, jsize inwords,
+                             char *out, jsize *outbytes)
+{
+    int ch;
+
+    while (inwords && *outbytes) {
+        ch = (unsigned short)(*in++);
+        if (ch == 0) {
+            /* Encoded NUL, must not fall through to the one byte form
+             */
+            if (*outbytes < 2)
+                return ACR_INCOMPLETE;
+            *outbytes -= 2;
+            *(out++) = (unsigned char)0xC0;
+            *(out++) = (unsigned char)0x80;
+        }
+        else if (ch < 0x80) {
+            /* One byte, the loop condition guarantees the space
+             */
+            --*outbytes;
+            *(out++) = (unsigned char)ch;
+        }
+        else if (ch < 0x0800) {
+            /* Two byte sequence
+             */
+            if (*outbytes < 2)
+                return ACR_INCOMPLETE;
+            *outbytes -= 2;
+            *(out++) = (unsigned char)(0xC0 | ((ch >> 6) & 0x1F));
+            *(out++) = (unsigned char)(0x80 | ((ch)      & 0x3F));
+        }
+        else {
+            /* Three byte sequence
+             */
+            if (*outbytes < 3)
+                return ACR_INCOMPLETE;
+            *outbytes -= 3;
+            *(out++) = (unsigned char)(0xE0 | ((ch >> 12) & 0x0F));
+            *(out++) = (unsigned char)(0x80 | ((ch >>  6) & 0x3F));
+            *(out++) = (unsigned char)(0x80 | ((ch)       & 0x3F));
+        }
+        --inwords;
+    }
+    if (*outbytes) {
+        *(out++) = '\0';
+        --*outbytes;
+    }
+    return ACR_SUCCESS;
+}
+/* Decoder for the modified UTF-8 form described by java.io.DataOutput.
+ *
+ * Reads at most 'inbytes' bytes from 'in' and stores one jchar per decoded
+ * one, two or three byte sequence at 'out'.  On entry *outwords holds the
+ * capacity of 'out'; it is decremented for every unit written.  A zero
+ * byte ends the conversion: it is stored as the terminator and
+ * ACR_SUCCESS is returned at once.
+ *
+ * Returns ACR_INCOMPLETE when the input ends inside a sequence, ACR_EILSEQ
+ * for an unsupported lead byte or a bad continuation byte, and ACR_SUCCESS
+ * otherwise.
+ */
+static int java_utf8_to_ucs2(const char *in, jsize inbytes,
+                             jchar *out, jsize *outwords)
+{
+    while (inbytes && *outwords) {
+        int tail;   /* number of continuation bytes that follow */
+        int ch = (unsigned char)*in;
+
+        in++;
+        if (ch == 0) {
+            --*outwords;
+            *out = (jchar)0;
+            return ACR_SUCCESS;
+        }
+        if ((ch & 0x80) == 0) {
+            /* US-ASCII-7 plain text */
+            tail = 0;
+        }
+        else if ((ch & 0xE0) == 0xC0) {
+            /* Two byte sequence */
+            tail = 1;
+        }
+        else if ((ch & 0xF0) == 0xE0) {
+            /* Three byte sequence */
+            tail = 2;
+        }
+        else {
+            return ACR_EILSEQ;
+        }
+        if (inbytes < tail + 1)
+            return ACR_INCOMPLETE;
+        inbytes -= tail + 1;
+        if (tail == 1)
+            ch &= 0x1F;
+        else if (tail == 2)
+            ch &= 0x0F;
+        while (tail-- > 0) {
+            if ((*in & 0xC0) != 0x80)
+                return ACR_EILSEQ;
+            ch = (ch << 6) | (unsigned char)(*in & 0x3F);
+            in++;
+        }
+        *out = (jchar)ch;
+        out++;
+        --*outwords;
+    }
+    return ACR_SUCCESS;
+}
+
+/* Convert a NUL terminated modified UTF-8 string into a newly allocated
+ * jchar array.  The terminator is converted too, so the buffer holds
+ * strlen(str) + 1 units at most.
+ *
+ * Returns NULL when the allocation fails or when the input cannot be
+ * decoded; in the latter case the status is passed to ACR_SET_OS_ERROR.
+ * The result has to be released with AcrFree.
+ */
+jchar *
+AcrUtf8ToUcs2(JNIEnv *_E, const char *str)
+{
+    jsize  nbytes = (jsize)strlen(str) + 1;
+    jsize  nwords = nbytes;
+    jchar *buf;
+    int    status;
+
+    buf = ACR_MALLOC(jchar, nbytes);
+    if (buf == NULL)
+        return NULL;
+    status = java_utf8_to_ucs2(str, nbytes, buf, &nwords);
+    if (status != 0) {
+        /* Invalid UTF-8 string */
+        AcrFree(buf);
+        ACR_SET_OS_ERROR(status);
+        return NULL;
+    }
+    return buf;
+}
+
+/* Convert a NUL terminated UTF-8 string into a newly allocated wchar_t
+ * array.  The terminator is converted too, so the buffer holds
+ * strlen(str) + 1 units at most.
+ *
+ * Returns NULL when the allocation fails or when the input cannot be
+ * decoded; in the latter case the status is passed to ACR_SET_OS_ERROR.
+ * The result has to be released with AcrFree.
+ */
+wchar_t *
+AcrUtf8ToWcs(JNIEnv *_E, const char *str)
+{
+    size_t   nbytes = strlen(str) + 1;
+    size_t   nwords = nbytes;
+    wchar_t *buf;
+    int      status;
+
+    buf = ACR_MALLOC(wchar_t, nbytes);
+    if (buf == NULL)
+        return NULL;
+    status = conv_utf8_to_wcs(str, nbytes, buf, &nwords);
+    if (status != 0) {
+        /* Invalid UTF-8 string */
+        AcrFree(buf);
+        ACR_SET_OS_ERROR(status);
+        return NULL;
+    }
+    return buf;
+}
+
+/* Convert 'len' UTF-16 code units into a newly allocated, NUL terminated
+ * modified UTF-8 string.  The buffer size is taken from
+ * java_ucs2_to_utf8_len().
+ *
+ * Returns NULL when the allocation fails or when the conversion reports
+ * an error; in the latter case the status is passed to ACR_SET_OS_ERROR.
+ * The result has to be released with AcrFree.
+ */
+char *
+AcrUsc2ToUtf8(JNIEnv *_E, const jchar *str, jsize len)
+{
+    jsize  room = java_ucs2_to_utf8_len(str, len);
+    char  *buf;
+    int    status;
+
+    buf = ACR_MALLOC(char, room);
+    if (buf == NULL)
+        return NULL;
+    status = java_ucs2_to_utf8(str, len, buf, &room);
+    if (status != 0) {
+        /* Conversion failed */
+        AcrFree(buf);
+        ACR_SET_OS_ERROR(status);
+        return NULL;
+    }
+    return buf;
+}
+
+/* Return the content of a Java string as NUL terminated UTF-8.
+ *
+ * The caller supplied buffer 'b' is used when it is not NULL and three
+ * times the string length is below ACR_MBUFF_LEN; otherwise the result is
+ * allocated with ACR_MALLOC and has to be released with AcrFree.
+ * Returns NULL for a NULL string, when memory cannot be obtained, when the
+ * string characters cannot be accessed or when the string is not valid
+ * UTF-16.
+ */
+static char *get_string_utf_8(JNIEnv *_E, jstring str, char *b)
+{
+    jsize sl, nl, ol;
+    const jchar *sr;
+    char *rv = NULL;
+    int rc;
+
+    if (!str) {
+        return NULL;
+    }
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+    sl = (*_E)->GetStringLength(_E, str);
+    /* Every code unit needs three bytes at most */
+    nl = sl * 3;
+    if (b && nl < ACR_MBUFF_LEN)
+        rv = b;
+    else {
+        rv = ACR_MALLOC(char, nl + 1);
+        if (!rv) {
+            /* Exception has already been thrown from ACR_MALLOC
+             */
+            return NULL;
+        }
+    }
+    sr = (*_E)->GetStringCritical(_E, str, NULL);
+    if (!sr) {
+        if (rv != b)
+            AcrFree(rv);
+        return NULL;
+    }
+    ol = nl;
+    rc = conv_ucs2_to_utf8(sr, sl, rv, &nl);
+    if (rc == ACR_SUCCESS)
+        rv[ol - nl] = '\0';
+    /* The critical region has to be left on the failure path as well,
+     * every GetStringCritical needs its matching release.
+     */
+    (*_E)->ReleaseStringCritical(_E, str, sr);
+    if (rc != ACR_SUCCESS) {
+        /* XXX: Throw some exception ?
+         */
+        if (rv != b)
+            AcrFree(rv);
+        return NULL;
+    }
+    return rv;
+}
+
+/* Return the content of a Java string in the platform default charset by
+ * calling String.getBytes() (method descriptor 0001).
+ *
+ * The caller supplied buffer 'b' is used when it is not NULL and the byte
+ * count is below ACR_MBUFF_LEN; otherwise the result is allocated with
+ * ACR_MALLOC and has to be released with AcrFree.  The result is always
+ * NUL terminated.  Returns NULL when the String class data has not been
+ * loaded (ACR_EINIT is set), when getBytes() fails or when memory cannot
+ * be obtained.
+ */
+static char *get_string_default(JNIEnv *_E, jstring str, char *b)
+{
+    jbyteArray sb = NULL;
+    char *rs = NULL;
+    jint len;
+
+    /* getBytes is descriptor 0001; 0000 is the constructor */
+    if (!_clazzn.i || !J4MID(0001)) {
+        ACR_SET_OS_ERROR(ACR_EINIT);
+        return NULL;
+    }
+    sb = CALL_METHOD0(Object, 0001, str);
+    if ((*_E)->ExceptionCheck(_E) || sb == NULL)
+        return NULL;
+    len = (*_E)->GetArrayLength(_E, sb);
+    /* NOTE(review): AcrGetJavaStringA hands the same 'b' to all three
+     * get_string_* helpers and the other two limit it by ACR_MBUFF_LEN,
+     * so the same limit is used here instead of ACR_PBUFF_LEN.
+     * Assumes callers size 'b' for ACR_MBUFF_LEN - confirm.
+     */
+    if (b && len < ACR_MBUFF_LEN) {
+        /* Use provided stack storage */
+        rs = b;
+    }
+    else {
+        rs = ACR_MALLOC(char, len + 1);
+        if (rs == NULL) {
+            (*_E)->DeleteLocalRef(_E, sb);
+            return NULL;
+        }
+    }
+    (*_E)->GetByteArrayRegion(_E, sb, 0, len, (jbyte *)rs);
+    rs[len] = '\0'; /* NUL-terminate */
+    (*_E)->DeleteLocalRef(_E, sb);
+    return rs;
+}
+
+/* Create a Java string from bytes in the platform default charset by
+ * calling the String(byte[]) constructor (method descriptor 0000).
+ *
+ * Returns NULL when the String class data has not been loaded (ACR_EINIT
+ * is set), when the byte array cannot be created or when the constructor
+ * fails.
+ */
+static jstring new_string_default(JNIEnv *_E, const char *str)
+{
+    jstring    rs;
+    jbyteArray ba;
+    jsize      sl;
+
+    /* Same guard as get_string_default: never hand an unresolved class
+     * or method id to the JVM.
+     */
+    if (!_clazzn.i || !J4MID(0000)) {
+        ACR_SET_OS_ERROR(ACR_EINIT);
+        return NULL;
+    }
+    sl = (jsize)strlen(str);
+    ba = (*_E)->NewByteArray(_E, sl);
+    if (ba == NULL)
+        return NULL;
+    (*_E)->SetByteArrayRegion(_E, ba, 0, sl, (jbyte *)str);
+    rs = (*_E)->NewObject(_E, _clazzn.i, J4MID(0000), ba);
+    (*_E)->DeleteLocalRef(_E, ba);
+    return rs;
+}
+
+/* Create a Java string from ISO-8859-1 bytes: every byte becomes the
+ * UTF-16 code unit with the same value (0000-00FF).
+ *
+ * Strings shorter than ACR_MBUFF_SIZ are converted in a stack buffer,
+ * longer ones in a temporary ACR_MALLOC buffer.  Returns NULL for a NULL
+ * input, when memory cannot be obtained or when NewString fails.
+ */
+static jstring new_string_iso_8859_1(JNIEnv *_E, const char *s)
+{
+    jchar   sbuf[ACR_MBUFF_SIZ];
+    jchar  *cc = sbuf;
+    jstring rs;
+    size_t  i, l;
+
+    if (!s)
+        return NULL;
+    l = strlen(s);
+    if (l >= ACR_MBUFF_SIZ) {
+        cc = ACR_MALLOC(jchar, l + 1);
+        if (!cc)
+            return NULL;
+    }
+    for (i = 0; i < l; i++) {
+        /* Go through unsigned char: plain char may be signed and would
+         * turn bytes 80-FF into FF80-FFFF.
+         */
+        cc[i] = (jchar)(unsigned char)s[i];
+    }
+    rs = (*_E)->NewString(_E, cc, (jsize)l);
+    if (cc != sbuf)
+        AcrFree(cc);
+    return rs;
+}
+
+/* Create a Java string from a NUL terminated UTF-8 string.
+ *
+ * Strings shorter than ACR_MBUFF_SIZ bytes are converted in a stack
+ * buffer, longer ones in a temporary ACR_MALLOC buffer.  Invalid UTF-8
+ * raises an exception through AcrThrowException with ACR_EX_EINVAL.
+ * Returns NULL for a NULL input, when memory cannot be obtained, when the
+ * input is invalid or when NewString fails.
+ */
+static jstring new_string_utf_8(JNIEnv *_E, const char *s)
+{
+    jchar   sbuf[ACR_MBUFF_SIZ];
+    jchar  *cc = sbuf;
+    jstring rs = NULL;
+    jsize   sl, wl;
+    int     ex;
+
+    if (!s)
+        return NULL;
+    sl = (jsize)strlen(s);
+    if (sl >= ACR_MBUFF_SIZ) {
+        cc = ACR_MALLOC(jchar, sl + 1);
+        if (!cc)
+            return NULL;
+    }
+    /* The conversion never produces more words than input bytes */
+    wl = sl;
+    ex = conv_utf8_to_ucs2(s, sl, cc, &wl);
+    if (ex == ACR_SUCCESS) {
+        /* wl now holds the unused capacity, so sl - wl is the number of
+         * words actually written.  Passing the byte length here would
+         * read uninitialized words for any multibyte input.
+         */
+        rs = (*_E)->NewString(_E, cc, sl - wl);
+    }
+    else
+        AcrThrowException(_E, __FILE_FUNC_LINE__, ACR_EX_EINVAL, ex);
+    if (cc != sbuf)
+        AcrFree(cc);
+    return rs;
+}
+
+
+/* Copy a Java string into a NUL terminated wchar_t buffer, one wchar_t
+ * per UTF-16 code unit.
+ *
+ * The caller supplied buffer 'b' is used when it is not NULL and the
+ * string length is below ACR_MBUFF_LEN; otherwise the result is allocated
+ * with ACR_MALLOC.  Returns NULL for a NULL string, when memory cannot be
+ * obtained or when the string characters cannot be accessed.
+ */
+wchar_t *
+AcrGetJavaStringW(JNIEnv *_E, jstring str, wchar_t *b)
+{
+    const jchar *chars;
+    wchar_t     *dst;
+    jsize        len;
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+    len = (*_E)->GetStringLength(_E, str);
+    if (b != NULL && len < ACR_MBUFF_LEN) {
+        dst = b;
+    }
+    else {
+        /* ACR_MALLOC reports its own failure */
+        dst = ACR_MALLOC(wchar_t, len + 1);
+        if (dst == NULL)
+            return NULL;
+    }
+    chars = (*_E)->GetStringCritical(_E, str, NULL);
+    if (chars == NULL) {
+        if (dst != b)
+            AcrFree(dst);
+        return NULL;
+    }
+#if CC_SIZEOF_WCHAR_T == 2
+    memcpy(dst, chars, len * sizeof(wchar_t));
+#else
+    {
+        jsize n;
+        for (n = 0; n < len; n++)
+            dst[n] = chars[n];
+    }
+#endif
+    dst[len] = L'\0';
+    (*_E)->ReleaseStringCritical(_E, str, chars);
+    return dst;
+}
+
+/* Return the content of a Java string as a NUL terminated char string in
+ * the code page selected by acr_native_codepage.
+ *
+ * 'b' is an optional caller supplied buffer that is passed on to the code
+ * page specific helper.  Returns NULL for a NULL string, when the local
+ * reference capacity cannot be ensured or when the helper fails.
+ */
+char *
+AcrGetJavaStringA(JNIEnv *_E, jstring str, char *b)
+{
+    int cp;
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+    cp = acr_native_codepage;
+    if (cp == ACR_CP_ISO8859_1)
+        return get_string_iso_8859_1(_E, str, b);
+    if (cp == ACR_CP_UTF_8)
+        return get_string_utf_8(_E, str, b);
+    return get_string_default(_E, str, b);
+}
+
+/* Create a Java string from a NUL terminated wchar_t string.
+ *
+ * With a 16 bit wchar_t the characters are handed to NewString as they
+ * are.  With a wider wchar_t every element is taken as a code point:
+ * values up to FFFF are stored directly, values up to 10FFFF are written
+ * as a surrogate pair and anything larger is replaced by U+FFFD.
+ *
+ * Returns NULL for a NULL input, when memory cannot be obtained or when
+ * NewString fails.
+ */
+jstring
+AcrNewJavaStringW(JNIEnv *_E, const wchar_t *s)
+{
+    jstring r = NULL;
+    if (s) {
+        size_t l = wcslen(s);
+#if CC_SIZEOF_WCHAR_T == 2
+        r = (*_E)->NewString(_E, (const jchar *)s, (jsize)l);
+#else
+        jchar   sbuf[ACR_MBUFF_SIZ];
+        jchar  *cc = sbuf;
+        size_t  i;
+        size_t  n = 0;
+
+        /* Worst case every code point needs two words */
+        if (l >= ACR_MBUFF_SIZ / 2) {
+            cc = ACR_MALLOC(jchar, l * 2 + 1);
+            if (!cc)
+                return NULL;
+        }
+        for (i = 0; i < l; i++) {
+            unsigned long c = (unsigned long)s[i];
+            if (c < 0x10000UL) {
+                cc[n++] = (jchar)c;
+            }
+            else if (c <= 0x10FFFFUL) {
+                /* utf32 to utf16 surrogate pair */
+                c -= 0x10000UL;
+                cc[n++] = (jchar)(0xD800 | (c >> 10));
+                cc[n++] = (jchar)(0xDC00 | (c & 0x03FF));
+            }
+            else {
+                /* Not a Unicode code point */
+                cc[n++] = (jchar)0xFFFD;
+            }
+        }
+        r = (*_E)->NewString(_E, cc, (jsize)n);
+        if (cc != sbuf)
+            AcrFree(cc);
+#endif
+    }
+    return r;
+}
+
+/* Create a Java string from a NUL terminated char string encoded in the
+ * code page selected by acr_native_codepage.
+ *
+ * Returns NULL for a NULL input, when the local reference capacity cannot
+ * be ensured or when the code page specific helper fails.
+ */
+jstring
+AcrNewJavaStringA(JNIEnv *_E, const char *str)
+{
+    int cp;
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+    cp = acr_native_codepage;
+    if (cp == ACR_CP_ISO8859_1)
+        return new_string_iso_8859_1(_E, str);
+    if (cp == ACR_CP_UTF_8)
+        return new_string_utf_8(_E, str);
+    return new_string_default(_E, str);
+}
+
+/* Create a Java string from a NUL terminated UTF-8 string, regardless of
+ * the native code page.
+ *
+ * Returns NULL for a NULL input, when the local reference capacity cannot
+ * be ensured or when the conversion fails.
+ */
+jstring
+AcrNewJavaStringU(JNIEnv *_E, const char *str)
+{
+    /* A negative EnsureLocalCapacity result is a JNI out of memory error */
+    if (str != NULL && (*_E)->EnsureLocalCapacity(_E, 2) >= 0)
+        return new_string_utf_8(_E, str);
+    return NULL;
+}


Propchange: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
------------------------------------------------------------------------------
    svn:eol-style = native

svn commit: r1084677 [2/2] - in /commons/sandbox/runtime/trunk/src/main: java/org/apache/commons/runtime/exception/ native/ native/include/acr/ native/shared/

Reply via email to