Added: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
URL:
http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=1084677&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (added)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Wed Mar 23
19:07:01 2011
@@ -0,0 +1,945 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "acr/string.h"
+#include "acr/memory.h"
+#include "acr/clazz.h"
+
+extern int acr_native_codepage;
+
+/* Descriptor of the Java class this file works with: java.lang.String.
+ * The two leading NULL members are presumably resolved when the class
+ * loader in this file hands the descriptor to AcrLoadClass() --
+ * confirm against acr/clazz.h.
+ */
+J_DECLARE_CLAZZ = {
+ NULL,
+ NULL,
+ "java/lang/String"
+};
+
+/* Method 0000: the String(byte[]) constructor ("<init>", "([B)V"). */
+J_DECLARE_M_ID(0000) = {
+ NULL,
+ "<init>",
+ "([B)V"
+};
+
+/* Method 0001: byte[] String.getBytes() ("getBytes", "()[B"). */
+J_DECLARE_M_ID(0001) = {
+ NULL,
+ "getBytes",
+ "()[B"
+};
+
+/* Class loader for java.lang.String.
+ *
+ * Loads the class through AcrLoadClass() and then resolves the two
+ * method ids (0000 and 0001) declared in this file.
+ * Returns ACR_SUCCESS, or the AcrLoadClass() status when loading fails.
+ */
+ACR_CLASS_LOADER(String)
+{
+    int rv;
+
+    rv = AcrLoadClass(_E, &_clazzn, 0);
+    if (rv != ACR_SUCCESS) {
+        /* Class could not be loaded; report the loader status as is */
+        return rv;
+    }
+
+    J_LOAD_METHOD(0000);
+    J_LOAD_METHOD(0001);
+    return ACR_SUCCESS;
+}
+
+/* Class unloader for java.lang.String: hands the class descriptor
+ * declared in this file to AcrUnloadClass().
+ */
+ACR_CLASS_UNLOADER(String)
+{
+ AcrUnloadClass(_E, &_clazzn);
+}
+
+/* Charset name aliases recognised by AcrGetNativeCodePage().
+ * Each table is NULL terminated and is compared case-insensitively,
+ * so entries are written in lower case and must not carry stray
+ * whitespace (a padded entry can never match a real charset name).
+ */
+
+/* Names that select ISO-8859-1 (Latin-1). */
+static const char *iso_8859_1_aliases[] = {
+    "iso-8859-1", "iso_8859-1", "iso_8859_1", "8859-1", "8859_1",
+    "iso8859-1",  "iso8859_1",  "latin1",     "ibm-819", "ibm819",
+    "cp819",      "819",        "28591",      "windows-28591", NULL
+};
+
+/* Names that select UTF-8. */
+static const char *utf_8_aliases[] = {
+    "utf8", "utf-8", "cp1208", "65001", "windows-65001", NULL
+};
+
+/* Names that select US-ASCII. */
+static const char *us_ascii_aliases[] = {
+    "us-ascii", "ascii", "ascii7", "iso646-us", "us", "ibm367",
+    "cp367", "ansi_x3.4-1968", "646", "646us", "windows-20127", NULL
+};
+
+/* Return non-zero when cs equals (ignoring case) one of the names in the
+ * NULL terminated alias table.
+ */
+static int codepage_alias_match(const char *cs, const char **aliases)
+{
+    for (; *aliases != NULL; aliases++) {
+        if (strcasecmp(cs, *aliases) == 0)
+            return 1;
+    }
+    return 0;
+}
+
+/* Map a charset name to one of the ACR_CP_* code page constants.
+ *
+ * cs   charset name, may be NULL or empty.
+ *
+ * Returns ACR_CP_ISO8859_1 for ISO-8859-1 names, ACR_CP_UTF_8 for UTF-8
+ * names and ACR_CP_DEFAULT for anything else (including NULL/empty).
+ * US-ASCII names are also reported as ACR_CP_ISO8859_1, presumably
+ * because ASCII is a subset of Latin-1.
+ */
+int
+AcrGetNativeCodePage(const char *cs)
+{
+    if (cs == NULL || *cs == '\0')
+        return ACR_CP_DEFAULT;
+
+    if (codepage_alias_match(cs, iso_8859_1_aliases))
+        return ACR_CP_ISO8859_1;
+    if (codepage_alias_match(cs, utf_8_aliases))
+        return ACR_CP_UTF_8;
+    if (codepage_alias_match(cs, us_ascii_aliases))
+        return ACR_CP_ISO8859_1;
+
+    return ACR_CP_DEFAULT;
+}
+
+/* Copy a Java string into a NUL terminated ISO-8859-1 C string.
+ *
+ * str  Java string to convert.
+ * b    optional caller buffer; used when the string is shorter than
+ *      ACR_MBUFF_LEN characters, otherwise the result is allocated
+ *      with ACR_MALLOC.
+ *
+ * Every UTF-16 unit is reduced to its low 8 bits.
+ * Returns b, a newly allocated buffer, or NULL on failure.
+ */
+static char *get_string_iso_8859_1(JNIEnv *_E, jstring str, char *b)
+{
+    const jchar *chars;
+    char *dst;
+    jsize n;
+    jsize len = (*_E)->GetStringLength(_E, str);
+
+    if (b != NULL && len < ACR_MBUFF_LEN) {
+        dst = b;
+    }
+    else {
+        dst = ACR_MALLOC(char, len + 1);
+        if (dst == NULL) {
+            /* The allocator has already reported the failure */
+            return NULL;
+        }
+    }
+
+    chars = (*_E)->GetStringCritical(_E, str, NULL);
+    if (chars == NULL) {
+        if (dst != b)
+            AcrFree(dst);
+        return NULL;
+    }
+
+    for (n = 0; n < len; n++)
+        dst[n] = (char)(chars[n] & 0xFF);
+    dst[len] = '\0';
+
+    (*_E)->ReleaseStringCritical(_E, str, chars);
+    return dst;
+}
+
+/* Implementation of RFC 3629, "UTF-8, a transformation format of ISO 10646"
+ * with particular attention to canonical translation forms (see section 10
+ * "Security Considerations" of the RFC for more info).
+ *
+ * Since several architectures including Windows support unicode, with UCS2
+ * used as the actual storage convention by that architecture, these functions
+ * exist to transform or validate UCS2 strings into APR's 'char' type
+ * convention. It is left up to the operating system to determine the
+ * validity of the string, e.g. normative forms, in the context of
+ * its native language support. Other file systems which support filename
+ * characters of 0x80-0xff but have no explicit requirement for Unicode
+ * will find this function useful only for validating the character sequences
+ * and rejecting poorly encoded UTF8 sequences.
+ *
+ * Len UCS-4 range (hex) UTF-8 octet sequence (binary)
+ * 1:2 00000000-0000007F 0xxxxxxx
+ * 2:2 00000080-000007FF 110XXXXx 10xxxxxx
+ * 3:2 00000800-0000FFFF 1110XXXX 10Xxxxxx 10xxxxxx
+ * 4:4 00010000-001FFFFF 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx
+ * 00200000-03FFFFFF 111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 04000000-7FFFFFFF 1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * One of the X bits must be 1 to avoid overlong representation of ucs2 values.
+ *
+ * For conversion into ucs2, the 4th form is limited in range to 0010 FFFF,
+ * and the final two forms are used only by full ucs4, per RFC 3629;
+ *
+ * "Pairs of UCS-2 values between D800 and DFFF (surrogate pairs in
+ * Unicode parlance), being actually UCS-4 characters transformed
+ * through UTF-16, need special treatment: the UTF-16 transformation
+ * must be undone, yielding a UCS-4 character that is then transformed
+ * as above."
+ *
+ * From RFC2781 UTF-16: the compressed ISO 10646 encoding bitmask
+ *
+ * U' = U - 0x10000
+ * U' = 00000000 0000yyyy yyyyyyxx xxxxxxxx
+ * W1 = 110110yy yyyyyyyy
+ * W2 = 110111xx xxxxxxxx
+ * Max U' = 0000 00001111 11111111 11111111
+ * Max U = 0000 00010000 11111111 11111111
+ *
+ * Len in the table above is a mapping of bytes used for utf8:ucs2 values,
+ * which results in these conclusions of maximum allocations;
+ *
+ * conv_utf8_to_ucs2 out bytes:sizeof(in) * 1 <= Req <= sizeof(in) * 2
+ * conv_ucs2_to_utf8 out words:sizeof(in) / 2 <= Req <= sizeof(in) * 3 / 2
+ *
+ * conv_utf8_to_ucs2() reads at most inbytes bytes from in and stores
+ * 16-bit units in out. On entry *outwords is the number of units that
+ * fit in out; it is decremented once for every unit stored.
+ *
+ * Returns ACR_SUCCESS when the input is exhausted or the output is full,
+ * ACR_EINVAL for a malformed, overlong or out of range sequence and
+ * ACR_INCOMPLETE when the input ends inside a multibyte sequence.
+ */
+static int conv_utf8_to_ucs2(const char *in, jsize inbytes,
+ jchar *out, jsize *outwords)
+{
+ acr_i64_t newch, mask;
+ jsize expect, eating;
+ int ch;
+
+ while (inbytes && *outwords) {
+ ch = (unsigned char)(*in++);
+ if (!(ch & 0200)) {
+ /* US-ASCII-7 plain text
+ */
+ --inbytes;
+ --*outwords;
+ *(out++) = ch;
+ }
+ else {
+ if ((ch & 0300) != 0300) {
+ /* Multibyte Continuation is out of place
+ */
+ return ACR_EINVAL;
+ }
+ else {
+ /* Multibyte Sequence Lead Character
+ *
+ * Compute the expected bytes while adjusting
+ * our lead byte and leading zeros mask.
+ */
+ mask = 0340;
+ expect = 1;
+ while ((ch & mask) == mask) {
+ mask |= mask >> 1;
+ if (++expect > 3) /* (truly 5 for ucs-4) */
+ return ACR_EINVAL;
+ }
+ /* Payload bits of the lead byte; expect is the number of
+ * continuation bytes, eating the full sequence length.
+ */
+ newch = ch & ~mask;
+ eating = expect + 1;
+ if (inbytes <= expect)
+ return ACR_INCOMPLETE;
+ /* Reject values of excessive leading 0 bits
+ * utf-8 _demands_ the shortest possible byte length
+ */
+ if (expect == 1) {
+ if (!(newch & 0036))
+ return ACR_EINVAL;
+ }
+ else {
+ /* Reject values of excessive leading 0 bits
+ */
+ if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+ return ACR_EINVAL;
+ if (expect == 2) {
+ /* Reject values D800-DFFF when not utf16 encoded
+ * (may not be an appropriate restriction for ucs-4)
+ */
+ if (newch == 0015 && ((unsigned char)*in & 0040))
+ return ACR_EINVAL;
+ }
+ else if (expect == 3) {
+ /* Short circuit values > 110000
+ */
+ if (newch > 4)
+ return ACR_EINVAL;
+ if (newch == 4 && ((unsigned char)*in & 0060))
+ return ACR_EINVAL;
+ }
+ }
+ /* Where the boolean (expect > 2) is true, we will need
+ * an extra word for the output.
+ */
+ if (*outwords < (jsize)(expect > 2) + 1)
+ break; /* buffer full */
+ while (expect--) {
+ /* Multibyte Continuation must be legal */
+ if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+ return ACR_EINVAL;
+ newch <<= 6;
+ newch |= (ch & 0077);
+ }
+ inbytes -= eating;
+ /* newch is now a true ucs-4 character
+ *
+ * now we need to fold to ucs-2
+ */
+ if (newch < 0x10000) {
+ --*outwords;
+ *(out++) = (jchar) newch;
+ }
+ else {
+ /* Emit a surrogate pair (W1, W2 in the header comment) */
+ *outwords -= 2;
+ newch -= 0x10000;
+ *(out++) = (jchar) (0xD800 | (newch >> 10));
+ *(out++) = (jchar) (0xDC00 | (newch & 0x03FF));
+ }
+ }
+ }
+ }
+ /* Buffer full 'errors' aren't errors, the client must inspect both
+ * the inbytes and outwords values
+ *
+ * NOTE(review): inbytes is passed by value here, so only *outwords
+ * is actually visible to the caller.
+ */
+ return ACR_SUCCESS;
+}
+
+/* Convert UTF-8 into a wchar_t string.
+ *
+ * Same validation as conv_utf8_to_ucs2(), but the output element is
+ * wchar_t. When CC_SIZEOF_WCHAR_T is 2, code points of 0x10000 and above
+ * are stored as a surrogate pair; otherwise every code point is stored
+ * in a single wchar_t.
+ *
+ * in        UTF-8 input.
+ * inbytes   number of input bytes to consume.
+ * out       destination buffer.
+ * outwords  in: capacity of out in wchar_t units; decremented for every
+ *           unit stored.
+ *
+ * Returns ACR_SUCCESS when the input is exhausted or the output is full,
+ * ACR_EINVAL for a malformed sequence and ACR_INCOMPLETE when the input
+ * ends inside a multibyte sequence.
+ */
+static int conv_utf8_to_wcs(const char *in, size_t inbytes,
+ wchar_t *out, size_t *outwords)
+{
+ acr_i64_t newch, mask;
+ size_t expect, eating;
+ int ch;
+
+ while (inbytes && *outwords) {
+ ch = (unsigned char)(*in++);
+ if (!(ch & 0200)) {
+ /* US-ASCII-7 plain text
+ */
+ --inbytes;
+ --*outwords;
+ *(out++) = ch;
+ }
+ else {
+ if ((ch & 0300) != 0300) {
+ /* Multibyte Continuation is out of place
+ */
+ return ACR_EINVAL;
+ }
+ else {
+ /* Multibyte Sequence Lead Character
+ *
+ * Compute the expected bytes while adjusting
+ * our lead byte and leading zeros mask.
+ */
+ mask = 0340;
+ expect = 1;
+ while ((ch & mask) == mask) {
+ mask |= mask >> 1;
+ if (++expect > 3) /* (truly 5 for ucs-4) */
+ return ACR_EINVAL;
+ }
+ /* Payload bits of the lead byte; expect is the number of
+ * continuation bytes, eating the full sequence length.
+ */
+ newch = ch & ~mask;
+ eating = expect + 1;
+ if (inbytes <= expect)
+ return ACR_INCOMPLETE;
+ /* Reject values of excessive leading 0 bits
+ * utf-8 _demands_ the shortest possible byte length
+ */
+ if (expect == 1) {
+ if (!(newch & 0036))
+ return ACR_EINVAL;
+ }
+ else {
+ /* Reject values of excessive leading 0 bits
+ */
+ if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+ return ACR_EINVAL;
+ if (expect == 2) {
+ /* Reject values D800-DFFF when not utf16 encoded
+ * (may not be an appropriate restriction for ucs-4)
+ */
+ if (newch == 0015 && ((unsigned char)*in & 0040))
+ return ACR_EINVAL;
+ }
+ else if (expect == 3) {
+ /* Short circuit values > 110000
+ */
+ if (newch > 4)
+ return ACR_EINVAL;
+ if (newch == 4 && ((unsigned char)*in & 0060))
+ return ACR_EINVAL;
+ }
+ }
+ /* Where the boolean (expect > 2) is true, we will need
+ * an extra word for the output.
+ *
+ * NOTE(review): this two-word requirement is applied even when
+ * wchar_t is wider than 2 bytes and only one unit is stored.
+ */
+ if (*outwords < (size_t)(expect > 2) + 1)
+ break; /* buffer full */
+ while (expect--) {
+ /* Multibyte Continuation must be legal */
+ if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+ return ACR_EINVAL;
+ newch <<= 6;
+ newch |= (ch & 0077);
+ }
+ inbytes -= eating;
+#if CC_SIZEOF_WCHAR_T == 2
+ /* newch is now a true ucs-4 character
+ *
+ * now we need to fold to ucs-2
+ */
+ if (newch < 0x10000) {
+ --*outwords;
+ *(out++) = (wchar_t) newch;
+ }
+ else {
+ *outwords -= 2;
+ newch -= 0x10000;
+ *(out++) = (wchar_t) (0xD800 | (newch >> 10));
+ *(out++) = (wchar_t) (0xDC00 | (newch & 0x03FF));
+ }
+#else
+ /* wchar_t is not 2 bytes wide: store the code point directly */
+ --*outwords;
+ *(out++) = (wchar_t) newch;
+#endif
+ }
+ }
+ }
+ /* Buffer full 'errors' aren't errors, the client must inspect both
+ * the inbytes and outwords values
+ *
+ * NOTE(review): inbytes is passed by value here, so only *outwords
+ * is actually visible to the caller.
+ */
+ return ACR_SUCCESS;
+}
+
+/* Java implementation of GetStringUTF is bogus.
+ * It breaks on embedded NUL in strings.
+ * Use the APR implementation instead.
+ *
+ * Convert UTF-16 units (including surrogate pairs) into UTF-8.
+ *
+ * in        input units.
+ * inwords   number of input units to consume.
+ * out       destination buffer; no terminating NUL is written.
+ * outbytes  in: capacity of out in bytes; decremented for every byte
+ *           stored.
+ *
+ * Returns ACR_SUCCESS when the input is exhausted or the output is full,
+ * ACR_EINVAL for an unpaired or misplaced surrogate and ACR_INCOMPLETE
+ * when the input ends after a leading surrogate.
+ */
+static int conv_ucs2_to_utf8(const jchar *in, jsize inwords,
+ char *out, jsize *outbytes)
+{
+ acr_i64_t newch, require;
+ jsize need;
+ char *invout;
+ int ch;
+
+ while (inwords && *outbytes) {
+ ch = (unsigned short)(*in++);
+ if (ch < 0x80) {
+ --inwords;
+ --*outbytes;
+ *(out++) = (unsigned char) ch;
+ }
+ else {
+ if ((ch & 0xFC00) == 0xDC00) {
+ /* Invalid Leading ucs-2 Multiword Continuation Character
+ */
+ return ACR_EINVAL;
+ }
+ if ((ch & 0xFC00) == 0xD800) {
+ /* Leading ucs-2 Multiword Character
+ */
+ if (inwords < 2) {
+ /* Missing ucs-2 Multiword Continuation Character
+ */
+ return ACR_INCOMPLETE;
+ }
+ if (((unsigned short)(*in) & 0xFC00) != 0xDC00) {
+ /* Invalid ucs-2 Multiword Continuation Character
+ */
+ return ACR_EINVAL;
+ }
+ /* Combine both surrogates into one code point */
+ newch = (ch & 0x03FF) << 10 | ((unsigned short)(*in++) &
0x03FF);
+ newch += 0x10000;
+ }
+ else {
+ /* ucs-2 Single Word Character
+ */
+ newch = ch;
+ }
+ /* Determine the absolute minimum utf-8 bytes required
+ *
+ * need counts the continuation bytes; the sequence is
+ * need + 1 bytes long including the lead byte.
+ */
+ require = newch >> 11;
+ need = 1;
+ while (require)
+ require >>= 5, ++need;
+ if (need >= *outbytes)
+ break; /* Insufficient buffer */
+ /* A sequence with more than two continuation bytes came from
+ * a surrogate pair, i.e. from two input words.
+ */
+ inwords -= (need > 2) + 1;
+ *outbytes -= need + 1;
+ /* Compute the utf-8 characters in last to first order,
+ * calculating the lead character length bits along the way.
+ */
+ ch = 0200;
+ out += need + 1;
+ invout = out;
+ while (need--) {
+ ch |= ch >> 1;
+ *(--invout) = (unsigned char)(0200 | (newch & 0077));
+ newch >>= 6;
+ }
+ /* Compute the lead utf-8 character and move the dest offset
+ */
+ *(--invout) = (unsigned char)(ch | newch);
+ }
+ }
+ /* Buffer full 'errors' aren't errors, the client must inspect both
+ * the inwords and outbytes values
+ *
+ * NOTE(review): inwords is passed by value here, so only *outbytes
+ * is actually visible to the caller.
+ */
+ return ACR_SUCCESS;
+}
+
+/* Number of bytes java_ucs2_to_utf8() produces for the given input,
+ * including the terminating NUL byte.
+ *
+ * in       input UTF-16 units.
+ * inwords  number of units in the input.
+ *
+ * U+0000 is counted as the two byte sequence C0 80 used by modified
+ * UTF-8. The cases are mutually exclusive; a zero unit must not also
+ * be counted as a one byte ASCII character.
+ */
+static jsize java_ucs2_to_utf8_len(const jchar *in, jsize inwords)
+{
+    jsize need = 1;     /* terminating NUL */
+
+    while (inwords) {
+        int ch = (unsigned short)(*in++);
+
+        if (ch == 0)
+            need += 2;
+        else if (ch < 0x80)
+            need += 1;
+        else if (ch < 0x0800)
+            need += 2;
+        else
+            need += 3;
+        --inwords;
+    }
+    return need;
+}
+
+/* Modified UTF-8 according to the java.io.DataInput
+ * specification
+ *
+ * in        input UTF-16 units; surrogates are encoded individually
+ *           as three byte sequences.
+ * inwords   number of input units to consume.
+ * out       destination buffer.
+ * outbytes  in: capacity of out in bytes; decremented for every byte
+ *           stored, including the terminating NUL when it fits.
+ *
+ * U+0000 is written as C0 80 only, so the result never contains an
+ * embedded NUL byte.
+ *
+ * Returns ACR_SUCCESS, or ACR_INCOMPLETE when a multibyte sequence does
+ * not fit into the remaining output space.
+ */
+static int java_ucs2_to_utf8(const jchar *in, jsize inwords,
+                             char *out, jsize *outbytes)
+{
+    int ch;
+
+    while (inwords && *outbytes) {
+        ch = (unsigned short)(*in++);
+        if (ch == 0) {
+            /* Embedded NUL: two byte overlong form
+             */
+            if (*outbytes < 2)
+                return ACR_INCOMPLETE;
+            *outbytes -= 2;
+            *(out++) = (unsigned char)0xC0;
+            *(out++) = (unsigned char)0x80;
+        }
+        else if (ch < 0x80) {
+            /* US-ASCII-7 plain text; the loop condition guarantees
+             * room for one byte.
+             */
+            --*outbytes;
+            *(out++) = (unsigned char)ch;
+        }
+        else if (ch < 0x0800) {
+            /* Two byte sequence
+             */
+            if (*outbytes < 2)
+                return ACR_INCOMPLETE;
+            *outbytes -= 2;
+            *(out++) = (unsigned char)(0xC0 | ((ch >> 6) & 0x1F));
+            *(out++) = (unsigned char)(0x80 | ((ch) & 0x3F));
+        }
+        else {
+            /* Three byte sequence
+             */
+            if (*outbytes < 3)
+                return ACR_INCOMPLETE;
+            *outbytes -= 3;
+            *(out++) = (unsigned char)(0xE0 | ((ch >> 12) & 0x0F));
+            *(out++) = (unsigned char)(0x80 | ((ch >> 6) & 0x3F));
+            *(out++) = (unsigned char)(0x80 | ((ch) & 0x3F));
+        }
+        --inwords;
+    }
+    if (*outbytes) {
+        *(out++) = '\0';
+        --*outbytes;
+    }
+    return ACR_SUCCESS;
+}
+/* Decode modified UTF-8 (java.io.DataOutput format) into UTF-16 units.
+ *
+ * in        input bytes.
+ * inbytes   number of input bytes to consume.
+ * out       destination buffer.
+ * outwords  in: capacity of out in jchar units; decremented for every
+ *           unit stored.
+ *
+ * Only one, two and three byte sequences are accepted. A literal NUL
+ * byte ends the conversion: a terminating 0 unit is stored and
+ * ACR_SUCCESS is returned.
+ *
+ * Returns ACR_SUCCESS, ACR_INCOMPLETE when the input ends inside a
+ * sequence, or ACR_EILSEQ for an invalid lead or continuation byte.
+ */
+static int java_utf8_to_ucs2(const char *in, jsize inbytes,
+                             jchar *out, jsize *outwords)
+{
+    while (inbytes != 0 && *outwords != 0) {
+        int lead = (unsigned char)(*in++);
+        int tail;       /* continuation bytes that must follow */
+        int cp;         /* code unit being assembled */
+
+        if (lead == 0) {
+            /* End of string marker */
+            *out = (jchar)0;
+            --*outwords;
+            return ACR_SUCCESS;
+        }
+
+        if ((lead & 0x80) == 0) {
+            /* US-ASCII-7 plain text */
+            tail = 0;
+            cp   = lead;
+        }
+        else if ((lead & 0xE0) == 0xC0) {
+            /* Two byte sequence */
+            tail = 1;
+            cp   = lead & 0x1F;
+        }
+        else if ((lead & 0xF0) == 0xE0) {
+            /* Three byte sequence */
+            tail = 2;
+            cp   = lead & 0x0F;
+        }
+        else {
+            return ACR_EILSEQ;
+        }
+
+        if (tail > 0 && inbytes < tail + 1)
+            return ACR_INCOMPLETE;
+        inbytes -= tail + 1;
+
+        while (tail-- > 0) {
+            if ((*in & 0xC0) != 0x80)
+                return ACR_EILSEQ;
+            cp <<= 6;
+            cp |= (unsigned char)(*(in++) & 0x3F);
+        }
+
+        *(out++) = (jchar)cp;
+        --*outwords;
+    }
+    return ACR_SUCCESS;
+}
+
+/* Convert a NUL terminated modified UTF-8 string into a newly allocated
+ * UTF-16 string.
+ *
+ * str  input string; its terminating NUL is part of the conversion.
+ *
+ * Returns the allocated buffer, or NULL when the allocation fails or the
+ * input cannot be decoded (the decoder status is then stored with
+ * ACR_SET_OS_ERROR).
+ */
+jchar *
+AcrUtf8ToUcs2(JNIEnv *_E, const char *str)
+{
+    jsize  nbytes = (jsize)strlen(str) + 1;
+    jsize  nwords = nbytes;
+    jchar *ucs;
+    int    status;
+
+    ucs = ACR_MALLOC(jchar, nbytes);
+    if (!ucs)
+        return NULL;
+
+    status = java_utf8_to_ucs2(str, nbytes, ucs, &nwords);
+    if (status == 0)
+        return ucs;
+
+    /* Invalid UTF-8 string */
+    AcrFree(ucs);
+    ACR_SET_OS_ERROR(status);
+    return NULL;
+}
+
+/* Convert a NUL terminated UTF-8 string into a newly allocated wchar_t
+ * string.
+ *
+ * str  input string; its terminating NUL is part of the conversion.
+ *
+ * Returns the allocated buffer, or NULL when the allocation fails or the
+ * input cannot be decoded (the converter status is then stored with
+ * ACR_SET_OS_ERROR).
+ */
+wchar_t *
+AcrUtf8ToWcs(JNIEnv *_E, const char *str)
+{
+    size_t   nbytes = strlen(str) + 1;
+    size_t   nwords = nbytes;
+    wchar_t *wcs;
+    int      status;
+
+    wcs = ACR_MALLOC(wchar_t, nbytes);
+    if (!wcs)
+        return NULL;
+
+    status = conv_utf8_to_wcs(str, nbytes, wcs, &nwords);
+    if (status == 0)
+        return wcs;
+
+    /* Invalid UTF-8 string */
+    AcrFree(wcs);
+    ACR_SET_OS_ERROR(status);
+    return NULL;
+}
+
+/* Convert len UTF-16 units into a newly allocated, NUL terminated
+ * modified UTF-8 string.
+ *
+ * str  input units.
+ * len  number of units in str.
+ *
+ * The buffer size is taken from java_ucs2_to_utf8_len().
+ * Returns the allocated buffer, or NULL when the allocation or the
+ * conversion fails (the converter status is then stored with
+ * ACR_SET_OS_ERROR).
+ */
+char *
+AcrUsc2ToUtf8(JNIEnv *_E, const jchar *str, jsize len)
+{
+    jsize room = java_ucs2_to_utf8_len(str, len);
+    char *utf;
+    int   status;
+
+    utf = ACR_MALLOC(char, room);
+    if (!utf)
+        return NULL;
+
+    status = java_ucs2_to_utf8(str, len, utf, &room);
+    if (status == 0)
+        return utf;
+
+    /* Conversion failed */
+    AcrFree(utf);
+    ACR_SET_OS_ERROR(status);
+    return NULL;
+}
+
+/* Copy a Java string into a NUL terminated UTF-8 C string.
+ *
+ * str  Java string to convert; NULL yields NULL.
+ * b    optional caller buffer; used when three times the string length
+ *      is below ACR_MBUFF_LEN, otherwise the result is allocated with
+ *      ACR_MALLOC.
+ *
+ * Returns b, a newly allocated buffer, or NULL on failure.
+ */
+static char *get_string_utf_8(JNIEnv *_E, jstring str, char *b)
+{
+    jsize sl, nl, ol;
+    const jchar *sr;
+    char *rv = NULL;
+    int rc;
+
+    if (!str) {
+        return NULL;
+    }
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+    sl = (*_E)->GetStringLength(_E, str);
+    /* Each UTF-16 unit needs at most three UTF-8 bytes */
+    nl = sl * 3;
+    if (b && nl < ACR_MBUFF_LEN)
+        rv = b;
+    else {
+        rv = ACR_MALLOC(char, nl + 1);
+        if (!rv) {
+            /* The allocator has already reported the failure
+             */
+            return NULL;
+        }
+    }
+    sr = (*_E)->GetStringCritical(_E, str, NULL);
+    if (!sr) {
+        if (rv != b)
+            AcrFree(rv);
+        return NULL;
+    }
+    ol = nl;
+    rc = conv_ucs2_to_utf8(sr, sl, rv, &nl);
+    /* Every successful GetStringCritical must be paired with a release,
+     * on the failure path as well; leave the critical region before
+     * doing anything else.
+     */
+    (*_E)->ReleaseStringCritical(_E, str, sr);
+    if (rc != ACR_SUCCESS) {
+        /* XXX: Throw some exception ?
+         */
+        if (rv != b)
+            AcrFree(rv);
+        return NULL;
+    }
+    /* ol - nl is the number of bytes the converter stored */
+    rv[ol - nl] = '\0';
+    return rv;
+}
+
+/* Copy a Java string into a NUL terminated C string using the bytes
+ * returned by String.getBytes() (method 0001).
+ *
+ * str  Java string to convert.
+ * b    optional caller buffer; used when the byte array is shorter than
+ *      ACR_MBUFF_LEN, the same bound the ISO-8859-1 and UTF-8 getters
+ *      apply to the buffer they share with this function. Otherwise the
+ *      result is allocated with ACR_MALLOC.
+ *
+ * Returns b, a newly allocated buffer, or NULL on failure. When the
+ * String class or the getBytes method id has not been loaded, ACR_EINIT
+ * is stored with ACR_SET_OS_ERROR.
+ */
+static char *get_string_default(JNIEnv *_E, jstring str, char *b)
+{
+    jbyteArray sb = NULL;
+    char *rs = NULL;
+    jint len;
+
+    /* Guard the method that is actually invoked below */
+    if (!_clazzn.i || !J4MID(0001)) {
+        ACR_SET_OS_ERROR(ACR_EINIT);
+        return NULL;
+    }
+    sb = CALL_METHOD0(Object, 0001, str);
+    if ((*_E)->ExceptionCheck(_E) || sb == NULL)
+        return NULL;
+
+    len = (*_E)->GetArrayLength(_E, sb);
+    if (b && len < ACR_MBUFF_LEN) {
+        /* Use provided stack storage */
+        rs = b;
+    }
+    else {
+        rs = ACR_MALLOC(char, len + 1);
+        if (rs == NULL) {
+            (*_E)->DeleteLocalRef(_E, sb);
+            return NULL;
+        }
+    }
+    (*_E)->GetByteArrayRegion(_E, sb, 0, len, (jbyte *)rs);
+    rs[len] = '\0'; /* NUL-terminate */
+
+    (*_E)->DeleteLocalRef(_E, sb);
+    return rs;
+}
+
+/* Create a Java string from a NUL terminated C string by passing its
+ * bytes to the String(byte[]) constructor (method 0000).
+ *
+ * str  input string.
+ *
+ * Returns the new string, or NULL on failure. When the String class or
+ * the constructor id has not been loaded, ACR_EINIT is stored with
+ * ACR_SET_OS_ERROR instead of calling NewObject with NULL handles.
+ */
+static jstring new_string_default(JNIEnv *_E, const char *str)
+{
+    jstring rs;
+    jbyteArray ba;
+    jsize sl;
+
+    if (!_clazzn.i || !J4MID(0000)) {
+        ACR_SET_OS_ERROR(ACR_EINIT);
+        return NULL;
+    }
+    sl = (jsize)strlen(str);
+    ba = (*_E)->NewByteArray(_E, sl);
+    if (ba == NULL)
+        return NULL;
+
+    (*_E)->SetByteArrayRegion(_E, ba, 0, sl, (const jbyte *)str);
+    rs = (*_E)->NewObject(_E, _clazzn.i, J4MID(0000), ba);
+    (*_E)->DeleteLocalRef(_E, ba);
+    return rs;
+}
+
+/* Create a Java string from a NUL terminated ISO-8859-1 C string.
+ *
+ * s  input string; NULL yields NULL.
+ *
+ * Every byte maps to the UTF-16 unit with the same value (0x00-0xFF).
+ * Strings shorter than ACR_MBUFF_SIZ are converted in a stack buffer,
+ * longer ones in a temporary ACR_MALLOC buffer.
+ */
+static jstring new_string_iso_8859_1(JNIEnv *_E, const char *s)
+{
+    jchar   sbuf[ACR_MBUFF_SIZ];
+    jchar  *cc = sbuf;
+    jstring rs;
+    size_t  i, l;
+
+    if (!s)
+        return NULL;
+
+    l = strlen(s);
+    if (l >= ACR_MBUFF_SIZ) {
+        cc = ACR_MALLOC(jchar, l + 1);
+        if (!cc)
+            return NULL;
+    }
+    for (i = 0; i < l; i++) {
+        /* Go through unsigned char: where char is signed, bytes
+         * 0x80-0xFF would otherwise sign-extend to 0xFF80-0xFFFF.
+         */
+        cc[i] = (jchar)(unsigned char)s[i];
+    }
+    rs = (*_E)->NewString(_E, cc, (jsize)l);
+    if (cc != sbuf)
+        AcrFree(cc);
+    return rs;
+}
+
+/* Create a Java string from a NUL terminated UTF-8 C string.
+ *
+ * s  input string; NULL yields NULL.
+ *
+ * Strings shorter than ACR_MBUFF_SIZ bytes are converted in a stack
+ * buffer, longer ones in a temporary ACR_MALLOC buffer. A UTF-8 string
+ * never produces more UTF-16 units than it has bytes, so a buffer of
+ * strlen(s) units is always large enough.
+ *
+ * Returns the new string, or NULL on failure. An invalid UTF-8 sequence
+ * throws through AcrThrowException with ACR_EX_EINVAL.
+ */
+static jstring new_string_utf_8(JNIEnv *_E, const char *s)
+{
+    jchar   sbuf[ACR_MBUFF_SIZ];
+    jchar  *cc = sbuf;
+    jstring rs = NULL;
+    jsize   sl, wl;
+    int     ex;
+
+    if (!s)
+        return NULL;
+
+    sl = (jsize)strlen(s);
+    if (sl >= ACR_MBUFF_SIZ) {
+        cc = ACR_MALLOC(jchar, sl + 1);
+        if (!cc)
+            return NULL;
+    }
+    wl = sl;
+    ex = conv_utf8_to_ucs2(s, sl, cc, &wl);
+    if (ex == ACR_SUCCESS) {
+        /* The converter decrements wl for every unit stored, so
+         * sl - wl is the real string length. The byte count sl would
+         * be too large whenever the input holds multibyte sequences.
+         */
+        rs = (*_E)->NewString(_E, cc, sl - wl);
+    }
+    else {
+        AcrThrowException(_E, __FILE_FUNC_LINE__, ACR_EX_EINVAL, ex);
+    }
+    if (cc != sbuf)
+        AcrFree(cc);
+    return rs;
+}
+
+
+/* Copy a Java string into a NUL terminated wchar_t string.
+ *
+ * str  Java string to convert; NULL yields NULL.
+ * b    optional caller buffer; used when the string is shorter than
+ *      ACR_MBUFF_LEN characters, otherwise the result is allocated
+ *      with ACR_MALLOC.
+ *
+ * The UTF-16 units are copied one to one, either with memcpy (2-byte
+ * wchar_t) or element by element.
+ * Returns b, a newly allocated buffer, or NULL on failure.
+ */
+wchar_t *
+AcrGetJavaStringW(JNIEnv *_E, jstring str, wchar_t *b)
+{
+    const jchar *chars;
+    wchar_t *dst;
+    jsize len;
+#if CC_SIZEOF_WCHAR_T != 2
+    jsize n;
+#endif
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+
+    len = (*_E)->GetStringLength(_E, str);
+    if (b != NULL && len < ACR_MBUFF_LEN) {
+        dst = b;
+    }
+    else {
+        dst = ACR_MALLOC(wchar_t, len + 1);
+        if (dst == NULL) {
+            /* The allocator has already reported the failure */
+            return NULL;
+        }
+    }
+
+    chars = (*_E)->GetStringCritical(_E, str, NULL);
+    if (chars == NULL) {
+        if (dst != b)
+            AcrFree(dst);
+        return NULL;
+    }
+
+#if CC_SIZEOF_WCHAR_T == 2
+    memcpy(dst, chars, len * sizeof(wchar_t));
+#else
+    for (n = 0; n < len; n++)
+        dst[n] = chars[n];
+#endif
+    dst[len] = L'\0';
+
+    (*_E)->ReleaseStringCritical(_E, str, chars);
+    return dst;
+}
+
+/* Copy a Java string into a NUL terminated C string in the native code
+ * page selected by acr_native_codepage.
+ *
+ * str  Java string to convert; NULL yields NULL.
+ * b    optional caller buffer handed on to the selected converter.
+ *
+ * Returns whatever the selected converter returns, or NULL when the
+ * local reference capacity cannot be ensured.
+ */
+char *
+AcrGetJavaStringA(JNIEnv *_E, jstring str, char *b)
+{
+    int codepage;
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+
+    codepage = acr_native_codepage;
+    if (codepage == ACR_CP_ISO8859_1)
+        return get_string_iso_8859_1(_E, str, b);
+    if (codepage == ACR_CP_UTF_8)
+        return get_string_utf_8(_E, str, b);
+    return get_string_default(_E, str, b);
+}
+
+/* Create a Java string from a NUL terminated wchar_t string.
+ *
+ * s  input string; NULL yields NULL.
+ *
+ * With a 2-byte wchar_t the input is handed to NewString unchanged.
+ * Otherwise each element is taken as a code point: values in the range
+ * 0x10000-0x10FFFF are encoded as a UTF-16 surrogate pair (RFC 2781),
+ * all other values are stored as a single unit. The conversion buffer
+ * is sized for two units per input element; it lives on the stack when
+ * that fits in ACR_MBUFF_SIZ and is allocated with ACR_MALLOC otherwise.
+ */
+jstring
+AcrNewJavaStringW(JNIEnv *_E, const wchar_t *s)
+{
+    jstring r = NULL;
+
+    if (s) {
+        size_t l = wcslen(s);
+#if CC_SIZEOF_WCHAR_T == 2
+        r = (*_E)->NewString(_E, (const jchar *)s, (jsize)l);
+#else
+        jchar  sbuf[ACR_MBUFF_SIZ];
+        jchar *cc = sbuf;
+        size_t i, n = 0;
+
+        if (2 * l >= ACR_MBUFF_SIZ) {
+            cc = ACR_MALLOC(jchar, 2 * l + 1);
+            if (!cc)
+                return NULL;
+        }
+        for (i = 0; i < l; i++) {
+            unsigned long cp = (unsigned long)s[i];
+
+            if (cp >= 0x10000UL && cp <= 0x10FFFFUL) {
+                /* Supplementary plane: split into W1/W2 */
+                cp -= 0x10000UL;
+                cc[n++] = (jchar)(0xD800 | (cp >> 10));
+                cc[n++] = (jchar)(0xDC00 | (cp & 0x03FF));
+            }
+            else {
+                cc[n++] = (jchar)cp;
+            }
+        }
+        r = (*_E)->NewString(_E, cc, (jsize)n);
+        if (cc != sbuf)
+            AcrFree(cc);
+#endif
+    }
+    return r;
+}
+
+/* Create a Java string from a NUL terminated C string encoded in the
+ * native code page selected by acr_native_codepage.
+ *
+ * str  input string; NULL yields NULL.
+ *
+ * Returns whatever the selected constructor helper returns, or NULL when
+ * the local reference capacity cannot be ensured.
+ */
+jstring
+AcrNewJavaStringA(JNIEnv *_E, const char *str)
+{
+    int codepage;
+
+    if (str == NULL)
+        return NULL;
+    if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) {
+        /* JNI out of memory error */
+        return NULL;
+    }
+
+    codepage = acr_native_codepage;
+    if (codepage == ACR_CP_ISO8859_1)
+        return new_string_iso_8859_1(_E, str);
+    if (codepage == ACR_CP_UTF_8)
+        return new_string_utf_8(_E, str);
+    return new_string_default(_E, str);
+}
+
+/* Create a Java string from a NUL terminated UTF-8 C string, regardless
+ * of the native code page.
+ *
+ * str  input string; NULL yields NULL.
+ *
+ * Returns the result of new_string_utf_8(), or NULL when the local
+ * reference capacity cannot be ensured.
+ */
+jstring
+AcrNewJavaStringU(JNIEnv *_E, const char *str)
+{
+    /* NULL input is checked first; a failed capacity check is the
+     * JNI out of memory case.
+     */
+    if (str == NULL || (*_E)->EnsureLocalCapacity(_E, 2) < 0)
+        return NULL;
+
+    return new_string_utf_8(_E, str);
+}
Propchange: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
------------------------------------------------------------------------------
svn:eol-style = native