jasperjiaguo commented on code in PR #9114: URL: https://github.com/apache/pinot/pull/9114#discussion_r935808330
########## pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java: ########## @@ -560,4 +562,42 @@ public static String decodeUrl(String input) throws UnsupportedEncodingException { return URLDecoder.decode(input, StandardCharsets.UTF_8.toString()); } + + /** + * @param input binary data + * @return Base64 encoded String + */ + @ScalarFunction + public static String binaryToBase64(byte[] input) { + return Base64.getEncoder().encodeToString(input); Review Comment: Do we error if the user passes a string to the `toBase64` function, or will some implicit type casting happen? MySQL users might not expect the syntax of `toBase64(toUtf8("hello"))`. Similarly, if the user calls `fromBase64('')`, they might not expect us to return a hex string. We should at least call this out in the documentation. ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java: ########## @@ -600,6 +599,173 @@ public void testUrlFunc() assertEquals(decodedString, expectedUrlStr); } + @Test + public void testBase64Func() + throws Exception { + + // string literal + String sqlQuery = "SELECT toBase64(toUtf8('hello!')), " + "fromUtf8(fromBase64('aGVsbG8h')) FROM myTable"; + JsonNode response = postQuery(sqlQuery, _brokerBaseApiUrl); + JsonNode resultTable = response.get("resultTable"); + JsonNode dataSchema = resultTable.get("dataSchema"); + assertEquals(dataSchema.get("columnDataTypes").toString(), "[\"STRING\",\"STRING\"]"); + JsonNode rows = response.get("resultTable").get("rows"); + + String encodedString = rows.get(0).get(0).asText(); + String expectedEncodedStr = toBase64(toUtf8("hello!")); + assertEquals(encodedString, expectedEncodedStr); + String decodedString = rows.get(0).get(1).asText(); + String expectedDecodedStr = fromUtf8(fromBase64("aGVsbG8h")); + assertEquals(decodedString, expectedDecodedStr); + + // long string literal encode + sqlQuery = + "SELECT toBase64(toUtf8('this is a long 
string that will encode to more than 76 characters using base64')) " + + "FROM " + + "myTable"; + response = postQuery(sqlQuery, _brokerBaseApiUrl); + resultTable = response.get("resultTable"); + rows = resultTable.get("rows"); + encodedString = rows.get(0).get(0).asText(); + assertEquals(encodedString, + toBase64(toUtf8("this is a long string that will encode to more than 76 characters using base64"))); + + // long string literal decode + sqlQuery = "SELECT fromUtf8(fromBase64" + + "('dGhpcyBpcyBhIGxvbmcgc3RyaW5nIHRoYXQgd2lsbCBlbmNvZGUgdG8gbW9yZSB0aGFuIDc2IGNoYXJhY3RlcnMgdXNpbmcgYmFzZTY0" + + "')) FROM myTable"; + response = postQuery(sqlQuery, _brokerBaseApiUrl); + resultTable = response.get("resultTable"); + rows = resultTable.get("rows"); + decodedString = rows.get(0).get(0).asText(); + assertEquals(decodedString, fromUtf8(fromBase64( + "dGhpcyBpcyBhIGxvbmcgc3RyaW5nIHRoYXQgd2lsbCBlbmNvZGUgdG8gbW9yZSB0aGFuIDc2IGNoYXJhY3RlcnMgdXNpbmcgYmFzZTY0"))); + + // non-string literal + sqlQuery = "SELECT toBase64(toUtf8(123)), fromUtf8(fromBase64(toBase64(toUtf8(123)))), 123 FROM myTable"; + response = postQuery(sqlQuery, _brokerBaseApiUrl); + resultTable = response.get("resultTable"); + rows = resultTable.get("rows"); + encodedString = rows.get(0).get(0).asText(); + decodedString = rows.get(0).get(1).asText(); + String originalCol = rows.get(0).get(2).asText(); + assertEquals(decodedString, originalCol); + assertEquals(encodedString, toBase64(toUtf8("123"))); + + // identifier + sqlQuery = + "SELECT Carrier, toBase64(toUtf8(Carrier)), fromUtf8(fromBase64(toBase64(toUtf8(Carrier)))), fromBase64" + + "(toBase64(toUtf8(Carrier))) FROM myTable LIMIT 100"; + response = postQuery(sqlQuery, _brokerBaseApiUrl); + resultTable = response.get("resultTable"); + dataSchema = resultTable.get("dataSchema"); + assertEquals(dataSchema.get("columnDataTypes").toString(), "[\"STRING\",\"STRING\",\"STRING\",\"BYTES\"]"); + rows = response.get("resultTable").get("rows"); + 
assertEquals(rows.size(), 100); + for (int i = 0; i < 100; i++) { + String original = rows.get(0).asText(); + String encoded = rows.get(1).asText(); + String decoded = rows.get(2).asText(); + assertEquals(original, decoded); + assertEquals(encoded, toBase64(toUtf8(original))); + assertEquals(decoded, fromUtf8(fromBase64(toBase64(toUtf8(original))))); + } + + // invalid argument Review Comment: Should we add a test case that explicitly checks the result/error when the user passes a string to the toBase64 function, e.g. `select toBase64('hello!')`? Similarly for `select fromBase64('aGVsbG8h!')`? ########## pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java: ########## @@ -393,6 +394,15 @@ public static byte[] toUtf8(String input) { return input.getBytes(StandardCharsets.UTF_8); } + /** + * @param input Review Comment: Complete the Javadoc here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org