Hi,
I can successfully load files from HDFS via the C API like -
#include "hdfs.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
/*
 * Demo: read an entire HDFS file through the ordinary libhdfs byte-stream
 * API (hdfsRead) and print its contents.
 *
 * Returns 0 on success; exits with -1 on any HDFS or allocation failure.
 */
int main(int argc, char **argv) {
    hdfsFS fs = hdfsConnect("127.0.0.1", 9000);
    if (!fs) {
        fprintf(stderr, "Failed to connect to HDFS!\n");
        exit(-1);
    }
    const char *readPath = "/lineitem.tbl";
    hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", readPath);
        exit(-1);
    }
    if (!hdfsFileIsOpenForRead(readFile)) {
        /* Note: a string literal may not span a raw newline; keep it on one
         * line (or use "\" continuation / adjacent literals). */
        fprintf(stderr, "hdfsFileIsOpenForRead: we just opened a file with O_RDONLY, and it did not show up as 'open for read'\n");
        exit(-1);
    }
    /* NOTE(review): hdfsAvailable reports bytes available on the open
     * stream, not necessarily total file length — for the latter,
     * hdfsGetPathInfo()->mSize is the reliable source. */
    int size_in_bytes = hdfsAvailable(fs, readFile);
    fprintf(stderr, "hdfsAvailable: %d\n", size_in_bytes);
    char *buffer = malloc((size_t)size_in_bytes + 1);   /* +1 for NUL so %s is safe */
    if (!buffer) {
        fprintf(stderr, "Failed to allocate %d bytes\n", size_in_bytes + 1);
        exit(-1);
    }
    /* Was memset(buffer, 0, sizeof(buffer)) — that zeroes only
     * sizeof(char*) bytes, not the whole allocation. */
    memset(buffer, 0, (size_t)size_in_bytes + 1);
    int num_read_bytes = 0;
    while (num_read_bytes < size_in_bytes) {
        /* Request only the REMAINING bytes. Passing size_in_bytes on every
         * iteration lets hdfsRead write past the end of buffer once a
         * partial read has advanced num_read_bytes. */
        int rbytes = hdfsRead(fs, readFile, &buffer[num_read_bytes],
                              size_in_bytes - num_read_bytes);
        if (rbytes <= 0) {
            /* 0 = EOF, -1 = error; either way, stop instead of spinning. */
            break;
        }
        num_read_bytes += rbytes;
    }
    printf("%s\n", buffer);
    printf("Total bytes read = %d\n", num_read_bytes);
    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}
and I am able to see all the contents of the file printed out successfully.
But when I try to use the zero copy API like -
#include "hdfs.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
int main(int argc, char **argv) {
hdfsFS fs = hdfsConnect("127.0.0.1", 9000);
const char* readPath = "/lineitem.tbl";
hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
if(!readFile) {
fprintf(stderr, "Failed to open %s for reading!\n", readPath);
exit(-1);
}
if (!hdfsFileIsOpenForRead(readFile)) {
fprintf(stderr, "hdfsFileIsOpenForRead: we just opened a file with
O_RDONLY, and it did not show up as 'open for read'\n");
exit(-1);
}
int size_in_bytes = hdfsAvailable(fs, readFile);
fprintf(stderr, "hdfsAvailable: %d\n", size_in_bytes);
struct hadoopRzOptions *opts = NULL;
opts = hadoopRzOptionsAlloc();
if (!opts) {
fprintf(stderr, "Unable to set zero copy options\n");
exit(-1);
}
if (hadoopRzOptionsSetSkipChecksum(opts, 1)) {
fprintf(stderr, "Unable to set skip checksum\n");
exit(-1);
}
/*if (hadoopRzOptionsSetByteBufferPool(opts, NULL)) {
fprintf(stderr, "Unable to set byte buffer pool\n");
exit(-1);
}*/
struct hadoopRzBuffer *hbuffer = NULL;
//hadoopRzBufferFree(readFile, hbuffer);
hbuffer = hadoopReadZero(readFile, opts, 100);
if (!hbuffer) {
fprintf(stderr, "Unable to read zero copy hdfs file\n");
exit(-1);
}
char *buffer; buffer = (char*)malloc(sizeof(char)*(size_in_bytes+1));
memset(buffer, 0, sizeof(buffer));
buffer = hadoopRzBufferGet(hbuffer);
int num_read_bytes = hadoopRzBufferLength(hbuffer);
printf("Actual size = %d\n", size_in_bytes);
printf("Bytes read = %d\n", num_read_bytes);
//printf("%s\n", buffer);
//printf("%s\n", buffer[size_in_bytes - 1000]);
hdfsCloseFile(fs, readFile);
}
I get the error "Unable to read zero copy hdfs file", which means that
hadoopReadZero returned NULL and no data was read.
Am I doing something incorrectly?
Thank you,
--
Pratyush Das