Hi,

I can successfully load files from HDFS via the C API like -

#include "hdfs.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>

/*
 * Read /lineitem.tbl from HDFS with the regular libhdfs read API and print it.
 *
 * Fixes over the original:
 *  - memset(buffer, 0, sizeof(buffer)) zeroed only sizeof(char*) bytes
 *    (4 or 8), not the allocation, so the buffer was mostly uninitialized.
 *  - The read loop always requested size_in_bytes bytes while writing at an
 *    advancing offset, so any short read made the next hdfsRead() overflow
 *    the buffer; request only the remaining bytes.
 *  - hdfsRead() may return 0 (EOF) or -1 (error); the original loop would
 *    spin forever or move the offset backwards in those cases.
 *  - malloc() result is now checked; the cast on malloc() is unnecessary in C.
 */
int main(int argc, char **argv) {
    hdfsFS fs = hdfsConnect("127.0.0.1", 9000);
    if (!fs) {
        fprintf(stderr, "Failed to connect to HDFS!\n");
        exit(-1);
    }
    const char *readPath = "/lineitem.tbl";
    hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
    if (!readFile) {
        fprintf(stderr, "Failed to open %s for reading!\n", readPath);
        exit(-1);
    }
    if (!hdfsFileIsOpenForRead(readFile)) {
        fprintf(stderr, "hdfsFileIsOpenForRead: we just opened a file with O_RDONLY, and it did not show up as 'open for read'\n");
        exit(-1);
    }
    /* NOTE(review): hdfsAvailable() returns an int; for a just-opened file it
     * reports the readable length, but this truncates for files > INT_MAX
     * bytes -- confirm against the actual file size. */
    int size_in_bytes = hdfsAvailable(fs, readFile);
    fprintf(stderr, "hdfsAvailable: %d\n", size_in_bytes);
    char *buffer = malloc((size_t)size_in_bytes + 1);  /* +1 for NUL terminator */
    if (!buffer) {
        fprintf(stderr, "Out of memory allocating %d bytes\n", size_in_bytes + 1);
        exit(-1);
    }
    memset(buffer, 0, (size_t)size_in_bytes + 1);  /* was sizeof(buffer): pointer size only */
    int num_read_bytes = 0;
    while (num_read_bytes < size_in_bytes) {
        /* Ask only for the bytes still missing, not the whole file again. */
        int rbytes = hdfsRead(fs, readFile, &buffer[num_read_bytes],
                              size_in_bytes - num_read_bytes);
        if (rbytes < 0) {
            fprintf(stderr, "hdfsRead failed after %d bytes\n", num_read_bytes);
            break;
        }
        if (rbytes == 0) {
            /* Unexpected EOF before the advertised length. */
            break;
        }
        num_read_bytes += rbytes;
    }
    printf("%s\n", buffer);
    printf("Total bytes read = %d\n", num_read_bytes);
    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}

and I am able to see all the contents of the file printed out successfully.

But when I try to use the zero copy API like -

#include "hdfs.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>

int main(int argc, char **argv) {
    hdfsFS fs = hdfsConnect("127.0.0.1", 9000);
    const char* readPath = "/lineitem.tbl";
    hdfsFile readFile = hdfsOpenFile(fs, readPath, O_RDONLY, 0, 0, 0);
    if(!readFile) {
          fprintf(stderr, "Failed to open %s for reading!\n", readPath);
          exit(-1);
    }
    if (!hdfsFileIsOpenForRead(readFile)) {
        fprintf(stderr, "hdfsFileIsOpenForRead: we just opened a file with
O_RDONLY, and it did not show up as 'open for read'\n");
        exit(-1);
    }
    int size_in_bytes = hdfsAvailable(fs, readFile);
    fprintf(stderr, "hdfsAvailable: %d\n", size_in_bytes);
    struct hadoopRzOptions *opts = NULL;
    opts = hadoopRzOptionsAlloc();
    if (!opts) {
        fprintf(stderr, "Unable to set zero copy options\n");
        exit(-1);
    }
    if (hadoopRzOptionsSetSkipChecksum(opts, 1)) {
        fprintf(stderr, "Unable to set skip checksum\n");
        exit(-1);
    }
    /*if (hadoopRzOptionsSetByteBufferPool(opts, NULL)) {
        fprintf(stderr, "Unable to set byte buffer pool\n");
        exit(-1);
    }*/
    struct hadoopRzBuffer *hbuffer = NULL;
    //hadoopRzBufferFree(readFile, hbuffer);
    hbuffer = hadoopReadZero(readFile, opts, 100);
    if (!hbuffer) {
        fprintf(stderr, "Unable to read zero copy hdfs file\n");
        exit(-1);
    }
    char *buffer; buffer = (char*)malloc(sizeof(char)*(size_in_bytes+1));
    memset(buffer, 0, sizeof(buffer));
    buffer = hadoopRzBufferGet(hbuffer);
    int num_read_bytes = hadoopRzBufferLength(hbuffer);
    printf("Actual size = %d\n", size_in_bytes);
    printf("Bytes read = %d\n", num_read_bytes);
    //printf("%s\n", buffer);
    //printf("%s\n", buffer[size_in_bytes - 1000]);
    hdfsCloseFile(fs, readFile);
}

I get the error - "Unable to read zero copy hdfs file", i.e. hadoopReadZero
returned NULL, so no zero-copy buffer was produced at all.

Am I doing something incorrectly?

Thank you,

-- 
Pratyush Das

Reply via email to