http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49269

           Summary: wifstream::tellg reports invalid stream position after
                    reading single wchar_t character with NullCodecvt
           Product: gcc
           Version: 4.5.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: libstdc++
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: michael...@gmail.com


Let us consider the following case:
1. wifstream with NullCodecvt, binary mode.
2. input ucs2 samples.txt file (hex editor dump):
  FEFF 005B 0030 0020 ¦ 0020 0020 0031 005D | ?[0   1]

I believe the tellg function below should return stream position 2 after
reading a single wide character. Instead it returns 1, even though, as you can
see, the wifstream::read call does read two bytes.
Subsequent read operations are also incorrect -- you will
get 5bff instead of 005b.

--
Michael Kochetkov

#include <fstream>
#include <iostream>
#include <stdexcept>

using std::codecvt ;
typedef codecvt < wchar_t , char , mbstate_t > NullCodecvtBase ;

// Pass-through codecvt facet: every conversion hook reports `noconv`, and the
// facet advertises a fixed width of 2 external chars per internal character
// (see do_encoding / do_max_length).
//
// NOTE(review): the standard describes `noconv` for the case where the
// internal and external character types are the same; here they differ
// (wchar_t vs char), so how a stream behaves with this facet is presumably
// implementation-dependent -- confirm against the library in use.
class NullCodecvt
    : public NullCodecvtBase
{

public:
    // Short aliases for the internal, external and state types. Kept under
    // their original names so existing users of these typedefs still compile.
    typedef wchar_t _E ;
    typedef char _To ;
    typedef mbstate_t _St ;

    // refs is forwarded unchanged to the base facet constructor.
    explicit NullCodecvt( size_t refs = 0 ) : NullCodecvtBase( refs ) { }

protected:
    // External -> internal hook. Converts nothing: both "next" pointers are
    // set to the start of their ranges so the caller never reads an
    // unassigned out-parameter, and `noconv` is returned.
    virtual result do_in( _St& /*state*/ ,
        const _To* from , const _To* /*from_end*/ , const _To*& from_next ,
        _E* to , _E* /*to_end*/ , _E*& to_next
        ) const
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // Internal -> external hook. The destination-end parameter must be
    // `_To*` (not `_E*`) for this function to override the base-class
    // virtual; with the wrong type it only hides it and codecvt::out()
    // keeps calling the base implementation.
    virtual result do_out( _St& /*state*/ ,
        const _E* from , const _E* /*from_end*/ , const _E*& from_next ,
        _To* to , _To* /*to_end*/ , _To*& to_next
        ) const
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // No termination sequence is written; to_next is left at the start of
    // the output range.
    virtual result do_unshift( _St& /*state*/ ,
        _To* to , _To* /*to_end*/ , _To*& to_next ) const
    {
        to_next = to ;
        return noconv ;
    }

    // Returns the smaller of `max` and the number of external chars in
    // [from, from_end).
    virtual int do_length( _St& /*state*/ , const _To* from ,
        const _To* from_end , size_t max ) const throw()
    {
        const size_t available = static_cast<size_t>( from_end - from ) ;
        return static_cast<int>( max < available ? max : available ) ;
    }

    // Tells callers that the conversion hooks never convert.
    virtual bool do_always_noconv() const throw()
    {
        return true ;
    }

    // Upper bound reported for external chars per internal character.
    virtual int do_max_length() const throw()
    {
        return 2 ;
    }

    // Fixed-width encoding: 2 external chars per internal character.
    virtual int do_encoding() const throw()
    {
        return 2 ;
    }
};

// Reproducer: opens samples.txt through a wifstream imbued with NullCodecvt,
// reads one wchar_t, checks it against the expected byte-order mark, and
// prints the stream position reported by tellg(). Any std::exception is
// caught and its message printed.
int
main() {
    // Samples.txt shall look in hex like this:
    // FEFF 005B 0030 0020 ¦ 0020 0020 0031 005D | ?[0   1]
    const unsigned short expectedBom = 0xFEFF;

    try {
        std::wifstream input;
        // The locale takes the facet pointer created here.
        input.imbue(std::locale(std::locale::classic(), new NullCodecvt()));
        // Only badbit raises an exception; other stream failures stay silent.
        input.exceptions(std::ios::badbit);
        input.open("samples.txt", std::ios::in | std::ios::binary);

        // Read exactly one wide character into the storage of firstUnit.
        unsigned int firstUnit = 0;
        input.read(reinterpret_cast<wchar_t*>(&firstUnit), 1);

        if (firstUnit != expectedBom) {
            throw std::runtime_error("Invalid BOM. Only LE is supported");
        }

        std::cout << "Current position: " << input.tellg() << std::endl;
    }
    catch (const std::exception& error) {
        std::cout << error.what() << std::endl;
    }
}

Reply via email to