http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49269
// Summary: wifstream::tellg reports invalid stream position after reading
//          single wchar_t character with NullCodecvt
// Product: gcc
// Version: 4.5.2
// Status: UNCONFIRMED
// Severity: normal
// Priority: P3
// Component: libstdc++
// AssignedTo: unassig...@gcc.gnu.org
// ReportedBy: michael...@gmail.com
//
// Let us consider the following case:
// 1. wifstream with NullCodecvt, binary mode.
// 2. input ucs2 samples.txt file (hex editor dump):
//    FEFF 005B 0030 0020 ¦ 0020 0020 0031 005D | ?[0 1]
//
// I believe the tellg function below shall return the stream position 2 after
// reading single wide character. But it returns 1 instead though as you may
// see the wifstream::read call reads two bytes indeed. You may also see that
// subsequent read operations will be incorrect -- you will get 5bff instead
// of 005b.
//
// -- Michael Kochetkov

#include <cstddef>
#include <cwchar>
#include <fstream>
#include <iostream>
#include <locale>
#include <stdexcept>

using std::codecvt;

typedef codecvt<wchar_t, char, std::mbstate_t> NullCodecvtBase;

// Code-conversion facet for wchar_t <-> char whose conversion hooks all
// report `noconv`, i.e. it tells the stream that no conversion is performed.
//
// NOTE(review): the std::codecvt contract describes `noconv` for the case
// where the internal and external character types are the same; here they are
// wchar_t and char. Confirm that this is what the reproduction intends.
class NullCodecvt : public NullCodecvtBase
{
public:
    // Aliases kept so that any existing reference to them still compiles.
    // Identifiers of this form (underscore + capital letter) are reserved for
    // the implementation, so new code should use intern_type / extern_type /
    // state_type inherited from the base class instead.
    typedef wchar_t _E;
    typedef char _To;
    typedef std::mbstate_t _St;

    // refs is forwarded unchanged to the codecvt base-class constructor.
    explicit NullCodecvt(std::size_t refs = 0)
        : NullCodecvtBase(refs)
    {
    }

protected:
    // External -> internal hook. Consumes and produces nothing: both "next"
    // pointers are set to the start of their ranges and `noconv` is returned.
    virtual result do_in(state_type&,
                         const extern_type* from,
                         const extern_type* /*from_end*/,
                         const extern_type*& from_next,
                         intern_type* to,
                         intern_type* /*to_end*/,
                         intern_type*& to_next) const
    {
        from_next = from;
        to_next = to;
        return noconv;
    }

    // Internal -> external hook. The end-of-output parameter is extern_type*
    // so that this declaration really overrides codecvt::do_out. Consumes and
    // produces nothing and returns `noconv`.
    virtual result do_out(state_type&,
                          const intern_type* from,
                          const intern_type* /*from_end*/,
                          const intern_type*& from_next,
                          extern_type* to,
                          extern_type* /*to_end*/,
                          extern_type*& to_next) const
    {
        from_next = from;
        to_next = to;
        return noconv;
    }

    // Writes no termination sequence: to_next is set to `to` and `noconv` is
    // returned.
    virtual result do_unshift(state_type&,
                              extern_type* to,
                              extern_type* /*to_end*/,
                              extern_type*& to_next) const
    {
        to_next = to;
        return noconv;
    }

    // Returns the smaller of `max` and the number of external characters in
    // [from, from_end), converted to int.
    virtual int do_length(state_type&,
                          const extern_type* from,
                          const extern_type* from_end,
                          std::size_t max) const throw()
    {
        const std::size_t available = static_cast<std::size_t>(from_end - from);
        return static_cast<int>(max < available ? max : available);
    }

    // Always reports that no conversion is needed.
    virtual bool do_always_noconv() const throw()
    {
        return true;
    }

    // Reports a maximum length of 2.
    virtual int do_max_length() const throw()
    {
        return 2;
    }

    // Reports an encoding value of 2.
    virtual int do_encoding() const throw()
    {
        return 2;
    }
};

// Reproduction driver: opens samples.txt in binary mode through a wifstream
// imbued with NullCodecvt, reads one wchar_t, checks it against 0xFEFF and
// prints the position reported by tellg(). Any std::exception is caught and
// its message printed.
int main()
{
    try {
        std::wifstream is;
        // The locale takes the facet pointer; it is not deleted here.
        is.imbue(std::locale(std::locale::classic(), new NullCodecvt()));
        is.exceptions(std::ios::badbit);

        // Samples.txt shall look in hex like this:
        // FEFF 005B 0030 0020 ¦ 0020 0020 0031 005D | ?[0 1]
        is.open("samples.txt", std::ios::in | std::ios::binary);
        if (!is.is_open()) {
            // Only badbit raises an exception, so a failed open must be
            // checked explicitly; otherwise it shows up as a bogus BOM error.
            throw std::runtime_error("Cannot open samples.txt");
        }

        // Read straight into a wchar_t instead of punning an unsigned int
        // through reinterpret_cast<wchar_t*>.
        wchar_t bom = 0;
        is.read(&bom, 1);
        if (!is) {
            throw std::runtime_error("Failed to read BOM from samples.txt");
        }

        const unsigned short bomLE = 0xFEFF;
        if (bom != bomLE) {
            throw std::runtime_error("Invalid BOM. Only LE is supported");
        }

        std::cout << "Current position: " << is.tellg() << std::endl;
    } catch (const std::exception& e) {
        std::cout << e.what() << std::endl;
    }
}