http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49269
Summary: wifstream::tellg reports invalid stream position after
reading single wchar_t character with NullCodecvt
Product: gcc
Version: 4.5.2
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: libstdc++
AssignedTo: [email protected]
ReportedBy: [email protected]
Let us consider the following case:
1. wifstream with NullCodecvt, binary mode.
2. input ucs2 samples.txt file (hex editor dump):
FEFF 005B 0030 0020 ¦ 0020 0020 0031 005D | ?[0 1]
I believe the tellg function below should return stream position 2 after
reading a single wide character. But it returns 1 instead, even though, as you
can see, the wifstream::read call does indeed read two bytes.
You may also see that subsequent read operations will be incorrect -- you will
get 5bff instead of 005b.
--
Michael Kochetkov
#include <fstream>
#include <iostream>
#include <stdexcept>
using std::codecvt ;
typedef codecvt < wchar_t , char , mbstate_t > NullCodecvtBase ;

// Code-conversion facet that reports "no conversion" for every operation.
//
// Every conversion hook returns `noconv`, and do_always_noconv() returns
// true, so a stream imbued with this facet is told that external characters
// need no translation.
class NullCodecvt
    : public NullCodecvtBase
{
public:
    // Public aliases kept for source compatibility with existing users.
    // NOTE(review): identifiers starting with an underscore followed by an
    // uppercase letter are reserved for the implementation; consider
    // migrating to the base-class names (intern_type / extern_type /
    // state_type) if nothing depends on these.
    typedef wchar_t _E ;
    typedef char _To ;
    typedef mbstate_t _St ;

    // refs is forwarded to the base facet constructor unchanged.
    explicit NullCodecvt( size_t refs = 0 ) : NullCodecvtBase(refs) { }

protected:
    // External -> internal: nothing is converted. The "next" pointers are
    // set to the range starts so callers never observe indeterminate values.
    virtual result do_in( _St& /*state*/ ,
        const _To* from , const _To* /*from_end*/ , const _To*& from_next ,
        _E* to , _E* /*to_end*/ , _E*& to_next
        ) const
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // Internal -> external: nothing is converted.
    // The output-end pointer must be `_To*` (the external type) so that this
    // declaration matches, and therefore overrides, the base-class virtual.
    virtual result do_out( _St& /*state*/ ,
        const _E* from , const _E* /*from_end*/ , const _E*& from_next ,
        _To* to , _To* /*to_end*/ , _To*& to_next
        ) const
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // No shift state to terminate; nothing is written.
    virtual result do_unshift( _St& /*state*/ ,
        _To* to , _To* /*to_end*/ , _To*& to_next ) const
    {
        to_next = to ;
        return noconv ;
    }

    // Returns the smaller of `max` and the number of external characters
    // available in [from, from_end).
    virtual int do_length( _St& /*state*/ , const _To* from ,
        const _To* from_end , size_t max ) const throw()
    {
        const size_t available = static_cast<size_t>(from_end - from) ;
        return static_cast<int>(max < available ? max : available) ;
    }

    // Always reports that no conversion is required.
    virtual bool do_always_noconv() const throw()
    {
        return true ;
    }

    // Reports a maximum of 2 external characters per internal character.
    virtual int do_max_length() const throw()
    {
        return 2 ;
    }

    // Reports a fixed-width encoding of 2 external characters per internal
    // character.
    virtual int do_encoding() const throw()
    {
        return 2 ;
    }
};
// Reproducer: opens samples.txt through a wifstream imbued with NullCodecvt,
// reads one wide character (expected to be the byte-order mark) and prints
// the stream position reported by tellg() afterwards.
// Any std::exception raised along the way is reported on standard output.
int
main() {
    try {
        std::wifstream is;
        // The locale takes ownership of the facet (constructed with refs == 0).
        is.imbue(std::locale(std::locale::classic(), new NullCodecvt()));
        is.exceptions(std::ios::badbit);
        // samples.txt should look like this in a hex editor (16-bit units):
        // FEFF 005B 0030 0020 0020 0020 0031 005D
        is.open("samples.txt", std::ios::in | std::ios::binary);
        if (!is.is_open()) {
            // Report a missing file as such instead of as a bad BOM.
            throw std::runtime_error("Cannot open samples.txt");
        }
        // Read into a real wchar_t rather than punning an unsigned int, so
        // the result does not depend on sizeof(wchar_t) or byte order.
        wchar_t bom = 0;
        is.read(&bom, 1);
        const unsigned short bomLE = 0xFEFF;
        if (bom != bomLE) {
            throw std::runtime_error("Invalid BOM. Only LE is supported");
        }
        std::cout << "Current position: " << is.tellg() << std::endl;
    }
    catch (const std::exception& e) {
        std::cout << e.what() << std::endl;
    }
}