erikjv created this revision. Previously, the column number in a diagnostic would be the byte position in the line. This results in incorrect column numbers when a multi-byte UTF-8 character would be present in the input. By ignoring all bytes starting with 0b10.... the correct column number is created.
This fixes PR21144. https://reviews.llvm.org/D33765 Files: include/clang/Basic/SourceManager.h lib/Basic/SourceManager.cpp test/Misc/diag-utf8.cpp Index: test/Misc/diag-utf8.cpp =================================================================== --- /dev/null +++ test/Misc/diag-utf8.cpp @@ -0,0 +1,11 @@ +// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s + +struct Foo { int member; }; + +void f(Foo foo) +{ + "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo') + "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo') +} + + Index: lib/Basic/SourceManager.cpp =================================================================== --- lib/Basic/SourceManager.cpp +++ lib/Basic/SourceManager.cpp @@ -1055,11 +1055,22 @@ return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second); } +static unsigned correctForMultiByteChars(const char *Buf, unsigned LineStart, + unsigned Column) +{ + unsigned CorrectedColumn = Column; + for (unsigned I = 0; I < Column; ++I) { + if ((Buf[LineStart + I] & 0xc0) == 0x80) + --CorrectedColumn; + } + return CorrectedColumn; +} /// getColumnNumber - Return the column # for the specified file position. /// this is significantly cheaper to compute than the line number. unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos, - bool *Invalid) const { + bool *Invalid, + bool BytePosition) const { bool MyInvalid = false; llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid); if (Invalid) @@ -1093,14 +1104,18 @@ if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n') --FilePos; } - return FilePos - LineStart + 1; + unsigned Column = FilePos - LineStart + 1; + return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart, + Column); } } unsigned LineStart = FilePos; while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') --LineStart; - return FilePos-LineStart+1; + unsigned Column = FilePos - LineStart + 1; + return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart, + Column); } // isInvalid - Return the result of calling loc.isInvalid(), and @@ -1425,7 +1440,8 @@ unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid); if (Invalid) return PresumedLoc(); - unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid); + unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid, + false); if (Invalid) return PresumedLoc(); Index: include/clang/Basic/SourceManager.h =================================================================== --- include/clang/Basic/SourceManager.h +++ include/clang/Basic/SourceManager.h @@ -1275,7 +1275,8 @@ /// on a file sloc, so you must choose a spelling or expansion location /// before calling this method. unsigned getColumnNumber(FileID FID, unsigned FilePos, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr, + bool BytePosition = true) const; unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; unsigned getExpansionColumnNumber(SourceLocation Loc,
Index: test/Misc/diag-utf8.cpp =================================================================== --- /dev/null +++ test/Misc/diag-utf8.cpp @@ -0,0 +1,11 @@ +// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s + +struct Foo { int member; }; + +void f(Foo foo) +{ + "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo') + "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo') +} + + Index: lib/Basic/SourceManager.cpp =================================================================== --- lib/Basic/SourceManager.cpp +++ lib/Basic/SourceManager.cpp @@ -1055,11 +1055,22 @@ return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second); } +static unsigned correctForMultiByteChars(const char *Buf, unsigned LineStart, + unsigned Column) +{ + unsigned CorrectedColumn = Column; + for (unsigned I = 0; I < Column; ++I) { + if ((Buf[LineStart + I] & 0xc0) == 0x80) + --CorrectedColumn; + } + return CorrectedColumn; +} /// getColumnNumber - Return the column # for the specified file position. /// this is significantly cheaper to compute than the line number. unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos, - bool *Invalid) const { + bool *Invalid, + bool BytePosition) const { bool MyInvalid = false; llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid); if (Invalid) @@ -1093,14 +1104,18 @@ if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n') --FilePos; } - return FilePos - LineStart + 1; + unsigned Column = FilePos - LineStart + 1; + return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart, + Column); } } unsigned LineStart = FilePos; while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') --LineStart; - return FilePos-LineStart+1; + unsigned Column = FilePos - LineStart + 1; + return BytePosition ? Column : correctForMultiByteChars(Buf, LineStart, + Column); } // isInvalid - Return the result of calling loc.isInvalid(), and @@ -1425,7 +1440,8 @@ unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid); if (Invalid) return PresumedLoc(); - unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid); + unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid, + false); if (Invalid) return PresumedLoc(); Index: include/clang/Basic/SourceManager.h =================================================================== --- include/clang/Basic/SourceManager.h +++ include/clang/Basic/SourceManager.h @@ -1275,7 +1275,8 @@ /// on a file sloc, so you must choose a spelling or expansion location /// before calling this method. unsigned getColumnNumber(FileID FID, unsigned FilePos, - bool *Invalid = nullptr) const; + bool *Invalid = nullptr, + bool BytePosition = true) const; unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid = nullptr) const; unsigned getExpansionColumnNumber(SourceLocation Loc,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits