Does the rule about using "" to represent a single literal quote character inside a quoted field also apply?
- Dennis > -----Original Message----- > From: [email protected] [mailto:[email protected]] > Sent: Sunday, April 17, 2016 09:45 > To: [email protected] > Subject: svn commit: r1739628 - in /openoffice/trunk/main: > connectivity/source/drivers/flat/ETable.cxx > tools/source/stream/stream.cxx > > Author: damjan > Date: Sun Apr 17 16:44:43 2016 > New Revision: 1739628 > > URL: http://svn.apache.org/viewvc?rev=1739628&view=rev > Log: > Make CSV line parsers consistent with CSV field parsers. > > Our CSV field parsing algorithms treat fields starting with a quote > (immediately at the beginning of the row, or after the field delimiter) > as > quoted. A quoted field ends at the corresponding closing quote, and any > remaining text between the closing quote and the next field delimiter or > end > of line is appended to the text already extracted from the field, but > not > processed further. Any quotes in this extra text are taken verbatim - > they > do not quote anything. > > Our CSV line parsers were big hacks - they essentially read and > concatenate > lines until an even number of quote characters is found, and then feed > this > through the CSV field parsers. > > This patch rewrites the line parsers to work exactly how the field > parsers > work. Text such as: > "another" ",something else > is now correctly parsed by both Calc and Base as: > [another "],[something else] > instead of breaking all further parsing. 
> > Patch by: me > > > Modified: > openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx > openoffice/trunk/main/tools/source/stream/stream.cxx > > Modified: > openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx > URL: > http://svn.apache.org/viewvc/openoffice/trunk/main/connectivity/source/d > rivers/flat/ETable.cxx?rev=1739628&r1=1739627&r2=1739628&view=diff > ======================================================================== > ====== > --- openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx > (original) > +++ openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx > Sun Apr 17 16:44:43 2016 > @@ -907,14 +907,64 @@ sal_Bool OFlatTable::readLine(QuotedToke > return sal_False; > > QuotedTokenizedString sLine = line; // check if the string > continues on next line > - while( (sLine.GetString().GetTokenCount(m_cStringDelimiter) % 2) != > 1 ) > + xub_StrLen nLastOffset = 0; > + bool isQuoted = false; > + bool isFieldStarting = true; > + while (true) > { > - m_pFileStream->ReadByteStringLine(sLine,nEncoding); > - if ( !m_pFileStream->IsEof() ) > + bool wasQuote = false; > + const sal_Unicode *p; > + p = sLine.GetString().GetBuffer(); > + p += nLastOffset; > + > + while (*p) > + { > + if (isQuoted) > + { > + if (*p == m_cStringDelimiter) > + wasQuote = !wasQuote; > + else > + { > + if (wasQuote) > + { > + wasQuote = false; > + isQuoted = false; > + if (*p == m_cFieldDelimiter) > + isFieldStarting = true; > + } > + } > + } > + else > + { > + if (isFieldStarting) > + { > + isFieldStarting = false; > + if (*p == m_cStringDelimiter) > + isQuoted = true; > + else if (*p == m_cFieldDelimiter) > + isFieldStarting = true; > + } > + else if (*p == m_cFieldDelimiter) > + isFieldStarting = true; > + } > + ++p; > + } > + > + if (wasQuote) > + isQuoted = false; > + > + if (isQuoted) > { > - line.GetString().Append('\n'); > - line.GetString() += sLine.GetString(); > - sLine = line; > + nLastOffset = sLine.Len(); > + 
m_pFileStream->ReadByteStringLine(sLine,nEncoding); > + if ( !m_pFileStream->IsEof() ) > + { > + line.GetString().Append('\n'); > + line.GetString() += sLine.GetString(); > + sLine = line; > + } > + else > + break; > } > else > break; > > Modified: openoffice/trunk/main/tools/source/stream/stream.cxx > URL: > http://svn.apache.org/viewvc/openoffice/trunk/main/tools/source/stream/s > tream.cxx?rev=1739628&r1=1739627&r2=1739628&view=diff > ======================================================================== > ====== > --- openoffice/trunk/main/tools/source/stream/stream.cxx (original) > +++ openoffice/trunk/main/tools/source/stream/stream.cxx Sun Apr 17 > 16:44:43 2016 > @@ -1128,38 +1128,59 @@ sal_Bool SvStream::ReadCsvLine( String& > { > const sal_Unicode* pSeps = rFieldSeparators.GetBuffer(); > xub_StrLen nLastOffset = 0; > - xub_StrLen nQuotes = 0; > + bool isQuoted = false; > + bool isFieldStarting = true; > while (!IsEof() && rStr.Len() < STRING_MAXLEN) > { > + bool wasQuote = false; > bool bBackslashEscaped = false; > - const sal_Unicode *p, *pStart; > - p = pStart = rStr.GetBuffer(); > + const sal_Unicode *p; > + p = rStr.GetBuffer(); > p += nLastOffset; > while (*p) > { > - if (nQuotes) > + if (isQuoted) > { > if (*p == cFieldQuote && !bBackslashEscaped) > - ++nQuotes; > - else if (bAllowBackslashEscape) > + wasQuote = !wasQuote; > + else > { > - if (*p == '\\') > - bBackslashEscaped = !bBackslashEscaped; > - else > - bBackslashEscaped = false; > + if (bAllowBackslashEscape) > + { > + if (*p == '\\') > + bBackslashEscaped = !bBackslashEscaped; > + else > + bBackslashEscaped = false; > + } > + if (wasQuote) > + { > + wasQuote = false; > + isQuoted = false; > + if (lcl_UnicodeStrChr( pSeps, *p )) > + isFieldStarting = true; > + } > } > } > - else if (*p == cFieldQuote && (p == pStart || > - lcl_UnicodeStrChr( pSeps, p[-1]))) > - nQuotes = 1; > - // A quote character inside a field content does not > start > - // a quote. 
> + else > + { > + if (isFieldStarting) > + { > + isFieldStarting = false; > + if (*p == cFieldQuote) > + isQuoted = true; > + else if (lcl_UnicodeStrChr( pSeps, *p )) > + isFieldStarting = true; > + } > + else if (lcl_UnicodeStrChr( pSeps, *p )) > + isFieldStarting = true; > + } > ++p; > } > > - if (nQuotes % 2 == 0) > - break; > - else > + if (wasQuote) > + isQuoted = false; > + > + if (isQuoted) > { > nLastOffset = rStr.Len(); > String aNext; > @@ -1167,6 +1188,8 @@ sal_Bool SvStream::ReadCsvLine( String& > rStr += sal_Unicode(_LF); > rStr += aNext; > } > + else > + break; > } > } > return nError == SVSTREAM_OK; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
