JsonCpp project page JsonCpp home page

src/lib_json/json_reader.cpp

Go to the documentation of this file.
00001 // Copyright 2007-2011 Baptiste Lepilleur
00002 // Distributed under MIT license, or public domain if desired and
00003 // recognized in your jurisdiction.
00004 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
00005 
00006 #if !defined(JSON_IS_AMALGAMATION)
00007 #include <json/assertions.h>
00008 #include <json/reader.h>
00009 #include <json/value.h>
00010 #include "json_tool.h"
00011 #endif // if !defined(JSON_IS_AMALGAMATION)
00012 #include <utility>
00013 #include <cstdio>
00014 #include <cassert>
00015 #include <cstring>
00016 #include <istream>
00017 #include <sstream>
00018 #include <memory>
00019 #include <set>
00020 
00021 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
00022 #define snprintf _snprintf
00023 #endif
00024 
00025 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
00026 // Disable warning about strdup being deprecated.
00027 #pragma warning(disable : 4996)
00028 #endif
00029 
// Nesting depth at which Reader::readValue() gives up and throws.
static const int stackLimit_g = 1000;
// Current Reader::readValue() nesting depth. File-scope state that is reset
// at the start of Reader::parse(const char*, ...); see readValue().
static int stackDepth_g = 0;
00032 
00033 namespace Json {
00034 
00035 #if __cplusplus >= 201103L
00036 typedef std::unique_ptr<CharReader> CharReaderPtr;
00037 #else
00038 typedef std::auto_ptr<CharReader>   CharReaderPtr;
00039 #endif
00040 
00041 // Implementation of class Features
00042 // ////////////////////////////////
00043 
00044 Features::Features()
00045     : allowComments_(true), strictRoot_(false),
00046       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
00047 
00048 Features Features::all() { return Features(); }
00049 
00050 Features Features::strictMode() {
00051   Features features;
00052   features.allowComments_ = false;
00053   features.strictRoot_ = true;
00054   features.allowDroppedNullPlaceholders_ = false;
00055   features.allowNumericKeys_ = false;
00056   return features;
00057 }
00058 
00059 // Implementation of class Reader
00060 // ////////////////////////////////
00061 
00062 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
00063   for (; begin < end; ++begin)
00064     if (*begin == '\n' || *begin == '\r')
00065       return true;
00066   return false;
00067 }
00068 
00069 // Class Reader
00070 // //////////////////////////////////////////////////////////////////
00071 
00072 Reader::Reader()
00073     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00074       lastValue_(), commentsBefore_(), features_(Features::all()),
00075       collectComments_() {}
00076 
00077 Reader::Reader(const Features& features)
00078     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00079       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
00080 }
00081 
00082 bool
00083 Reader::parse(const std::string& document, Value& root, bool collectComments) {
00084   document_ = document;
00085   const char* begin = document_.c_str();
00086   const char* end = begin + document_.length();
00087   return parse(begin, end, root, collectComments);
00088 }
00089 
00090 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
00091   // std::istream_iterator<char> begin(sin);
00092   // std::istream_iterator<char> end;
00093   // Those would allow streamed input from a file, if parse() were a
00094   // template function.
00095 
00096   // Since std::string is reference-counted, this at least does not
00097   // create an extra copy.
00098   std::string doc;
00099   std::getline(sin, doc, (char)EOF);
00100   return parse(doc, root, collectComments);
00101 }
00102 
00103 bool Reader::parse(const char* beginDoc,
00104                    const char* endDoc,
00105                    Value& root,
00106                    bool collectComments) {
00107   if (!features_.allowComments_) {
00108     collectComments = false;
00109   }
00110 
00111   begin_ = beginDoc;
00112   end_ = endDoc;
00113   collectComments_ = collectComments;
00114   current_ = begin_;
00115   lastValueEnd_ = 0;
00116   lastValue_ = 0;
00117   commentsBefore_ = "";
00118   errors_.clear();
00119   while (!nodes_.empty())
00120     nodes_.pop();
00121   nodes_.push(&root);
00122 
00123   stackDepth_g = 0;  // Yes, this is bad coding, but options are limited.
00124   bool successful = readValue();
00125   Token token;
00126   skipCommentTokens(token);
00127   if (collectComments_ && !commentsBefore_.empty())
00128     root.setComment(commentsBefore_, commentAfter);
00129   if (features_.strictRoot_) {
00130     if (!root.isArray() && !root.isObject()) {
00131       // Set error location to start of doc, ideally should be first token found
00132       // in doc
00133       token.type_ = tokenError;
00134       token.start_ = beginDoc;
00135       token.end_ = endDoc;
00136       addError(
00137           "A valid JSON document must be either an array or an object value.",
00138           token);
00139       return false;
00140     }
00141   }
00142   return successful;
00143 }
00144 
00145 bool Reader::readValue() {
00146   // This is a non-reentrant way to support a stackLimit. Terrible!
00147   // But this deprecated class has a security problem: Bad input can
00148   // cause a seg-fault. This seems like a fair, binary-compatible way
00149   // to prevent the problem.
00150   if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
00151   ++stackDepth_g;
00152 
00153   Token token;
00154   skipCommentTokens(token);
00155   bool successful = true;
00156 
00157   if (collectComments_ && !commentsBefore_.empty()) {
00158     currentValue().setComment(commentsBefore_, commentBefore);
00159     commentsBefore_ = "";
00160   }
00161 
00162   switch (token.type_) {
00163   case tokenObjectBegin:
00164     successful = readObject(token);
00165     currentValue().setOffsetLimit(current_ - begin_);
00166     break;
00167   case tokenArrayBegin:
00168     successful = readArray(token);
00169     currentValue().setOffsetLimit(current_ - begin_);
00170     break;
00171   case tokenNumber:
00172     successful = decodeNumber(token);
00173     break;
00174   case tokenString:
00175     successful = decodeString(token);
00176     break;
00177   case tokenTrue:
00178     {
00179     Value v(true);
00180     currentValue().swapPayload(v);
00181     currentValue().setOffsetStart(token.start_ - begin_);
00182     currentValue().setOffsetLimit(token.end_ - begin_);
00183     }
00184     break;
00185   case tokenFalse:
00186     {
00187     Value v(false);
00188     currentValue().swapPayload(v);
00189     currentValue().setOffsetStart(token.start_ - begin_);
00190     currentValue().setOffsetLimit(token.end_ - begin_);
00191     }
00192     break;
00193   case tokenNull:
00194     {
00195     Value v;
00196     currentValue().swapPayload(v);
00197     currentValue().setOffsetStart(token.start_ - begin_);
00198     currentValue().setOffsetLimit(token.end_ - begin_);
00199     }
00200     break;
00201   case tokenArraySeparator:
00202   case tokenObjectEnd:
00203   case tokenArrayEnd:
00204     if (features_.allowDroppedNullPlaceholders_) {
00205       // "Un-read" the current token and mark the current value as a null
00206       // token.
00207       current_--;
00208       Value v;
00209       currentValue().swapPayload(v);
00210       currentValue().setOffsetStart(current_ - begin_ - 1);
00211       currentValue().setOffsetLimit(current_ - begin_);
00212       break;
00213     } // Else, fall through...
00214   default:
00215     currentValue().setOffsetStart(token.start_ - begin_);
00216     currentValue().setOffsetLimit(token.end_ - begin_);
00217     return addError("Syntax error: value, object or array expected.", token);
00218   }
00219 
00220   if (collectComments_) {
00221     lastValueEnd_ = current_;
00222     lastValue_ = &currentValue();
00223   }
00224 
00225   --stackDepth_g;
00226   return successful;
00227 }
00228 
00229 void Reader::skipCommentTokens(Token& token) {
00230   if (features_.allowComments_) {
00231     do {
00232       readToken(token);
00233     } while (token.type_ == tokenComment);
00234   } else {
00235     readToken(token);
00236   }
00237 }
00238 
00239 bool Reader::readToken(Token& token) {
00240   skipSpaces();
00241   token.start_ = current_;
00242   Char c = getNextChar();
00243   bool ok = true;
00244   switch (c) {
00245   case '{':
00246     token.type_ = tokenObjectBegin;
00247     break;
00248   case '}':
00249     token.type_ = tokenObjectEnd;
00250     break;
00251   case '[':
00252     token.type_ = tokenArrayBegin;
00253     break;
00254   case ']':
00255     token.type_ = tokenArrayEnd;
00256     break;
00257   case '"':
00258     token.type_ = tokenString;
00259     ok = readString();
00260     break;
00261   case '/':
00262     token.type_ = tokenComment;
00263     ok = readComment();
00264     break;
00265   case '0':
00266   case '1':
00267   case '2':
00268   case '3':
00269   case '4':
00270   case '5':
00271   case '6':
00272   case '7':
00273   case '8':
00274   case '9':
00275   case '-':
00276     token.type_ = tokenNumber;
00277     readNumber();
00278     break;
00279   case 't':
00280     token.type_ = tokenTrue;
00281     ok = match("rue", 3);
00282     break;
00283   case 'f':
00284     token.type_ = tokenFalse;
00285     ok = match("alse", 4);
00286     break;
00287   case 'n':
00288     token.type_ = tokenNull;
00289     ok = match("ull", 3);
00290     break;
00291   case ',':
00292     token.type_ = tokenArraySeparator;
00293     break;
00294   case ':':
00295     token.type_ = tokenMemberSeparator;
00296     break;
00297   case 0:
00298     token.type_ = tokenEndOfStream;
00299     break;
00300   default:
00301     ok = false;
00302     break;
00303   }
00304   if (!ok)
00305     token.type_ = tokenError;
00306   token.end_ = current_;
00307   return true;
00308 }
00309 
00310 void Reader::skipSpaces() {
00311   while (current_ != end_) {
00312     Char c = *current_;
00313     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
00314       ++current_;
00315     else
00316       break;
00317   }
00318 }
00319 
00320 bool Reader::match(Location pattern, int patternLength) {
00321   if (end_ - current_ < patternLength)
00322     return false;
00323   int index = patternLength;
00324   while (index--)
00325     if (current_[index] != pattern[index])
00326       return false;
00327   current_ += patternLength;
00328   return true;
00329 }
00330 
00331 bool Reader::readComment() {
00332   Location commentBegin = current_ - 1;
00333   Char c = getNextChar();
00334   bool successful = false;
00335   if (c == '*')
00336     successful = readCStyleComment();
00337   else if (c == '/')
00338     successful = readCppStyleComment();
00339   if (!successful)
00340     return false;
00341 
00342   if (collectComments_) {
00343     CommentPlacement placement = commentBefore;
00344     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
00345       if (c != '*' || !containsNewLine(commentBegin, current_))
00346         placement = commentAfterOnSameLine;
00347     }
00348 
00349     addComment(commentBegin, current_, placement);
00350   }
00351   return true;
00352 }
00353 
00354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
00355   std::string normalized;
00356   normalized.reserve(end - begin);
00357   Reader::Location current = begin;
00358   while (current != end) {
00359     char c = *current++;
00360     if (c == '\r') {
00361       if (current != end && *current == '\n')
00362          // convert dos EOL
00363          ++current;
00364       // convert Mac EOL
00365       normalized += '\n';
00366     } else {
00367       normalized += c;
00368     }
00369   }
00370   return normalized;
00371 }
00372 
00373 void
00374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
00375   assert(collectComments_);
00376   const std::string& normalized = normalizeEOL(begin, end);
00377   if (placement == commentAfterOnSameLine) {
00378     assert(lastValue_ != 0);
00379     lastValue_->setComment(normalized, placement);
00380   } else {
00381     commentsBefore_ += normalized;
00382   }
00383 }
00384 
00385 bool Reader::readCStyleComment() {
00386   while (current_ != end_) {
00387     Char c = getNextChar();
00388     if (c == '*' && *current_ == '/')
00389       break;
00390   }
00391   return getNextChar() == '/';
00392 }
00393 
00394 bool Reader::readCppStyleComment() {
00395   while (current_ != end_) {
00396     Char c = getNextChar();
00397     if (c == '\n')
00398       break;
00399     if (c == '\r') {
00400       // Consume DOS EOL. It will be normalized in addComment.
00401       if (current_ != end_ && *current_ == '\n')
00402         getNextChar();
00403       // Break on Moc OS 9 EOL.
00404       break;
00405     }
00406   }
00407   return true;
00408 }
00409 
00410 void Reader::readNumber() {
00411   const char *p = current_;
00412   char c = '0'; // stopgap for already consumed character
00413   // integral part
00414   while (c >= '0' && c <= '9')
00415     c = (current_ = p) < end_ ? *p++ : 0;
00416   // fractional part
00417   if (c == '.') {
00418     c = (current_ = p) < end_ ? *p++ : 0;
00419     while (c >= '0' && c <= '9')
00420       c = (current_ = p) < end_ ? *p++ : 0;
00421   }
00422   // exponential part
00423   if (c == 'e' || c == 'E') {
00424     c = (current_ = p) < end_ ? *p++ : 0;
00425     if (c == '+' || c == '-')
00426       c = (current_ = p) < end_ ? *p++ : 0;
00427     while (c >= '0' && c <= '9')
00428       c = (current_ = p) < end_ ? *p++ : 0;
00429   }
00430 }
00431 
00432 bool Reader::readString() {
00433   Char c = 0;
00434   while (current_ != end_) {
00435     c = getNextChar();
00436     if (c == '\\')
00437       getNextChar();
00438     else if (c == '"')
00439       break;
00440   }
00441   return c == '"';
00442 }
00443 
00444 bool Reader::readObject(Token& tokenStart) {
00445   Token tokenName;
00446   std::string name;
00447   Value init(objectValue);
00448   currentValue().swapPayload(init);
00449   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00450   while (readToken(tokenName)) {
00451     bool initialTokenOk = true;
00452     while (tokenName.type_ == tokenComment && initialTokenOk)
00453       initialTokenOk = readToken(tokenName);
00454     if (!initialTokenOk)
00455       break;
00456     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
00457       return true;
00458     name = "";
00459     if (tokenName.type_ == tokenString) {
00460       if (!decodeString(tokenName, name))
00461         return recoverFromError(tokenObjectEnd);
00462     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
00463       Value numberName;
00464       if (!decodeNumber(tokenName, numberName))
00465         return recoverFromError(tokenObjectEnd);
00466       name = numberName.asString();
00467     } else {
00468       break;
00469     }
00470 
00471     Token colon;
00472     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
00473       return addErrorAndRecover(
00474           "Missing ':' after object member name", colon, tokenObjectEnd);
00475     }
00476     Value& value = currentValue()[name];
00477     nodes_.push(&value);
00478     bool ok = readValue();
00479     nodes_.pop();
00480     if (!ok) // error already set
00481       return recoverFromError(tokenObjectEnd);
00482 
00483     Token comma;
00484     if (!readToken(comma) ||
00485         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
00486          comma.type_ != tokenComment)) {
00487       return addErrorAndRecover(
00488           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
00489     }
00490     bool finalizeTokenOk = true;
00491     while (comma.type_ == tokenComment && finalizeTokenOk)
00492       finalizeTokenOk = readToken(comma);
00493     if (comma.type_ == tokenObjectEnd)
00494       return true;
00495   }
00496   return addErrorAndRecover(
00497       "Missing '}' or object member name", tokenName, tokenObjectEnd);
00498 }
00499 
00500 bool Reader::readArray(Token& tokenStart) {
00501   Value init(arrayValue);
00502   currentValue().swapPayload(init);
00503   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00504   skipSpaces();
00505   if (*current_ == ']') // empty array
00506   {
00507     Token endArray;
00508     readToken(endArray);
00509     return true;
00510   }
00511   int index = 0;
00512   for (;;) {
00513     Value& value = currentValue()[index++];
00514     nodes_.push(&value);
00515     bool ok = readValue();
00516     nodes_.pop();
00517     if (!ok) // error already set
00518       return recoverFromError(tokenArrayEnd);
00519 
00520     Token token;
00521     // Accept Comment after last item in the array.
00522     ok = readToken(token);
00523     while (token.type_ == tokenComment && ok) {
00524       ok = readToken(token);
00525     }
00526     bool badTokenType =
00527         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
00528     if (!ok || badTokenType) {
00529       return addErrorAndRecover(
00530           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
00531     }
00532     if (token.type_ == tokenArrayEnd)
00533       break;
00534   }
00535   return true;
00536 }
00537 
00538 bool Reader::decodeNumber(Token& token) {
00539   Value decoded;
00540   if (!decodeNumber(token, decoded))
00541     return false;
00542   currentValue().swapPayload(decoded);
00543   currentValue().setOffsetStart(token.start_ - begin_);
00544   currentValue().setOffsetLimit(token.end_ - begin_);
00545   return true;
00546 }
00547 
00548 bool Reader::decodeNumber(Token& token, Value& decoded) {
00549   // Attempts to parse the number as an integer. If the number is
00550   // larger than the maximum supported value of an integer then
00551   // we decode the number as a double.
00552   Location current = token.start_;
00553   bool isNegative = *current == '-';
00554   if (isNegative)
00555     ++current;
00556   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
00557   Value::LargestUInt maxIntegerValue =
00558       isNegative ? Value::LargestUInt(-Value::minLargestInt)
00559                  : Value::maxLargestUInt;
00560   Value::LargestUInt threshold = maxIntegerValue / 10;
00561   Value::LargestUInt value = 0;
00562   while (current < token.end_) {
00563     Char c = *current++;
00564     if (c < '0' || c > '9')
00565       return decodeDouble(token, decoded);
00566     Value::UInt digit(c - '0');
00567     if (value >= threshold) {
00568       // We've hit or exceeded the max value divided by 10 (rounded down). If
00569       // a) we've only just touched the limit, b) this is the last digit, and
00570       // c) it's small enough to fit in that rounding delta, we're okay.
00571       // Otherwise treat this number as a double to avoid overflow.
00572       if (value > threshold || current != token.end_ ||
00573           digit > maxIntegerValue % 10) {
00574         return decodeDouble(token, decoded);
00575       }
00576     }
00577     value = value * 10 + digit;
00578   }
00579   if (isNegative)
00580     decoded = -Value::LargestInt(value);
00581   else if (value <= Value::LargestUInt(Value::maxInt))
00582     decoded = Value::LargestInt(value);
00583   else
00584     decoded = value;
00585   return true;
00586 }
00587 
00588 bool Reader::decodeDouble(Token& token) {
00589   Value decoded;
00590   if (!decodeDouble(token, decoded))
00591     return false;
00592   currentValue().swapPayload(decoded);
00593   currentValue().setOffsetStart(token.start_ - begin_);
00594   currentValue().setOffsetLimit(token.end_ - begin_);
00595   return true;
00596 }
00597 
00598 bool Reader::decodeDouble(Token& token, Value& decoded) {
00599   double value = 0;
00600   const int bufferSize = 32;
00601   int count;
00602   int length = int(token.end_ - token.start_);
00603 
00604   // Sanity check to avoid buffer overflow exploits.
00605   if (length < 0) {
00606     return addError("Unable to parse token length", token);
00607   }
00608 
00609   // Avoid using a string constant for the format control string given to
00610   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
00611   // info:
00612   //
00613   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
00614   char format[] = "%lf";
00615 
00616   if (length <= bufferSize) {
00617     Char buffer[bufferSize + 1];
00618     memcpy(buffer, token.start_, length);
00619     buffer[length] = 0;
00620     count = sscanf(buffer, format, &value);
00621   } else {
00622     std::string buffer(token.start_, token.end_);
00623     count = sscanf(buffer.c_str(), format, &value);
00624   }
00625 
00626   if (count != 1)
00627     return addError("'" + std::string(token.start_, token.end_) +
00628                         "' is not a number.",
00629                     token);
00630   decoded = value;
00631   return true;
00632 }
00633 
00634 bool Reader::decodeString(Token& token) {
00635   std::string decoded_string;
00636   if (!decodeString(token, decoded_string))
00637     return false;
00638   Value decoded(decoded_string);
00639   currentValue().swapPayload(decoded);
00640   currentValue().setOffsetStart(token.start_ - begin_);
00641   currentValue().setOffsetLimit(token.end_ - begin_);
00642   return true;
00643 }
00644 
00645 bool Reader::decodeString(Token& token, std::string& decoded) {
00646   decoded.reserve(token.end_ - token.start_ - 2);
00647   Location current = token.start_ + 1; // skip '"'
00648   Location end = token.end_ - 1;       // do not include '"'
00649   while (current != end) {
00650     Char c = *current++;
00651     if (c == '"')
00652       break;
00653     else if (c == '\\') {
00654       if (current == end)
00655         return addError("Empty escape sequence in string", token, current);
00656       Char escape = *current++;
00657       switch (escape) {
00658       case '"':
00659         decoded += '"';
00660         break;
00661       case '/':
00662         decoded += '/';
00663         break;
00664       case '\\':
00665         decoded += '\\';
00666         break;
00667       case 'b':
00668         decoded += '\b';
00669         break;
00670       case 'f':
00671         decoded += '\f';
00672         break;
00673       case 'n':
00674         decoded += '\n';
00675         break;
00676       case 'r':
00677         decoded += '\r';
00678         break;
00679       case 't':
00680         decoded += '\t';
00681         break;
00682       case 'u': {
00683         unsigned int unicode;
00684         if (!decodeUnicodeCodePoint(token, current, end, unicode))
00685           return false;
00686         decoded += codePointToUTF8(unicode);
00687       } break;
00688       default:
00689         return addError("Bad escape sequence in string", token, current);
00690       }
00691     } else {
00692       decoded += c;
00693     }
00694   }
00695   return true;
00696 }
00697 
00698 bool Reader::decodeUnicodeCodePoint(Token& token,
00699                                     Location& current,
00700                                     Location end,
00701                                     unsigned int& unicode) {
00702 
00703   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
00704     return false;
00705   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
00706     // surrogate pairs
00707     if (end - current < 6)
00708       return addError(
00709           "additional six characters expected to parse unicode surrogate pair.",
00710           token,
00711           current);
00712     unsigned int surrogatePair;
00713     if (*(current++) == '\\' && *(current++) == 'u') {
00714       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
00715         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00716       } else
00717         return false;
00718     } else
00719       return addError("expecting another \\u token to begin the second half of "
00720                       "a unicode surrogate pair",
00721                       token,
00722                       current);
00723   }
00724   return true;
00725 }
00726 
00727 bool Reader::decodeUnicodeEscapeSequence(Token& token,
00728                                          Location& current,
00729                                          Location end,
00730                                          unsigned int& unicode) {
00731   if (end - current < 4)
00732     return addError(
00733         "Bad unicode escape sequence in string: four digits expected.",
00734         token,
00735         current);
00736   unicode = 0;
00737   for (int index = 0; index < 4; ++index) {
00738     Char c = *current++;
00739     unicode *= 16;
00740     if (c >= '0' && c <= '9')
00741       unicode += c - '0';
00742     else if (c >= 'a' && c <= 'f')
00743       unicode += c - 'a' + 10;
00744     else if (c >= 'A' && c <= 'F')
00745       unicode += c - 'A' + 10;
00746     else
00747       return addError(
00748           "Bad unicode escape sequence in string: hexadecimal digit expected.",
00749           token,
00750           current);
00751   }
00752   return true;
00753 }
00754 
00755 bool
00756 Reader::addError(const std::string& message, Token& token, Location extra) {
00757   ErrorInfo info;
00758   info.token_ = token;
00759   info.message_ = message;
00760   info.extra_ = extra;
00761   errors_.push_back(info);
00762   return false;
00763 }
00764 
00765 bool Reader::recoverFromError(TokenType skipUntilToken) {
00766   int errorCount = int(errors_.size());
00767   Token skip;
00768   for (;;) {
00769     if (!readToken(skip))
00770       errors_.resize(errorCount); // discard errors caused by recovery
00771     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
00772       break;
00773   }
00774   errors_.resize(errorCount);
00775   return false;
00776 }
00777 
00778 bool Reader::addErrorAndRecover(const std::string& message,
00779                                 Token& token,
00780                                 TokenType skipUntilToken) {
00781   addError(message, token);
00782   return recoverFromError(skipUntilToken);
00783 }
00784 
00785 Value& Reader::currentValue() { return *(nodes_.top()); }
00786 
00787 Reader::Char Reader::getNextChar() {
00788   if (current_ == end_)
00789     return 0;
00790   return *current_++;
00791 }
00792 
00793 void Reader::getLocationLineAndColumn(Location location,
00794                                       int& line,
00795                                       int& column) const {
00796   Location current = begin_;
00797   Location lastLineStart = current;
00798   line = 0;
00799   while (current < location && current != end_) {
00800     Char c = *current++;
00801     if (c == '\r') {
00802       if (*current == '\n')
00803         ++current;
00804       lastLineStart = current;
00805       ++line;
00806     } else if (c == '\n') {
00807       lastLineStart = current;
00808       ++line;
00809     }
00810   }
00811   // column & line start at 1
00812   column = int(location - lastLineStart) + 1;
00813   ++line;
00814 }
00815 
00816 std::string Reader::getLocationLineAndColumn(Location location) const {
00817   int line, column;
00818   getLocationLineAndColumn(location, line, column);
00819   char buffer[18 + 16 + 16 + 1];
00820 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
00821 #if defined(WINCE)
00822   _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00823 #else
00824   sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00825 #endif
00826 #else
00827   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00828 #endif
00829   return buffer;
00830 }
00831 
00832 // Deprecated. Preserved for backward compatibility
00833 std::string Reader::getFormatedErrorMessages() const {
00834   return getFormattedErrorMessages();
00835 }
00836 
00837 std::string Reader::getFormattedErrorMessages() const {
00838   std::string formattedMessage;
00839   for (Errors::const_iterator itError = errors_.begin();
00840        itError != errors_.end();
00841        ++itError) {
00842     const ErrorInfo& error = *itError;
00843     formattedMessage +=
00844         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
00845     formattedMessage += "  " + error.message_ + "\n";
00846     if (error.extra_)
00847       formattedMessage +=
00848           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
00849   }
00850   return formattedMessage;
00851 }
00852 
00853 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
00854   std::vector<Reader::StructuredError> allErrors;
00855   for (Errors::const_iterator itError = errors_.begin();
00856        itError != errors_.end();
00857        ++itError) {
00858     const ErrorInfo& error = *itError;
00859     Reader::StructuredError structured;
00860     structured.offset_start = error.token_.start_ - begin_;
00861     structured.offset_limit = error.token_.end_ - begin_;
00862     structured.message = error.message_;
00863     allErrors.push_back(structured);
00864   }
00865   return allErrors;
00866 }
00867 
00868 bool Reader::pushError(const Value& value, const std::string& message) {
00869   size_t length = end_ - begin_;
00870   if(value.getOffsetStart() > length
00871     || value.getOffsetLimit() > length)
00872     return false;
00873   Token token;
00874   token.type_ = tokenError;
00875   token.start_ = begin_ + value.getOffsetStart();
00876   token.end_ = end_ + value.getOffsetLimit();
00877   ErrorInfo info;
00878   info.token_ = token;
00879   info.message_ = message;
00880   info.extra_ = 0;
00881   errors_.push_back(info);
00882   return true;
00883 }
00884 
00885 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
00886   size_t length = end_ - begin_;
00887   if(value.getOffsetStart() > length
00888     || value.getOffsetLimit() > length
00889     || extra.getOffsetLimit() > length)
00890     return false;
00891   Token token;
00892   token.type_ = tokenError;
00893   token.start_ = begin_ + value.getOffsetStart();
00894   token.end_ = begin_ + value.getOffsetLimit();
00895   ErrorInfo info;
00896   info.token_ = token;
00897   info.message_ = message;
00898   info.extra_ = begin_ + extra.getOffsetStart();
00899   errors_.push_back(info);
00900   return true;
00901 }
00902 
00903 bool Reader::good() const {
00904   return !errors_.size();
00905 }
00906 
// Feature set taken by OurReader. It carries the same four flags as Features
// plus allowSingleQuotes_, failIfExtra_, rejectDupKeys_ and stackLimit_.
class OurFeatures {
public:
  // Returns a default-constructed feature set.
  static OurFeatures all();
  OurFeatures();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  int stackLimit_;
};  // OurFeatures

// Implementation of class OurFeatures
// ////////////////////////////////

// Defaults: comments accepted, every other flag off, stack limit 1000.
// rejectDupKeys_ and stackLimit_ are initialized here as well so that a
// default-constructed object never holds indeterminate values.
OurFeatures::OurFeatures()
    : allowComments_(true), strictRoot_(false)
    , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false)
    , allowSingleQuotes_(false)
    , failIfExtra_(false)
    , rejectDupKeys_(false)
    , stackLimit_(1000)
{
}

OurFeatures OurFeatures::all() { return OurFeatures(); }

// Implementation of class Reader
// ////////////////////////////////
00934 
00935 // Implementation of class Reader
00936 // ////////////////////////////////
00937 
00938 // exact copy of Reader, renamed to OurReader
00939 class OurReader {
00940 public:
00941   typedef char Char;
00942   typedef const Char* Location;
00943   struct StructuredError {
00944     size_t offset_start;
00945     size_t offset_limit;
00946     std::string message;
00947   };
00948 
00949   OurReader(OurFeatures const& features);
00950   bool parse(const char* beginDoc,
00951              const char* endDoc,
00952              Value& root,
00953              bool collectComments = true);
00954   std::string getFormattedErrorMessages() const;
00955   std::vector<StructuredError> getStructuredErrors() const;
00956   bool pushError(const Value& value, const std::string& message);
00957   bool pushError(const Value& value, const std::string& message, const Value& extra);
00958   bool good() const;
00959 
00960 private:
00961   OurReader(OurReader const&);  // no impl
00962   void operator=(OurReader const&);  // no impl
00963 
00964   enum TokenType {
00965     tokenEndOfStream = 0,
00966     tokenObjectBegin,
00967     tokenObjectEnd,
00968     tokenArrayBegin,
00969     tokenArrayEnd,
00970     tokenString,
00971     tokenNumber,
00972     tokenTrue,
00973     tokenFalse,
00974     tokenNull,
00975     tokenArraySeparator,
00976     tokenMemberSeparator,
00977     tokenComment,
00978     tokenError
00979   };
00980 
00981   class Token {
00982   public:
00983     TokenType type_;
00984     Location start_;
00985     Location end_;
00986   };
00987 
00988   class ErrorInfo {
00989   public:
00990     Token token_;
00991     std::string message_;
00992     Location extra_;
00993   };
00994 
00995   typedef std::deque<ErrorInfo> Errors;
00996 
00997   bool readToken(Token& token);
00998   void skipSpaces();
00999   bool match(Location pattern, int patternLength);
01000   bool readComment();
01001   bool readCStyleComment();
01002   bool readCppStyleComment();
01003   bool readString();
01004   bool readStringSingleQuote();
01005   void readNumber();
01006   bool readValue();
01007   bool readObject(Token& token);
01008   bool readArray(Token& token);
01009   bool decodeNumber(Token& token);
01010   bool decodeNumber(Token& token, Value& decoded);
01011   bool decodeString(Token& token);
01012   bool decodeString(Token& token, std::string& decoded);
01013   bool decodeDouble(Token& token);
01014   bool decodeDouble(Token& token, Value& decoded);
01015   bool decodeUnicodeCodePoint(Token& token,
01016                               Location& current,
01017                               Location end,
01018                               unsigned int& unicode);
01019   bool decodeUnicodeEscapeSequence(Token& token,
01020                                    Location& current,
01021                                    Location end,
01022                                    unsigned int& unicode);
01023   bool addError(const std::string& message, Token& token, Location extra = 0);
01024   bool recoverFromError(TokenType skipUntilToken);
01025   bool addErrorAndRecover(const std::string& message,
01026                           Token& token,
01027                           TokenType skipUntilToken);
01028   void skipUntilSpace();
01029   Value& currentValue();
01030   Char getNextChar();
01031   void
01032   getLocationLineAndColumn(Location location, int& line, int& column) const;
01033   std::string getLocationLineAndColumn(Location location) const;
01034   void addComment(Location begin, Location end, CommentPlacement placement);
01035   void skipCommentTokens(Token& token);
01036 
01037   typedef std::stack<Value*> Nodes;
01038   Nodes nodes_;
01039   Errors errors_;
01040   std::string document_;
01041   Location begin_;
01042   Location end_;
01043   Location current_;
01044   Location lastValueEnd_;
01045   Value* lastValue_;
01046   std::string commentsBefore_;
01047   int stackDepth_;
01048 
01049   OurFeatures const features_;
01050   bool collectComments_;
01051 };  // OurReader
01052 
01053 // complete copy of Read impl, for OurReader
01054 
01055 OurReader::OurReader(OurFeatures const& features)
01056     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
01057       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
01058 }
01059 
01060 bool OurReader::parse(const char* beginDoc,
01061                    const char* endDoc,
01062                    Value& root,
01063                    bool collectComments) {
01064   if (!features_.allowComments_) {
01065     collectComments = false;
01066   }
01067 
01068   begin_ = beginDoc;
01069   end_ = endDoc;
01070   collectComments_ = collectComments;
01071   current_ = begin_;
01072   lastValueEnd_ = 0;
01073   lastValue_ = 0;
01074   commentsBefore_ = "";
01075   errors_.clear();
01076   while (!nodes_.empty())
01077     nodes_.pop();
01078   nodes_.push(&root);
01079 
01080   stackDepth_ = 0;
01081   bool successful = readValue();
01082   Token token;
01083   skipCommentTokens(token);
01084   if (features_.failIfExtra_) {
01085     if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
01086       addError("Extra non-whitespace after JSON value.", token);
01087       return false;
01088     }
01089   }
01090   if (collectComments_ && !commentsBefore_.empty())
01091     root.setComment(commentsBefore_, commentAfter);
01092   if (features_.strictRoot_) {
01093     if (!root.isArray() && !root.isObject()) {
01094       // Set error location to start of doc, ideally should be first token found
01095       // in doc
01096       token.type_ = tokenError;
01097       token.start_ = beginDoc;
01098       token.end_ = endDoc;
01099       addError(
01100           "A valid JSON document must be either an array or an object value.",
01101           token);
01102       return false;
01103     }
01104   }
01105   return successful;
01106 }
01107 
01108 bool OurReader::readValue() {
01109   if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
01110   ++stackDepth_;
01111   Token token;
01112   skipCommentTokens(token);
01113   bool successful = true;
01114 
01115   if (collectComments_ && !commentsBefore_.empty()) {
01116     currentValue().setComment(commentsBefore_, commentBefore);
01117     commentsBefore_ = "";
01118   }
01119 
01120   switch (token.type_) {
01121   case tokenObjectBegin:
01122     successful = readObject(token);
01123     currentValue().setOffsetLimit(current_ - begin_);
01124     break;
01125   case tokenArrayBegin:
01126     successful = readArray(token);
01127     currentValue().setOffsetLimit(current_ - begin_);
01128     break;
01129   case tokenNumber:
01130     successful = decodeNumber(token);
01131     break;
01132   case tokenString:
01133     successful = decodeString(token);
01134     break;
01135   case tokenTrue:
01136     {
01137     Value v(true);
01138     currentValue().swapPayload(v);
01139     currentValue().setOffsetStart(token.start_ - begin_);
01140     currentValue().setOffsetLimit(token.end_ - begin_);
01141     }
01142     break;
01143   case tokenFalse:
01144     {
01145     Value v(false);
01146     currentValue().swapPayload(v);
01147     currentValue().setOffsetStart(token.start_ - begin_);
01148     currentValue().setOffsetLimit(token.end_ - begin_);
01149     }
01150     break;
01151   case tokenNull:
01152     {
01153     Value v;
01154     currentValue().swapPayload(v);
01155     currentValue().setOffsetStart(token.start_ - begin_);
01156     currentValue().setOffsetLimit(token.end_ - begin_);
01157     }
01158     break;
01159   case tokenArraySeparator:
01160   case tokenObjectEnd:
01161   case tokenArrayEnd:
01162     if (features_.allowDroppedNullPlaceholders_) {
01163       // "Un-read" the current token and mark the current value as a null
01164       // token.
01165       current_--;
01166       Value v;
01167       currentValue().swapPayload(v);
01168       currentValue().setOffsetStart(current_ - begin_ - 1);
01169       currentValue().setOffsetLimit(current_ - begin_);
01170       break;
01171     } // else, fall through ...
01172   default:
01173     currentValue().setOffsetStart(token.start_ - begin_);
01174     currentValue().setOffsetLimit(token.end_ - begin_);
01175     return addError("Syntax error: value, object or array expected.", token);
01176   }
01177 
01178   if (collectComments_) {
01179     lastValueEnd_ = current_;
01180     lastValue_ = &currentValue();
01181   }
01182 
01183   --stackDepth_;
01184   return successful;
01185 }
01186 
01187 void OurReader::skipCommentTokens(Token& token) {
01188   if (features_.allowComments_) {
01189     do {
01190       readToken(token);
01191     } while (token.type_ == tokenComment);
01192   } else {
01193     readToken(token);
01194   }
01195 }
01196 
01197 bool OurReader::readToken(Token& token) {
01198   skipSpaces();
01199   token.start_ = current_;
01200   Char c = getNextChar();
01201   bool ok = true;
01202   switch (c) {
01203   case '{':
01204     token.type_ = tokenObjectBegin;
01205     break;
01206   case '}':
01207     token.type_ = tokenObjectEnd;
01208     break;
01209   case '[':
01210     token.type_ = tokenArrayBegin;
01211     break;
01212   case ']':
01213     token.type_ = tokenArrayEnd;
01214     break;
01215   case '"':
01216     token.type_ = tokenString;
01217     ok = readString();
01218     break;
01219   case '\'':
01220     if (features_.allowSingleQuotes_) {
01221     token.type_ = tokenString;
01222     ok = readStringSingleQuote();
01223     break;
01224     } // else continue
01225   case '/':
01226     token.type_ = tokenComment;
01227     ok = readComment();
01228     break;
01229   case '0':
01230   case '1':
01231   case '2':
01232   case '3':
01233   case '4':
01234   case '5':
01235   case '6':
01236   case '7':
01237   case '8':
01238   case '9':
01239   case '-':
01240     token.type_ = tokenNumber;
01241     readNumber();
01242     break;
01243   case 't':
01244     token.type_ = tokenTrue;
01245     ok = match("rue", 3);
01246     break;
01247   case 'f':
01248     token.type_ = tokenFalse;
01249     ok = match("alse", 4);
01250     break;
01251   case 'n':
01252     token.type_ = tokenNull;
01253     ok = match("ull", 3);
01254     break;
01255   case ',':
01256     token.type_ = tokenArraySeparator;
01257     break;
01258   case ':':
01259     token.type_ = tokenMemberSeparator;
01260     break;
01261   case 0:
01262     token.type_ = tokenEndOfStream;
01263     break;
01264   default:
01265     ok = false;
01266     break;
01267   }
01268   if (!ok)
01269     token.type_ = tokenError;
01270   token.end_ = current_;
01271   return true;
01272 }
01273 
01274 void OurReader::skipSpaces() {
01275   while (current_ != end_) {
01276     Char c = *current_;
01277     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
01278       ++current_;
01279     else
01280       break;
01281   }
01282 }
01283 
01284 bool OurReader::match(Location pattern, int patternLength) {
01285   if (end_ - current_ < patternLength)
01286     return false;
01287   int index = patternLength;
01288   while (index--)
01289     if (current_[index] != pattern[index])
01290       return false;
01291   current_ += patternLength;
01292   return true;
01293 }
01294 
01295 bool OurReader::readComment() {
01296   Location commentBegin = current_ - 1;
01297   Char c = getNextChar();
01298   bool successful = false;
01299   if (c == '*')
01300     successful = readCStyleComment();
01301   else if (c == '/')
01302     successful = readCppStyleComment();
01303   if (!successful)
01304     return false;
01305 
01306   if (collectComments_) {
01307     CommentPlacement placement = commentBefore;
01308     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
01309       if (c != '*' || !containsNewLine(commentBegin, current_))
01310         placement = commentAfterOnSameLine;
01311     }
01312 
01313     addComment(commentBegin, current_, placement);
01314   }
01315   return true;
01316 }
01317 
01318 void
01319 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
01320   assert(collectComments_);
01321   const std::string& normalized = normalizeEOL(begin, end);
01322   if (placement == commentAfterOnSameLine) {
01323     assert(lastValue_ != 0);
01324     lastValue_->setComment(normalized, placement);
01325   } else {
01326     commentsBefore_ += normalized;
01327   }
01328 }
01329 
01330 bool OurReader::readCStyleComment() {
01331   while (current_ != end_) {
01332     Char c = getNextChar();
01333     if (c == '*' && *current_ == '/')
01334       break;
01335   }
01336   return getNextChar() == '/';
01337 }
01338 
01339 bool OurReader::readCppStyleComment() {
01340   while (current_ != end_) {
01341     Char c = getNextChar();
01342     if (c == '\n')
01343       break;
01344     if (c == '\r') {
01345       // Consume DOS EOL. It will be normalized in addComment.
01346       if (current_ != end_ && *current_ == '\n')
01347         getNextChar();
01348       // Break on Moc OS 9 EOL.
01349       break;
01350     }
01351   }
01352   return true;
01353 }
01354 
01355 void OurReader::readNumber() {
01356   const char *p = current_;
01357   char c = '0'; // stopgap for already consumed character
01358   // integral part
01359   while (c >= '0' && c <= '9')
01360     c = (current_ = p) < end_ ? *p++ : 0;
01361   // fractional part
01362   if (c == '.') {
01363     c = (current_ = p) < end_ ? *p++ : 0;
01364     while (c >= '0' && c <= '9')
01365       c = (current_ = p) < end_ ? *p++ : 0;
01366   }
01367   // exponential part
01368   if (c == 'e' || c == 'E') {
01369     c = (current_ = p) < end_ ? *p++ : 0;
01370     if (c == '+' || c == '-')
01371       c = (current_ = p) < end_ ? *p++ : 0;
01372     while (c >= '0' && c <= '9')
01373       c = (current_ = p) < end_ ? *p++ : 0;
01374   }
01375 }
01376 bool OurReader::readString() {
01377   Char c = 0;
01378   while (current_ != end_) {
01379     c = getNextChar();
01380     if (c == '\\')
01381       getNextChar();
01382     else if (c == '"')
01383       break;
01384   }
01385   return c == '"';
01386 }
01387 
01388 
01389 bool OurReader::readStringSingleQuote() {
01390   Char c = 0;
01391   while (current_ != end_) {
01392     c = getNextChar();
01393     if (c == '\\')
01394       getNextChar();
01395     else if (c == '\'')
01396       break;
01397   }
01398   return c == '\'';
01399 }
01400 
01401 bool OurReader::readObject(Token& tokenStart) {
01402   Token tokenName;
01403   std::string name;
01404   Value init(objectValue);
01405   currentValue().swapPayload(init);
01406   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01407   while (readToken(tokenName)) {
01408     bool initialTokenOk = true;
01409     while (tokenName.type_ == tokenComment && initialTokenOk)
01410       initialTokenOk = readToken(tokenName);
01411     if (!initialTokenOk)
01412       break;
01413     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
01414       return true;
01415     name = "";
01416     if (tokenName.type_ == tokenString) {
01417       if (!decodeString(tokenName, name))
01418         return recoverFromError(tokenObjectEnd);
01419     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
01420       Value numberName;
01421       if (!decodeNumber(tokenName, numberName))
01422         return recoverFromError(tokenObjectEnd);
01423       name = numberName.asString();
01424     } else {
01425       break;
01426     }
01427 
01428     Token colon;
01429     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
01430       return addErrorAndRecover(
01431           "Missing ':' after object member name", colon, tokenObjectEnd);
01432     }
01433     if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
01434     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
01435       std::string msg = "Duplicate key: '" + name + "'";
01436       return addErrorAndRecover(
01437           msg, tokenName, tokenObjectEnd);
01438     }
01439     Value& value = currentValue()[name];
01440     nodes_.push(&value);
01441     bool ok = readValue();
01442     nodes_.pop();
01443     if (!ok) // error already set
01444       return recoverFromError(tokenObjectEnd);
01445 
01446     Token comma;
01447     if (!readToken(comma) ||
01448         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
01449          comma.type_ != tokenComment)) {
01450       return addErrorAndRecover(
01451           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
01452     }
01453     bool finalizeTokenOk = true;
01454     while (comma.type_ == tokenComment && finalizeTokenOk)
01455       finalizeTokenOk = readToken(comma);
01456     if (comma.type_ == tokenObjectEnd)
01457       return true;
01458   }
01459   return addErrorAndRecover(
01460       "Missing '}' or object member name", tokenName, tokenObjectEnd);
01461 }
01462 
01463 bool OurReader::readArray(Token& tokenStart) {
01464   Value init(arrayValue);
01465   currentValue().swapPayload(init);
01466   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01467   skipSpaces();
01468   if (*current_ == ']') // empty array
01469   {
01470     Token endArray;
01471     readToken(endArray);
01472     return true;
01473   }
01474   int index = 0;
01475   for (;;) {
01476     Value& value = currentValue()[index++];
01477     nodes_.push(&value);
01478     bool ok = readValue();
01479     nodes_.pop();
01480     if (!ok) // error already set
01481       return recoverFromError(tokenArrayEnd);
01482 
01483     Token token;
01484     // Accept Comment after last item in the array.
01485     ok = readToken(token);
01486     while (token.type_ == tokenComment && ok) {
01487       ok = readToken(token);
01488     }
01489     bool badTokenType =
01490         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
01491     if (!ok || badTokenType) {
01492       return addErrorAndRecover(
01493           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
01494     }
01495     if (token.type_ == tokenArrayEnd)
01496       break;
01497   }
01498   return true;
01499 }
01500 
01501 bool OurReader::decodeNumber(Token& token) {
01502   Value decoded;
01503   if (!decodeNumber(token, decoded))
01504     return false;
01505   currentValue().swapPayload(decoded);
01506   currentValue().setOffsetStart(token.start_ - begin_);
01507   currentValue().setOffsetLimit(token.end_ - begin_);
01508   return true;
01509 }
01510 
01511 bool OurReader::decodeNumber(Token& token, Value& decoded) {
01512   // Attempts to parse the number as an integer. If the number is
01513   // larger than the maximum supported value of an integer then
01514   // we decode the number as a double.
01515   Location current = token.start_;
01516   bool isNegative = *current == '-';
01517   if (isNegative)
01518     ++current;
01519   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
01520   Value::LargestUInt maxIntegerValue =
01521       isNegative ? Value::LargestUInt(-Value::minLargestInt)
01522                  : Value::maxLargestUInt;
01523   Value::LargestUInt threshold = maxIntegerValue / 10;
01524   Value::LargestUInt value = 0;
01525   while (current < token.end_) {
01526     Char c = *current++;
01527     if (c < '0' || c > '9')
01528       return decodeDouble(token, decoded);
01529     Value::UInt digit(c - '0');
01530     if (value >= threshold) {
01531       // We've hit or exceeded the max value divided by 10 (rounded down). If
01532       // a) we've only just touched the limit, b) this is the last digit, and
01533       // c) it's small enough to fit in that rounding delta, we're okay.
01534       // Otherwise treat this number as a double to avoid overflow.
01535       if (value > threshold || current != token.end_ ||
01536           digit > maxIntegerValue % 10) {
01537         return decodeDouble(token, decoded);
01538       }
01539     }
01540     value = value * 10 + digit;
01541   }
01542   if (isNegative)
01543     decoded = -Value::LargestInt(value);
01544   else if (value <= Value::LargestUInt(Value::maxInt))
01545     decoded = Value::LargestInt(value);
01546   else
01547     decoded = value;
01548   return true;
01549 }
01550 
01551 bool OurReader::decodeDouble(Token& token) {
01552   Value decoded;
01553   if (!decodeDouble(token, decoded))
01554     return false;
01555   currentValue().swapPayload(decoded);
01556   currentValue().setOffsetStart(token.start_ - begin_);
01557   currentValue().setOffsetLimit(token.end_ - begin_);
01558   return true;
01559 }
01560 
01561 bool OurReader::decodeDouble(Token& token, Value& decoded) {
01562   double value = 0;
01563   const int bufferSize = 32;
01564   int count;
01565   int length = int(token.end_ - token.start_);
01566 
01567   // Sanity check to avoid buffer overflow exploits.
01568   if (length < 0) {
01569     return addError("Unable to parse token length", token);
01570   }
01571 
01572   // Avoid using a string constant for the format control string given to
01573   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
01574   // info:
01575   //
01576   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
01577   char format[] = "%lf";
01578 
01579   if (length <= bufferSize) {
01580     Char buffer[bufferSize + 1];
01581     memcpy(buffer, token.start_, length);
01582     buffer[length] = 0;
01583     count = sscanf(buffer, format, &value);
01584   } else {
01585     std::string buffer(token.start_, token.end_);
01586     count = sscanf(buffer.c_str(), format, &value);
01587   }
01588 
01589   if (count != 1)
01590     return addError("'" + std::string(token.start_, token.end_) +
01591                         "' is not a number.",
01592                     token);
01593   decoded = value;
01594   return true;
01595 }
01596 
01597 bool OurReader::decodeString(Token& token) {
01598   std::string decoded_string;
01599   if (!decodeString(token, decoded_string))
01600     return false;
01601   Value decoded(decoded_string);
01602   currentValue().swapPayload(decoded);
01603   currentValue().setOffsetStart(token.start_ - begin_);
01604   currentValue().setOffsetLimit(token.end_ - begin_);
01605   return true;
01606 }
01607 
01608 bool OurReader::decodeString(Token& token, std::string& decoded) {
01609   decoded.reserve(token.end_ - token.start_ - 2);
01610   Location current = token.start_ + 1; // skip '"'
01611   Location end = token.end_ - 1;       // do not include '"'
01612   while (current != end) {
01613     Char c = *current++;
01614     if (c == '"')
01615       break;
01616     else if (c == '\\') {
01617       if (current == end)
01618         return addError("Empty escape sequence in string", token, current);
01619       Char escape = *current++;
01620       switch (escape) {
01621       case '"':
01622         decoded += '"';
01623         break;
01624       case '/':
01625         decoded += '/';
01626         break;
01627       case '\\':
01628         decoded += '\\';
01629         break;
01630       case 'b':
01631         decoded += '\b';
01632         break;
01633       case 'f':
01634         decoded += '\f';
01635         break;
01636       case 'n':
01637         decoded += '\n';
01638         break;
01639       case 'r':
01640         decoded += '\r';
01641         break;
01642       case 't':
01643         decoded += '\t';
01644         break;
01645       case 'u': {
01646         unsigned int unicode;
01647         if (!decodeUnicodeCodePoint(token, current, end, unicode))
01648           return false;
01649         decoded += codePointToUTF8(unicode);
01650       } break;
01651       default:
01652         return addError("Bad escape sequence in string", token, current);
01653       }
01654     } else {
01655       decoded += c;
01656     }
01657   }
01658   return true;
01659 }
01660 
01661 bool OurReader::decodeUnicodeCodePoint(Token& token,
01662                                     Location& current,
01663                                     Location end,
01664                                     unsigned int& unicode) {
01665 
01666   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
01667     return false;
01668   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
01669     // surrogate pairs
01670     if (end - current < 6)
01671       return addError(
01672           "additional six characters expected to parse unicode surrogate pair.",
01673           token,
01674           current);
01675     unsigned int surrogatePair;
01676     if (*(current++) == '\\' && *(current++) == 'u') {
01677       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
01678         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
01679       } else
01680         return false;
01681     } else
01682       return addError("expecting another \\u token to begin the second half of "
01683                       "a unicode surrogate pair",
01684                       token,
01685                       current);
01686   }
01687   return true;
01688 }
01689 
01690 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
01691                                          Location& current,
01692                                          Location end,
01693                                          unsigned int& unicode) {
01694   if (end - current < 4)
01695     return addError(
01696         "Bad unicode escape sequence in string: four digits expected.",
01697         token,
01698         current);
01699   unicode = 0;
01700   for (int index = 0; index < 4; ++index) {
01701     Char c = *current++;
01702     unicode *= 16;
01703     if (c >= '0' && c <= '9')
01704       unicode += c - '0';
01705     else if (c >= 'a' && c <= 'f')
01706       unicode += c - 'a' + 10;
01707     else if (c >= 'A' && c <= 'F')
01708       unicode += c - 'A' + 10;
01709     else
01710       return addError(
01711           "Bad unicode escape sequence in string: hexadecimal digit expected.",
01712           token,
01713           current);
01714   }
01715   return true;
01716 }
01717 
01718 bool
01719 OurReader::addError(const std::string& message, Token& token, Location extra) {
01720   ErrorInfo info;
01721   info.token_ = token;
01722   info.message_ = message;
01723   info.extra_ = extra;
01724   errors_.push_back(info);
01725   return false;
01726 }
01727 
01728 bool OurReader::recoverFromError(TokenType skipUntilToken) {
01729   int errorCount = int(errors_.size());
01730   Token skip;
01731   for (;;) {
01732     if (!readToken(skip))
01733       errors_.resize(errorCount); // discard errors caused by recovery
01734     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
01735       break;
01736   }
01737   errors_.resize(errorCount);
01738   return false;
01739 }
01740 
01741 bool OurReader::addErrorAndRecover(const std::string& message,
01742                                 Token& token,
01743                                 TokenType skipUntilToken) {
01744   addError(message, token);
01745   return recoverFromError(skipUntilToken);
01746 }
01747 
01748 Value& OurReader::currentValue() { return *(nodes_.top()); }
01749 
01750 OurReader::Char OurReader::getNextChar() {
01751   if (current_ == end_)
01752     return 0;
01753   return *current_++;
01754 }
01755 
01756 void OurReader::getLocationLineAndColumn(Location location,
01757                                       int& line,
01758                                       int& column) const {
01759   Location current = begin_;
01760   Location lastLineStart = current;
01761   line = 0;
01762   while (current < location && current != end_) {
01763     Char c = *current++;
01764     if (c == '\r') {
01765       if (*current == '\n')
01766         ++current;
01767       lastLineStart = current;
01768       ++line;
01769     } else if (c == '\n') {
01770       lastLineStart = current;
01771       ++line;
01772     }
01773   }
01774   // column & line start at 1
01775   column = int(location - lastLineStart) + 1;
01776   ++line;
01777 }
01778 
01779 std::string OurReader::getLocationLineAndColumn(Location location) const {
01780   int line, column;
01781   getLocationLineAndColumn(location, line, column);
01782   char buffer[18 + 16 + 16 + 1];
01783 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
01784 #if defined(WINCE)
01785   _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01786 #else
01787   sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01788 #endif
01789 #else
01790   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01791 #endif
01792   return buffer;
01793 }
01794 
01795 std::string OurReader::getFormattedErrorMessages() const {
01796   std::string formattedMessage;
01797   for (Errors::const_iterator itError = errors_.begin();
01798        itError != errors_.end();
01799        ++itError) {
01800     const ErrorInfo& error = *itError;
01801     formattedMessage +=
01802         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
01803     formattedMessage += "  " + error.message_ + "\n";
01804     if (error.extra_)
01805       formattedMessage +=
01806           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
01807   }
01808   return formattedMessage;
01809 }
01810 
01811 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
01812   std::vector<OurReader::StructuredError> allErrors;
01813   for (Errors::const_iterator itError = errors_.begin();
01814        itError != errors_.end();
01815        ++itError) {
01816     const ErrorInfo& error = *itError;
01817     OurReader::StructuredError structured;
01818     structured.offset_start = error.token_.start_ - begin_;
01819     structured.offset_limit = error.token_.end_ - begin_;
01820     structured.message = error.message_;
01821     allErrors.push_back(structured);
01822   }
01823   return allErrors;
01824 }
01825 
01826 bool OurReader::pushError(const Value& value, const std::string& message) {
01827   size_t length = end_ - begin_;
01828   if(value.getOffsetStart() > length
01829     || value.getOffsetLimit() > length)
01830     return false;
01831   Token token;
01832   token.type_ = tokenError;
01833   token.start_ = begin_ + value.getOffsetStart();
01834   token.end_ = end_ + value.getOffsetLimit();
01835   ErrorInfo info;
01836   info.token_ = token;
01837   info.message_ = message;
01838   info.extra_ = 0;
01839   errors_.push_back(info);
01840   return true;
01841 }
01842 
01843 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
01844   size_t length = end_ - begin_;
01845   if(value.getOffsetStart() > length
01846     || value.getOffsetLimit() > length
01847     || extra.getOffsetLimit() > length)
01848     return false;
01849   Token token;
01850   token.type_ = tokenError;
01851   token.start_ = begin_ + value.getOffsetStart();
01852   token.end_ = begin_ + value.getOffsetLimit();
01853   ErrorInfo info;
01854   info.token_ = token;
01855   info.message_ = message;
01856   info.extra_ = begin_ + extra.getOffsetStart();
01857   errors_.push_back(info);
01858   return true;
01859 }
01860 
01861 bool OurReader::good() const {
01862   return !errors_.size();
01863 }
01864 
01865 
01866 class OurCharReader : public CharReader {
01867   bool const collectComments_;
01868   OurReader reader_;
01869 public:
01870   OurCharReader(
01871     bool collectComments,
01872     OurFeatures const& features)
01873   : collectComments_(collectComments)
01874   , reader_(features)
01875   {}
01876   virtual bool parse(
01877       char const* beginDoc, char const* endDoc,
01878       Value* root, std::string* errs) {
01879     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
01880     if (errs) {
01881       *errs = reader_.getFormattedErrorMessages();
01882     }
01883     return ok;
01884   }
01885 };
01886 
01887 CharReaderBuilder::CharReaderBuilder()
01888 {
01889   setDefaults(&settings_);
01890 }
01891 CharReaderBuilder::~CharReaderBuilder()
01892 {}
01893 CharReader* CharReaderBuilder::newCharReader() const
01894 {
01895   bool collectComments = settings_["collectComments"].asBool();
01896   OurFeatures features = OurFeatures::all();
01897   features.allowComments_ = settings_["allowComments"].asBool();
01898   features.strictRoot_ = settings_["strictRoot"].asBool();
01899   features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
01900   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
01901   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
01902   features.stackLimit_ = settings_["stackLimit"].asInt();
01903   features.failIfExtra_ = settings_["failIfExtra"].asBool();
01904   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
01905   return new OurCharReader(collectComments, features);
01906 }
// Replaces the contents of *valid_keys with the nine reader setting names
// listed in the table below. Anything previously stored in the set is
// discarded first.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kReaderKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "allowSingleQuotes",
    "stackLimit",
    "failIfExtra",
    "rejectDupKeys"
  };
  size_t const keyCount = sizeof(kReaderKeys) / sizeof(kReaderKeys[0]);

  std::set<std::string>& keys = *valid_keys;
  keys.clear();
  for (size_t i = 0; i < keyCount; ++i)
    keys.insert(kReaderKeys[i]);
}
01920 bool CharReaderBuilder::validate(Json::Value* invalid) const
01921 {
01922   Json::Value my_invalid;
01923   if (!invalid) invalid = &my_invalid;  // so we do not need to test for NULL
01924   Json::Value& inv = *invalid;
01925   std::set<std::string> valid_keys;
01926   getValidReaderKeys(&valid_keys);
01927   Value::Members keys = settings_.getMemberNames();
01928   size_t n = keys.size();
01929   for (size_t i = 0; i < n; ++i) {
01930     std::string const& key = keys[i];
01931     if (valid_keys.find(key) == valid_keys.end()) {
01932       inv[key] = settings_[key];
01933     }
01934   }
01935   return 0u == inv.size();
01936 }
01937 Value& CharReaderBuilder::operator[](std::string key)
01938 {
01939   return settings_[key];
01940 }
01941 // static
01942 void CharReaderBuilder::strictMode(Json::Value* settings)
01943 {
01945   (*settings)["allowComments"] = false;
01946   (*settings)["strictRoot"] = true;
01947   (*settings)["allowDroppedNullPlaceholders"] = false;
01948   (*settings)["allowNumericKeys"] = false;
01949   (*settings)["allowSingleQuotes"] = false;
01950   (*settings)["failIfExtra"] = true;
01951   (*settings)["rejectDupKeys"] = true;
01953 }
01954 // static
01955 void CharReaderBuilder::setDefaults(Json::Value* settings)
01956 {
01958   (*settings)["collectComments"] = true;
01959   (*settings)["allowComments"] = true;
01960   (*settings)["strictRoot"] = false;
01961   (*settings)["allowDroppedNullPlaceholders"] = false;
01962   (*settings)["allowNumericKeys"] = false;
01963   (*settings)["allowSingleQuotes"] = false;
01964   (*settings)["stackLimit"] = 1000;
01965   (*settings)["failIfExtra"] = false;
01966   (*settings)["rejectDupKeys"] = false;
01968 }
01969 
01971 // global functions
01972 
01973 bool parseFromStream(
01974     CharReader::Factory const& fact, std::istream& sin,
01975     Value* root, std::string* errs)
01976 {
01977   std::ostringstream ssin;
01978   ssin << sin.rdbuf();
01979   std::string doc = ssin.str();
01980   char const* begin = doc.data();
01981   char const* end = begin + doc.size();
01982   // Note that we do not actually need a null-terminator.
01983   CharReaderPtr const reader(fact.newCharReader());
01984   return reader->parse(begin, end, root, errs);
01985 }
01986 
01987 std::istream& operator>>(std::istream& sin, Value& root) {
01988   CharReaderBuilder b;
01989   std::string errs;
01990   bool ok = parseFromStream(b, sin, &root, &errs);
01991   if (!ok) {
01992     fprintf(stderr,
01993             "Error from reader: %s",
01994             errs.c_str());
01995 
01996     throwRuntimeError("reader error");
01997   }
01998   return sin;
01999 }
02000 
02001 } // namespace Json