00001
00002
00003
00004
00005
00006 #if !defined(JSON_IS_AMALGAMATION)
00007 #include <json/assertions.h>
00008 #include <json/reader.h>
00009 #include <json/value.h>
00010 #include "json_tool.h"
00011 #endif // if !defined(JSON_IS_AMALGAMATION)
00012 #include <utility>
00013 #include <cstdio>
00014 #include <cassert>
00015 #include <cstring>
00016 #include <istream>
00017 #include <sstream>
00018 #include <memory>
00019 #include <set>
00020
00021 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
00022 #define snprintf _snprintf
00023 #endif
00024
00025 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
00026
00027 #pragma warning(disable : 4996)
00028 #endif
00029
// Maximum nesting depth accepted by Reader::readValue() before it throws.
static const int stackLimit_g = 1000;
// Current nesting depth of Reader::readValue(); reset by Reader::parse().
// NOTE(review): file-scope mutable state shared by every Reader instance.
static int stackDepth_g = 0;
00032
00033 namespace Json {
00034
00035 #if __cplusplus >= 201103L
00036 typedef std::unique_ptr<CharReader> CharReaderPtr;
00037 #else
00038 typedef std::auto_ptr<CharReader> CharReaderPtr;
00039 #endif
00040
00041
00042
00043
00044 Features::Features()
00045 : allowComments_(true), strictRoot_(false),
00046 allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
00047
00048 Features Features::all() { return Features(); }
00049
00050 Features Features::strictMode() {
00051 Features features;
00052 features.allowComments_ = false;
00053 features.strictRoot_ = true;
00054 features.allowDroppedNullPlaceholders_ = false;
00055 features.allowNumericKeys_ = false;
00056 return features;
00057 }
00058
00059
00060
00061
00062 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
00063 for (; begin < end; ++begin)
00064 if (*begin == '\n' || *begin == '\r')
00065 return true;
00066 return false;
00067 }
00068
00069
00070
00071
00072 Reader::Reader()
00073 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00074 lastValue_(), commentsBefore_(), features_(Features::all()),
00075 collectComments_() {}
00076
00077 Reader::Reader(const Features& features)
00078 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00079 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
00080 }
00081
00082 bool
00083 Reader::parse(const std::string& document, Value& root, bool collectComments) {
00084 document_ = document;
00085 const char* begin = document_.c_str();
00086 const char* end = begin + document_.length();
00087 return parse(begin, end, root, collectComments);
00088 }
00089
00090 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
00091
00092
00093
00094
00095
00096
00097
00098 std::string doc;
00099 std::getline(sin, doc, (char)EOF);
00100 return parse(doc, root, collectComments);
00101 }
00102
00103 bool Reader::parse(const char* beginDoc,
00104 const char* endDoc,
00105 Value& root,
00106 bool collectComments) {
00107 if (!features_.allowComments_) {
00108 collectComments = false;
00109 }
00110
00111 begin_ = beginDoc;
00112 end_ = endDoc;
00113 collectComments_ = collectComments;
00114 current_ = begin_;
00115 lastValueEnd_ = 0;
00116 lastValue_ = 0;
00117 commentsBefore_ = "";
00118 errors_.clear();
00119 while (!nodes_.empty())
00120 nodes_.pop();
00121 nodes_.push(&root);
00122
00123 stackDepth_g = 0;
00124 bool successful = readValue();
00125 Token token;
00126 skipCommentTokens(token);
00127 if (collectComments_ && !commentsBefore_.empty())
00128 root.setComment(commentsBefore_, commentAfter);
00129 if (features_.strictRoot_) {
00130 if (!root.isArray() && !root.isObject()) {
00131
00132
00133 token.type_ = tokenError;
00134 token.start_ = beginDoc;
00135 token.end_ = endDoc;
00136 addError(
00137 "A valid JSON document must be either an array or an object value.",
00138 token);
00139 return false;
00140 }
00141 }
00142 return successful;
00143 }
00144
00145 bool Reader::readValue() {
00146
00147
00148
00149
00150 if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
00151 ++stackDepth_g;
00152
00153 Token token;
00154 skipCommentTokens(token);
00155 bool successful = true;
00156
00157 if (collectComments_ && !commentsBefore_.empty()) {
00158 currentValue().setComment(commentsBefore_, commentBefore);
00159 commentsBefore_ = "";
00160 }
00161
00162 switch (token.type_) {
00163 case tokenObjectBegin:
00164 successful = readObject(token);
00165 currentValue().setOffsetLimit(current_ - begin_);
00166 break;
00167 case tokenArrayBegin:
00168 successful = readArray(token);
00169 currentValue().setOffsetLimit(current_ - begin_);
00170 break;
00171 case tokenNumber:
00172 successful = decodeNumber(token);
00173 break;
00174 case tokenString:
00175 successful = decodeString(token);
00176 break;
00177 case tokenTrue:
00178 {
00179 Value v(true);
00180 currentValue().swapPayload(v);
00181 currentValue().setOffsetStart(token.start_ - begin_);
00182 currentValue().setOffsetLimit(token.end_ - begin_);
00183 }
00184 break;
00185 case tokenFalse:
00186 {
00187 Value v(false);
00188 currentValue().swapPayload(v);
00189 currentValue().setOffsetStart(token.start_ - begin_);
00190 currentValue().setOffsetLimit(token.end_ - begin_);
00191 }
00192 break;
00193 case tokenNull:
00194 {
00195 Value v;
00196 currentValue().swapPayload(v);
00197 currentValue().setOffsetStart(token.start_ - begin_);
00198 currentValue().setOffsetLimit(token.end_ - begin_);
00199 }
00200 break;
00201 case tokenArraySeparator:
00202 case tokenObjectEnd:
00203 case tokenArrayEnd:
00204 if (features_.allowDroppedNullPlaceholders_) {
00205
00206
00207 current_--;
00208 Value v;
00209 currentValue().swapPayload(v);
00210 currentValue().setOffsetStart(current_ - begin_ - 1);
00211 currentValue().setOffsetLimit(current_ - begin_);
00212 break;
00213 }
00214 default:
00215 currentValue().setOffsetStart(token.start_ - begin_);
00216 currentValue().setOffsetLimit(token.end_ - begin_);
00217 return addError("Syntax error: value, object or array expected.", token);
00218 }
00219
00220 if (collectComments_) {
00221 lastValueEnd_ = current_;
00222 lastValue_ = ¤tValue();
00223 }
00224
00225 --stackDepth_g;
00226 return successful;
00227 }
00228
00229 void Reader::skipCommentTokens(Token& token) {
00230 if (features_.allowComments_) {
00231 do {
00232 readToken(token);
00233 } while (token.type_ == tokenComment);
00234 } else {
00235 readToken(token);
00236 }
00237 }
00238
00239 bool Reader::readToken(Token& token) {
00240 skipSpaces();
00241 token.start_ = current_;
00242 Char c = getNextChar();
00243 bool ok = true;
00244 switch (c) {
00245 case '{':
00246 token.type_ = tokenObjectBegin;
00247 break;
00248 case '}':
00249 token.type_ = tokenObjectEnd;
00250 break;
00251 case '[':
00252 token.type_ = tokenArrayBegin;
00253 break;
00254 case ']':
00255 token.type_ = tokenArrayEnd;
00256 break;
00257 case '"':
00258 token.type_ = tokenString;
00259 ok = readString();
00260 break;
00261 case '/':
00262 token.type_ = tokenComment;
00263 ok = readComment();
00264 break;
00265 case '0':
00266 case '1':
00267 case '2':
00268 case '3':
00269 case '4':
00270 case '5':
00271 case '6':
00272 case '7':
00273 case '8':
00274 case '9':
00275 case '-':
00276 token.type_ = tokenNumber;
00277 readNumber();
00278 break;
00279 case 't':
00280 token.type_ = tokenTrue;
00281 ok = match("rue", 3);
00282 break;
00283 case 'f':
00284 token.type_ = tokenFalse;
00285 ok = match("alse", 4);
00286 break;
00287 case 'n':
00288 token.type_ = tokenNull;
00289 ok = match("ull", 3);
00290 break;
00291 case ',':
00292 token.type_ = tokenArraySeparator;
00293 break;
00294 case ':':
00295 token.type_ = tokenMemberSeparator;
00296 break;
00297 case 0:
00298 token.type_ = tokenEndOfStream;
00299 break;
00300 default:
00301 ok = false;
00302 break;
00303 }
00304 if (!ok)
00305 token.type_ = tokenError;
00306 token.end_ = current_;
00307 return true;
00308 }
00309
00310 void Reader::skipSpaces() {
00311 while (current_ != end_) {
00312 Char c = *current_;
00313 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
00314 ++current_;
00315 else
00316 break;
00317 }
00318 }
00319
00320 bool Reader::match(Location pattern, int patternLength) {
00321 if (end_ - current_ < patternLength)
00322 return false;
00323 int index = patternLength;
00324 while (index--)
00325 if (current_[index] != pattern[index])
00326 return false;
00327 current_ += patternLength;
00328 return true;
00329 }
00330
00331 bool Reader::readComment() {
00332 Location commentBegin = current_ - 1;
00333 Char c = getNextChar();
00334 bool successful = false;
00335 if (c == '*')
00336 successful = readCStyleComment();
00337 else if (c == '/')
00338 successful = readCppStyleComment();
00339 if (!successful)
00340 return false;
00341
00342 if (collectComments_) {
00343 CommentPlacement placement = commentBefore;
00344 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
00345 if (c != '*' || !containsNewLine(commentBegin, current_))
00346 placement = commentAfterOnSameLine;
00347 }
00348
00349 addComment(commentBegin, current_, placement);
00350 }
00351 return true;
00352 }
00353
00354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
00355 std::string normalized;
00356 normalized.reserve(end - begin);
00357 Reader::Location current = begin;
00358 while (current != end) {
00359 char c = *current++;
00360 if (c == '\r') {
00361 if (current != end && *current == '\n')
00362
00363 ++current;
00364
00365 normalized += '\n';
00366 } else {
00367 normalized += c;
00368 }
00369 }
00370 return normalized;
00371 }
00372
00373 void
00374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
00375 assert(collectComments_);
00376 const std::string& normalized = normalizeEOL(begin, end);
00377 if (placement == commentAfterOnSameLine) {
00378 assert(lastValue_ != 0);
00379 lastValue_->setComment(normalized, placement);
00380 } else {
00381 commentsBefore_ += normalized;
00382 }
00383 }
00384
00385 bool Reader::readCStyleComment() {
00386 while (current_ != end_) {
00387 Char c = getNextChar();
00388 if (c == '*' && *current_ == '/')
00389 break;
00390 }
00391 return getNextChar() == '/';
00392 }
00393
00394 bool Reader::readCppStyleComment() {
00395 while (current_ != end_) {
00396 Char c = getNextChar();
00397 if (c == '\n')
00398 break;
00399 if (c == '\r') {
00400
00401 if (current_ != end_ && *current_ == '\n')
00402 getNextChar();
00403
00404 break;
00405 }
00406 }
00407 return true;
00408 }
00409
00410 void Reader::readNumber() {
00411 const char *p = current_;
00412 char c = '0';
00413
00414 while (c >= '0' && c <= '9')
00415 c = (current_ = p) < end_ ? *p++ : 0;
00416
00417 if (c == '.') {
00418 c = (current_ = p) < end_ ? *p++ : 0;
00419 while (c >= '0' && c <= '9')
00420 c = (current_ = p) < end_ ? *p++ : 0;
00421 }
00422
00423 if (c == 'e' || c == 'E') {
00424 c = (current_ = p) < end_ ? *p++ : 0;
00425 if (c == '+' || c == '-')
00426 c = (current_ = p) < end_ ? *p++ : 0;
00427 while (c >= '0' && c <= '9')
00428 c = (current_ = p) < end_ ? *p++ : 0;
00429 }
00430 }
00431
00432 bool Reader::readString() {
00433 Char c = 0;
00434 while (current_ != end_) {
00435 c = getNextChar();
00436 if (c == '\\')
00437 getNextChar();
00438 else if (c == '"')
00439 break;
00440 }
00441 return c == '"';
00442 }
00443
00444 bool Reader::readObject(Token& tokenStart) {
00445 Token tokenName;
00446 std::string name;
00447 Value init(objectValue);
00448 currentValue().swapPayload(init);
00449 currentValue().setOffsetStart(tokenStart.start_ - begin_);
00450 while (readToken(tokenName)) {
00451 bool initialTokenOk = true;
00452 while (tokenName.type_ == tokenComment && initialTokenOk)
00453 initialTokenOk = readToken(tokenName);
00454 if (!initialTokenOk)
00455 break;
00456 if (tokenName.type_ == tokenObjectEnd && name.empty())
00457 return true;
00458 name = "";
00459 if (tokenName.type_ == tokenString) {
00460 if (!decodeString(tokenName, name))
00461 return recoverFromError(tokenObjectEnd);
00462 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
00463 Value numberName;
00464 if (!decodeNumber(tokenName, numberName))
00465 return recoverFromError(tokenObjectEnd);
00466 name = numberName.asString();
00467 } else {
00468 break;
00469 }
00470
00471 Token colon;
00472 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
00473 return addErrorAndRecover(
00474 "Missing ':' after object member name", colon, tokenObjectEnd);
00475 }
00476 Value& value = currentValue()[name];
00477 nodes_.push(&value);
00478 bool ok = readValue();
00479 nodes_.pop();
00480 if (!ok)
00481 return recoverFromError(tokenObjectEnd);
00482
00483 Token comma;
00484 if (!readToken(comma) ||
00485 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
00486 comma.type_ != tokenComment)) {
00487 return addErrorAndRecover(
00488 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
00489 }
00490 bool finalizeTokenOk = true;
00491 while (comma.type_ == tokenComment && finalizeTokenOk)
00492 finalizeTokenOk = readToken(comma);
00493 if (comma.type_ == tokenObjectEnd)
00494 return true;
00495 }
00496 return addErrorAndRecover(
00497 "Missing '}' or object member name", tokenName, tokenObjectEnd);
00498 }
00499
00500 bool Reader::readArray(Token& tokenStart) {
00501 Value init(arrayValue);
00502 currentValue().swapPayload(init);
00503 currentValue().setOffsetStart(tokenStart.start_ - begin_);
00504 skipSpaces();
00505 if (*current_ == ']')
00506 {
00507 Token endArray;
00508 readToken(endArray);
00509 return true;
00510 }
00511 int index = 0;
00512 for (;;) {
00513 Value& value = currentValue()[index++];
00514 nodes_.push(&value);
00515 bool ok = readValue();
00516 nodes_.pop();
00517 if (!ok)
00518 return recoverFromError(tokenArrayEnd);
00519
00520 Token token;
00521
00522 ok = readToken(token);
00523 while (token.type_ == tokenComment && ok) {
00524 ok = readToken(token);
00525 }
00526 bool badTokenType =
00527 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
00528 if (!ok || badTokenType) {
00529 return addErrorAndRecover(
00530 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
00531 }
00532 if (token.type_ == tokenArrayEnd)
00533 break;
00534 }
00535 return true;
00536 }
00537
00538 bool Reader::decodeNumber(Token& token) {
00539 Value decoded;
00540 if (!decodeNumber(token, decoded))
00541 return false;
00542 currentValue().swapPayload(decoded);
00543 currentValue().setOffsetStart(token.start_ - begin_);
00544 currentValue().setOffsetLimit(token.end_ - begin_);
00545 return true;
00546 }
00547
00548 bool Reader::decodeNumber(Token& token, Value& decoded) {
00549
00550
00551
00552 Location current = token.start_;
00553 bool isNegative = *current == '-';
00554 if (isNegative)
00555 ++current;
00556
00557 Value::LargestUInt maxIntegerValue =
00558 isNegative ? Value::LargestUInt(-Value::minLargestInt)
00559 : Value::maxLargestUInt;
00560 Value::LargestUInt threshold = maxIntegerValue / 10;
00561 Value::LargestUInt value = 0;
00562 while (current < token.end_) {
00563 Char c = *current++;
00564 if (c < '0' || c > '9')
00565 return decodeDouble(token, decoded);
00566 Value::UInt digit(c - '0');
00567 if (value >= threshold) {
00568
00569
00570
00571
00572 if (value > threshold || current != token.end_ ||
00573 digit > maxIntegerValue % 10) {
00574 return decodeDouble(token, decoded);
00575 }
00576 }
00577 value = value * 10 + digit;
00578 }
00579 if (isNegative)
00580 decoded = -Value::LargestInt(value);
00581 else if (value <= Value::LargestUInt(Value::maxInt))
00582 decoded = Value::LargestInt(value);
00583 else
00584 decoded = value;
00585 return true;
00586 }
00587
00588 bool Reader::decodeDouble(Token& token) {
00589 Value decoded;
00590 if (!decodeDouble(token, decoded))
00591 return false;
00592 currentValue().swapPayload(decoded);
00593 currentValue().setOffsetStart(token.start_ - begin_);
00594 currentValue().setOffsetLimit(token.end_ - begin_);
00595 return true;
00596 }
00597
00598 bool Reader::decodeDouble(Token& token, Value& decoded) {
00599 double value = 0;
00600 const int bufferSize = 32;
00601 int count;
00602 int length = int(token.end_ - token.start_);
00603
00604
00605 if (length < 0) {
00606 return addError("Unable to parse token length", token);
00607 }
00608
00609
00610
00611
00612
00613
00614 char format[] = "%lf";
00615
00616 if (length <= bufferSize) {
00617 Char buffer[bufferSize + 1];
00618 memcpy(buffer, token.start_, length);
00619 buffer[length] = 0;
00620 count = sscanf(buffer, format, &value);
00621 } else {
00622 std::string buffer(token.start_, token.end_);
00623 count = sscanf(buffer.c_str(), format, &value);
00624 }
00625
00626 if (count != 1)
00627 return addError("'" + std::string(token.start_, token.end_) +
00628 "' is not a number.",
00629 token);
00630 decoded = value;
00631 return true;
00632 }
00633
00634 bool Reader::decodeString(Token& token) {
00635 std::string decoded_string;
00636 if (!decodeString(token, decoded_string))
00637 return false;
00638 Value decoded(decoded_string);
00639 currentValue().swapPayload(decoded);
00640 currentValue().setOffsetStart(token.start_ - begin_);
00641 currentValue().setOffsetLimit(token.end_ - begin_);
00642 return true;
00643 }
00644
00645 bool Reader::decodeString(Token& token, std::string& decoded) {
00646 decoded.reserve(token.end_ - token.start_ - 2);
00647 Location current = token.start_ + 1;
00648 Location end = token.end_ - 1;
00649 while (current != end) {
00650 Char c = *current++;
00651 if (c == '"')
00652 break;
00653 else if (c == '\\') {
00654 if (current == end)
00655 return addError("Empty escape sequence in string", token, current);
00656 Char escape = *current++;
00657 switch (escape) {
00658 case '"':
00659 decoded += '"';
00660 break;
00661 case '/':
00662 decoded += '/';
00663 break;
00664 case '\\':
00665 decoded += '\\';
00666 break;
00667 case 'b':
00668 decoded += '\b';
00669 break;
00670 case 'f':
00671 decoded += '\f';
00672 break;
00673 case 'n':
00674 decoded += '\n';
00675 break;
00676 case 'r':
00677 decoded += '\r';
00678 break;
00679 case 't':
00680 decoded += '\t';
00681 break;
00682 case 'u': {
00683 unsigned int unicode;
00684 if (!decodeUnicodeCodePoint(token, current, end, unicode))
00685 return false;
00686 decoded += codePointToUTF8(unicode);
00687 } break;
00688 default:
00689 return addError("Bad escape sequence in string", token, current);
00690 }
00691 } else {
00692 decoded += c;
00693 }
00694 }
00695 return true;
00696 }
00697
00698 bool Reader::decodeUnicodeCodePoint(Token& token,
00699 Location& current,
00700 Location end,
00701 unsigned int& unicode) {
00702
00703 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
00704 return false;
00705 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
00706
00707 if (end - current < 6)
00708 return addError(
00709 "additional six characters expected to parse unicode surrogate pair.",
00710 token,
00711 current);
00712 unsigned int surrogatePair;
00713 if (*(current++) == '\\' && *(current++) == 'u') {
00714 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
00715 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00716 } else
00717 return false;
00718 } else
00719 return addError("expecting another \\u token to begin the second half of "
00720 "a unicode surrogate pair",
00721 token,
00722 current);
00723 }
00724 return true;
00725 }
00726
00727 bool Reader::decodeUnicodeEscapeSequence(Token& token,
00728 Location& current,
00729 Location end,
00730 unsigned int& unicode) {
00731 if (end - current < 4)
00732 return addError(
00733 "Bad unicode escape sequence in string: four digits expected.",
00734 token,
00735 current);
00736 unicode = 0;
00737 for (int index = 0; index < 4; ++index) {
00738 Char c = *current++;
00739 unicode *= 16;
00740 if (c >= '0' && c <= '9')
00741 unicode += c - '0';
00742 else if (c >= 'a' && c <= 'f')
00743 unicode += c - 'a' + 10;
00744 else if (c >= 'A' && c <= 'F')
00745 unicode += c - 'A' + 10;
00746 else
00747 return addError(
00748 "Bad unicode escape sequence in string: hexadecimal digit expected.",
00749 token,
00750 current);
00751 }
00752 return true;
00753 }
00754
00755 bool
00756 Reader::addError(const std::string& message, Token& token, Location extra) {
00757 ErrorInfo info;
00758 info.token_ = token;
00759 info.message_ = message;
00760 info.extra_ = extra;
00761 errors_.push_back(info);
00762 return false;
00763 }
00764
00765 bool Reader::recoverFromError(TokenType skipUntilToken) {
00766 int errorCount = int(errors_.size());
00767 Token skip;
00768 for (;;) {
00769 if (!readToken(skip))
00770 errors_.resize(errorCount);
00771 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
00772 break;
00773 }
00774 errors_.resize(errorCount);
00775 return false;
00776 }
00777
00778 bool Reader::addErrorAndRecover(const std::string& message,
00779 Token& token,
00780 TokenType skipUntilToken) {
00781 addError(message, token);
00782 return recoverFromError(skipUntilToken);
00783 }
00784
00785 Value& Reader::currentValue() { return *(nodes_.top()); }
00786
00787 Reader::Char Reader::getNextChar() {
00788 if (current_ == end_)
00789 return 0;
00790 return *current_++;
00791 }
00792
00793 void Reader::getLocationLineAndColumn(Location location,
00794 int& line,
00795 int& column) const {
00796 Location current = begin_;
00797 Location lastLineStart = current;
00798 line = 0;
00799 while (current < location && current != end_) {
00800 Char c = *current++;
00801 if (c == '\r') {
00802 if (*current == '\n')
00803 ++current;
00804 lastLineStart = current;
00805 ++line;
00806 } else if (c == '\n') {
00807 lastLineStart = current;
00808 ++line;
00809 }
00810 }
00811
00812 column = int(location - lastLineStart) + 1;
00813 ++line;
00814 }
00815
00816 std::string Reader::getLocationLineAndColumn(Location location) const {
00817 int line, column;
00818 getLocationLineAndColumn(location, line, column);
00819 char buffer[18 + 16 + 16 + 1];
00820 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
00821 #if defined(WINCE)
00822 _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00823 #else
00824 sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00825 #endif
00826 #else
00827 snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00828 #endif
00829 return buffer;
00830 }
00831
00832
00833 std::string Reader::getFormatedErrorMessages() const {
00834 return getFormattedErrorMessages();
00835 }
00836
00837 std::string Reader::getFormattedErrorMessages() const {
00838 std::string formattedMessage;
00839 for (Errors::const_iterator itError = errors_.begin();
00840 itError != errors_.end();
00841 ++itError) {
00842 const ErrorInfo& error = *itError;
00843 formattedMessage +=
00844 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
00845 formattedMessage += " " + error.message_ + "\n";
00846 if (error.extra_)
00847 formattedMessage +=
00848 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
00849 }
00850 return formattedMessage;
00851 }
00852
00853 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
00854 std::vector<Reader::StructuredError> allErrors;
00855 for (Errors::const_iterator itError = errors_.begin();
00856 itError != errors_.end();
00857 ++itError) {
00858 const ErrorInfo& error = *itError;
00859 Reader::StructuredError structured;
00860 structured.offset_start = error.token_.start_ - begin_;
00861 structured.offset_limit = error.token_.end_ - begin_;
00862 structured.message = error.message_;
00863 allErrors.push_back(structured);
00864 }
00865 return allErrors;
00866 }
00867
00868 bool Reader::pushError(const Value& value, const std::string& message) {
00869 size_t length = end_ - begin_;
00870 if(value.getOffsetStart() > length
00871 || value.getOffsetLimit() > length)
00872 return false;
00873 Token token;
00874 token.type_ = tokenError;
00875 token.start_ = begin_ + value.getOffsetStart();
00876 token.end_ = end_ + value.getOffsetLimit();
00877 ErrorInfo info;
00878 info.token_ = token;
00879 info.message_ = message;
00880 info.extra_ = 0;
00881 errors_.push_back(info);
00882 return true;
00883 }
00884
00885 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
00886 size_t length = end_ - begin_;
00887 if(value.getOffsetStart() > length
00888 || value.getOffsetLimit() > length
00889 || extra.getOffsetLimit() > length)
00890 return false;
00891 Token token;
00892 token.type_ = tokenError;
00893 token.start_ = begin_ + value.getOffsetStart();
00894 token.end_ = begin_ + value.getOffsetLimit();
00895 ErrorInfo info;
00896 info.token_ = token;
00897 info.message_ = message;
00898 info.extra_ = begin_ + extra.getOffsetStart();
00899 errors_.push_back(info);
00900 return true;
00901 }
00902
00903 bool Reader::good() const {
00904 return !errors_.size();
00905 }
00906
00907
// Feature switches consumed by OurReader.
class OurFeatures {
public:
  // Returns a default-constructed feature set.
  static OurFeatures all();
  // Initializes every switch to its default (see the definition below).
  OurFeatures();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  int stackLimit_; // maximum nesting depth accepted by OurReader::readValue()
};

// Defaults: comments allowed, every other switch off, nesting limit 1000.
//
// rejectDupKeys_ and stackLimit_ used to be left uninitialized, so a
// default-constructed feature set carried indeterminate values that were later
// copied and, for stackLimit_, compared against the parse depth.
OurFeatures::OurFeatures()
    : allowComments_(true), strictRoot_(false),
      allowDroppedNullPlaceholders_(false), allowNumericKeys_(false),
      allowSingleQuotes_(false), failIfExtra_(false),
      rejectDupKeys_(false),
      stackLimit_(1000) // same limit the legacy Reader uses
{
}

OurFeatures OurFeatures::all() { return OurFeatures(); }
00934
00935
00936
00937
00938
00939 class OurReader {
00940 public:
00941 typedef char Char;
00942 typedef const Char* Location;
00943 struct StructuredError {
00944 size_t offset_start;
00945 size_t offset_limit;
00946 std::string message;
00947 };
00948
00949 OurReader(OurFeatures const& features);
00950 bool parse(const char* beginDoc,
00951 const char* endDoc,
00952 Value& root,
00953 bool collectComments = true);
00954 std::string getFormattedErrorMessages() const;
00955 std::vector<StructuredError> getStructuredErrors() const;
00956 bool pushError(const Value& value, const std::string& message);
00957 bool pushError(const Value& value, const std::string& message, const Value& extra);
00958 bool good() const;
00959
00960 private:
00961 OurReader(OurReader const&);
00962 void operator=(OurReader const&);
00963
00964 enum TokenType {
00965 tokenEndOfStream = 0,
00966 tokenObjectBegin,
00967 tokenObjectEnd,
00968 tokenArrayBegin,
00969 tokenArrayEnd,
00970 tokenString,
00971 tokenNumber,
00972 tokenTrue,
00973 tokenFalse,
00974 tokenNull,
00975 tokenArraySeparator,
00976 tokenMemberSeparator,
00977 tokenComment,
00978 tokenError
00979 };
00980
00981 class Token {
00982 public:
00983 TokenType type_;
00984 Location start_;
00985 Location end_;
00986 };
00987
00988 class ErrorInfo {
00989 public:
00990 Token token_;
00991 std::string message_;
00992 Location extra_;
00993 };
00994
00995 typedef std::deque<ErrorInfo> Errors;
00996
00997 bool readToken(Token& token);
00998 void skipSpaces();
00999 bool match(Location pattern, int patternLength);
01000 bool readComment();
01001 bool readCStyleComment();
01002 bool readCppStyleComment();
01003 bool readString();
01004 bool readStringSingleQuote();
01005 void readNumber();
01006 bool readValue();
01007 bool readObject(Token& token);
01008 bool readArray(Token& token);
01009 bool decodeNumber(Token& token);
01010 bool decodeNumber(Token& token, Value& decoded);
01011 bool decodeString(Token& token);
01012 bool decodeString(Token& token, std::string& decoded);
01013 bool decodeDouble(Token& token);
01014 bool decodeDouble(Token& token, Value& decoded);
01015 bool decodeUnicodeCodePoint(Token& token,
01016 Location& current,
01017 Location end,
01018 unsigned int& unicode);
01019 bool decodeUnicodeEscapeSequence(Token& token,
01020 Location& current,
01021 Location end,
01022 unsigned int& unicode);
01023 bool addError(const std::string& message, Token& token, Location extra = 0);
01024 bool recoverFromError(TokenType skipUntilToken);
01025 bool addErrorAndRecover(const std::string& message,
01026 Token& token,
01027 TokenType skipUntilToken);
01028 void skipUntilSpace();
01029 Value& currentValue();
01030 Char getNextChar();
01031 void
01032 getLocationLineAndColumn(Location location, int& line, int& column) const;
01033 std::string getLocationLineAndColumn(Location location) const;
01034 void addComment(Location begin, Location end, CommentPlacement placement);
01035 void skipCommentTokens(Token& token);
01036
01037 typedef std::stack<Value*> Nodes;
01038 Nodes nodes_;
01039 Errors errors_;
01040 std::string document_;
01041 Location begin_;
01042 Location end_;
01043 Location current_;
01044 Location lastValueEnd_;
01045 Value* lastValue_;
01046 std::string commentsBefore_;
01047 int stackDepth_;
01048
01049 OurFeatures const features_;
01050 bool collectComments_;
01051 };
01052
01053
01054
01055 OurReader::OurReader(OurFeatures const& features)
01056 : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
01057 lastValue_(), commentsBefore_(), features_(features), collectComments_() {
01058 }
01059
01060 bool OurReader::parse(const char* beginDoc,
01061 const char* endDoc,
01062 Value& root,
01063 bool collectComments) {
01064 if (!features_.allowComments_) {
01065 collectComments = false;
01066 }
01067
01068 begin_ = beginDoc;
01069 end_ = endDoc;
01070 collectComments_ = collectComments;
01071 current_ = begin_;
01072 lastValueEnd_ = 0;
01073 lastValue_ = 0;
01074 commentsBefore_ = "";
01075 errors_.clear();
01076 while (!nodes_.empty())
01077 nodes_.pop();
01078 nodes_.push(&root);
01079
01080 stackDepth_ = 0;
01081 bool successful = readValue();
01082 Token token;
01083 skipCommentTokens(token);
01084 if (features_.failIfExtra_) {
01085 if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
01086 addError("Extra non-whitespace after JSON value.", token);
01087 return false;
01088 }
01089 }
01090 if (collectComments_ && !commentsBefore_.empty())
01091 root.setComment(commentsBefore_, commentAfter);
01092 if (features_.strictRoot_) {
01093 if (!root.isArray() && !root.isObject()) {
01094
01095
01096 token.type_ = tokenError;
01097 token.start_ = beginDoc;
01098 token.end_ = endDoc;
01099 addError(
01100 "A valid JSON document must be either an array or an object value.",
01101 token);
01102 return false;
01103 }
01104 }
01105 return successful;
01106 }
01107
01108 bool OurReader::readValue() {
01109 if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
01110 ++stackDepth_;
01111 Token token;
01112 skipCommentTokens(token);
01113 bool successful = true;
01114
01115 if (collectComments_ && !commentsBefore_.empty()) {
01116 currentValue().setComment(commentsBefore_, commentBefore);
01117 commentsBefore_ = "";
01118 }
01119
01120 switch (token.type_) {
01121 case tokenObjectBegin:
01122 successful = readObject(token);
01123 currentValue().setOffsetLimit(current_ - begin_);
01124 break;
01125 case tokenArrayBegin:
01126 successful = readArray(token);
01127 currentValue().setOffsetLimit(current_ - begin_);
01128 break;
01129 case tokenNumber:
01130 successful = decodeNumber(token);
01131 break;
01132 case tokenString:
01133 successful = decodeString(token);
01134 break;
01135 case tokenTrue:
01136 {
01137 Value v(true);
01138 currentValue().swapPayload(v);
01139 currentValue().setOffsetStart(token.start_ - begin_);
01140 currentValue().setOffsetLimit(token.end_ - begin_);
01141 }
01142 break;
01143 case tokenFalse:
01144 {
01145 Value v(false);
01146 currentValue().swapPayload(v);
01147 currentValue().setOffsetStart(token.start_ - begin_);
01148 currentValue().setOffsetLimit(token.end_ - begin_);
01149 }
01150 break;
01151 case tokenNull:
01152 {
01153 Value v;
01154 currentValue().swapPayload(v);
01155 currentValue().setOffsetStart(token.start_ - begin_);
01156 currentValue().setOffsetLimit(token.end_ - begin_);
01157 }
01158 break;
01159 case tokenArraySeparator:
01160 case tokenObjectEnd:
01161 case tokenArrayEnd:
01162 if (features_.allowDroppedNullPlaceholders_) {
01163
01164
01165 current_--;
01166 Value v;
01167 currentValue().swapPayload(v);
01168 currentValue().setOffsetStart(current_ - begin_ - 1);
01169 currentValue().setOffsetLimit(current_ - begin_);
01170 break;
01171 }
01172 default:
01173 currentValue().setOffsetStart(token.start_ - begin_);
01174 currentValue().setOffsetLimit(token.end_ - begin_);
01175 return addError("Syntax error: value, object or array expected.", token);
01176 }
01177
01178 if (collectComments_) {
01179 lastValueEnd_ = current_;
01180 lastValue_ = ¤tValue();
01181 }
01182
01183 --stackDepth_;
01184 return successful;
01185 }
01186
01187 void OurReader::skipCommentTokens(Token& token) {
01188 if (features_.allowComments_) {
01189 do {
01190 readToken(token);
01191 } while (token.type_ == tokenComment);
01192 } else {
01193 readToken(token);
01194 }
01195 }
01196
01197 bool OurReader::readToken(Token& token) {
01198 skipSpaces();
01199 token.start_ = current_;
01200 Char c = getNextChar();
01201 bool ok = true;
01202 switch (c) {
01203 case '{':
01204 token.type_ = tokenObjectBegin;
01205 break;
01206 case '}':
01207 token.type_ = tokenObjectEnd;
01208 break;
01209 case '[':
01210 token.type_ = tokenArrayBegin;
01211 break;
01212 case ']':
01213 token.type_ = tokenArrayEnd;
01214 break;
01215 case '"':
01216 token.type_ = tokenString;
01217 ok = readString();
01218 break;
01219 case '\'':
01220 if (features_.allowSingleQuotes_) {
01221 token.type_ = tokenString;
01222 ok = readStringSingleQuote();
01223 break;
01224 }
01225 case '/':
01226 token.type_ = tokenComment;
01227 ok = readComment();
01228 break;
01229 case '0':
01230 case '1':
01231 case '2':
01232 case '3':
01233 case '4':
01234 case '5':
01235 case '6':
01236 case '7':
01237 case '8':
01238 case '9':
01239 case '-':
01240 token.type_ = tokenNumber;
01241 readNumber();
01242 break;
01243 case 't':
01244 token.type_ = tokenTrue;
01245 ok = match("rue", 3);
01246 break;
01247 case 'f':
01248 token.type_ = tokenFalse;
01249 ok = match("alse", 4);
01250 break;
01251 case 'n':
01252 token.type_ = tokenNull;
01253 ok = match("ull", 3);
01254 break;
01255 case ',':
01256 token.type_ = tokenArraySeparator;
01257 break;
01258 case ':':
01259 token.type_ = tokenMemberSeparator;
01260 break;
01261 case 0:
01262 token.type_ = tokenEndOfStream;
01263 break;
01264 default:
01265 ok = false;
01266 break;
01267 }
01268 if (!ok)
01269 token.type_ = tokenError;
01270 token.end_ = current_;
01271 return true;
01272 }
01273
01274 void OurReader::skipSpaces() {
01275 while (current_ != end_) {
01276 Char c = *current_;
01277 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
01278 ++current_;
01279 else
01280 break;
01281 }
01282 }
01283
01284 bool OurReader::match(Location pattern, int patternLength) {
01285 if (end_ - current_ < patternLength)
01286 return false;
01287 int index = patternLength;
01288 while (index--)
01289 if (current_[index] != pattern[index])
01290 return false;
01291 current_ += patternLength;
01292 return true;
01293 }
01294
01295 bool OurReader::readComment() {
01296 Location commentBegin = current_ - 1;
01297 Char c = getNextChar();
01298 bool successful = false;
01299 if (c == '*')
01300 successful = readCStyleComment();
01301 else if (c == '/')
01302 successful = readCppStyleComment();
01303 if (!successful)
01304 return false;
01305
01306 if (collectComments_) {
01307 CommentPlacement placement = commentBefore;
01308 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
01309 if (c != '*' || !containsNewLine(commentBegin, current_))
01310 placement = commentAfterOnSameLine;
01311 }
01312
01313 addComment(commentBegin, current_, placement);
01314 }
01315 return true;
01316 }
01317
01318 void
01319 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
01320 assert(collectComments_);
01321 const std::string& normalized = normalizeEOL(begin, end);
01322 if (placement == commentAfterOnSameLine) {
01323 assert(lastValue_ != 0);
01324 lastValue_->setComment(normalized, placement);
01325 } else {
01326 commentsBefore_ += normalized;
01327 }
01328 }
01329
01330 bool OurReader::readCStyleComment() {
01331 while (current_ != end_) {
01332 Char c = getNextChar();
01333 if (c == '*' && *current_ == '/')
01334 break;
01335 }
01336 return getNextChar() == '/';
01337 }
01338
01339 bool OurReader::readCppStyleComment() {
01340 while (current_ != end_) {
01341 Char c = getNextChar();
01342 if (c == '\n')
01343 break;
01344 if (c == '\r') {
01345
01346 if (current_ != end_ && *current_ == '\n')
01347 getNextChar();
01348
01349 break;
01350 }
01351 }
01352 return true;
01353 }
01354
01355 void OurReader::readNumber() {
01356 const char *p = current_;
01357 char c = '0';
01358
01359 while (c >= '0' && c <= '9')
01360 c = (current_ = p) < end_ ? *p++ : 0;
01361
01362 if (c == '.') {
01363 c = (current_ = p) < end_ ? *p++ : 0;
01364 while (c >= '0' && c <= '9')
01365 c = (current_ = p) < end_ ? *p++ : 0;
01366 }
01367
01368 if (c == 'e' || c == 'E') {
01369 c = (current_ = p) < end_ ? *p++ : 0;
01370 if (c == '+' || c == '-')
01371 c = (current_ = p) < end_ ? *p++ : 0;
01372 while (c >= '0' && c <= '9')
01373 c = (current_ = p) < end_ ? *p++ : 0;
01374 }
01375 }
01376 bool OurReader::readString() {
01377 Char c = 0;
01378 while (current_ != end_) {
01379 c = getNextChar();
01380 if (c == '\\')
01381 getNextChar();
01382 else if (c == '"')
01383 break;
01384 }
01385 return c == '"';
01386 }
01387
01388
01389 bool OurReader::readStringSingleQuote() {
01390 Char c = 0;
01391 while (current_ != end_) {
01392 c = getNextChar();
01393 if (c == '\\')
01394 getNextChar();
01395 else if (c == '\'')
01396 break;
01397 }
01398 return c == '\'';
01399 }
01400
01401 bool OurReader::readObject(Token& tokenStart) {
01402 Token tokenName;
01403 std::string name;
01404 Value init(objectValue);
01405 currentValue().swapPayload(init);
01406 currentValue().setOffsetStart(tokenStart.start_ - begin_);
01407 while (readToken(tokenName)) {
01408 bool initialTokenOk = true;
01409 while (tokenName.type_ == tokenComment && initialTokenOk)
01410 initialTokenOk = readToken(tokenName);
01411 if (!initialTokenOk)
01412 break;
01413 if (tokenName.type_ == tokenObjectEnd && name.empty())
01414 return true;
01415 name = "";
01416 if (tokenName.type_ == tokenString) {
01417 if (!decodeString(tokenName, name))
01418 return recoverFromError(tokenObjectEnd);
01419 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
01420 Value numberName;
01421 if (!decodeNumber(tokenName, numberName))
01422 return recoverFromError(tokenObjectEnd);
01423 name = numberName.asString();
01424 } else {
01425 break;
01426 }
01427
01428 Token colon;
01429 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
01430 return addErrorAndRecover(
01431 "Missing ':' after object member name", colon, tokenObjectEnd);
01432 }
01433 if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
01434 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
01435 std::string msg = "Duplicate key: '" + name + "'";
01436 return addErrorAndRecover(
01437 msg, tokenName, tokenObjectEnd);
01438 }
01439 Value& value = currentValue()[name];
01440 nodes_.push(&value);
01441 bool ok = readValue();
01442 nodes_.pop();
01443 if (!ok)
01444 return recoverFromError(tokenObjectEnd);
01445
01446 Token comma;
01447 if (!readToken(comma) ||
01448 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
01449 comma.type_ != tokenComment)) {
01450 return addErrorAndRecover(
01451 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
01452 }
01453 bool finalizeTokenOk = true;
01454 while (comma.type_ == tokenComment && finalizeTokenOk)
01455 finalizeTokenOk = readToken(comma);
01456 if (comma.type_ == tokenObjectEnd)
01457 return true;
01458 }
01459 return addErrorAndRecover(
01460 "Missing '}' or object member name", tokenName, tokenObjectEnd);
01461 }
01462
01463 bool OurReader::readArray(Token& tokenStart) {
01464 Value init(arrayValue);
01465 currentValue().swapPayload(init);
01466 currentValue().setOffsetStart(tokenStart.start_ - begin_);
01467 skipSpaces();
01468 if (*current_ == ']')
01469 {
01470 Token endArray;
01471 readToken(endArray);
01472 return true;
01473 }
01474 int index = 0;
01475 for (;;) {
01476 Value& value = currentValue()[index++];
01477 nodes_.push(&value);
01478 bool ok = readValue();
01479 nodes_.pop();
01480 if (!ok)
01481 return recoverFromError(tokenArrayEnd);
01482
01483 Token token;
01484
01485 ok = readToken(token);
01486 while (token.type_ == tokenComment && ok) {
01487 ok = readToken(token);
01488 }
01489 bool badTokenType =
01490 (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
01491 if (!ok || badTokenType) {
01492 return addErrorAndRecover(
01493 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
01494 }
01495 if (token.type_ == tokenArrayEnd)
01496 break;
01497 }
01498 return true;
01499 }
01500
01501 bool OurReader::decodeNumber(Token& token) {
01502 Value decoded;
01503 if (!decodeNumber(token, decoded))
01504 return false;
01505 currentValue().swapPayload(decoded);
01506 currentValue().setOffsetStart(token.start_ - begin_);
01507 currentValue().setOffsetLimit(token.end_ - begin_);
01508 return true;
01509 }
01510
01511 bool OurReader::decodeNumber(Token& token, Value& decoded) {
01512
01513
01514
01515 Location current = token.start_;
01516 bool isNegative = *current == '-';
01517 if (isNegative)
01518 ++current;
01519
01520 Value::LargestUInt maxIntegerValue =
01521 isNegative ? Value::LargestUInt(-Value::minLargestInt)
01522 : Value::maxLargestUInt;
01523 Value::LargestUInt threshold = maxIntegerValue / 10;
01524 Value::LargestUInt value = 0;
01525 while (current < token.end_) {
01526 Char c = *current++;
01527 if (c < '0' || c > '9')
01528 return decodeDouble(token, decoded);
01529 Value::UInt digit(c - '0');
01530 if (value >= threshold) {
01531
01532
01533
01534
01535 if (value > threshold || current != token.end_ ||
01536 digit > maxIntegerValue % 10) {
01537 return decodeDouble(token, decoded);
01538 }
01539 }
01540 value = value * 10 + digit;
01541 }
01542 if (isNegative)
01543 decoded = -Value::LargestInt(value);
01544 else if (value <= Value::LargestUInt(Value::maxInt))
01545 decoded = Value::LargestInt(value);
01546 else
01547 decoded = value;
01548 return true;
01549 }
01550
01551 bool OurReader::decodeDouble(Token& token) {
01552 Value decoded;
01553 if (!decodeDouble(token, decoded))
01554 return false;
01555 currentValue().swapPayload(decoded);
01556 currentValue().setOffsetStart(token.start_ - begin_);
01557 currentValue().setOffsetLimit(token.end_ - begin_);
01558 return true;
01559 }
01560
01561 bool OurReader::decodeDouble(Token& token, Value& decoded) {
01562 double value = 0;
01563 const int bufferSize = 32;
01564 int count;
01565 int length = int(token.end_ - token.start_);
01566
01567
01568 if (length < 0) {
01569 return addError("Unable to parse token length", token);
01570 }
01571
01572
01573
01574
01575
01576
01577 char format[] = "%lf";
01578
01579 if (length <= bufferSize) {
01580 Char buffer[bufferSize + 1];
01581 memcpy(buffer, token.start_, length);
01582 buffer[length] = 0;
01583 count = sscanf(buffer, format, &value);
01584 } else {
01585 std::string buffer(token.start_, token.end_);
01586 count = sscanf(buffer.c_str(), format, &value);
01587 }
01588
01589 if (count != 1)
01590 return addError("'" + std::string(token.start_, token.end_) +
01591 "' is not a number.",
01592 token);
01593 decoded = value;
01594 return true;
01595 }
01596
01597 bool OurReader::decodeString(Token& token) {
01598 std::string decoded_string;
01599 if (!decodeString(token, decoded_string))
01600 return false;
01601 Value decoded(decoded_string);
01602 currentValue().swapPayload(decoded);
01603 currentValue().setOffsetStart(token.start_ - begin_);
01604 currentValue().setOffsetLimit(token.end_ - begin_);
01605 return true;
01606 }
01607
01608 bool OurReader::decodeString(Token& token, std::string& decoded) {
01609 decoded.reserve(token.end_ - token.start_ - 2);
01610 Location current = token.start_ + 1;
01611 Location end = token.end_ - 1;
01612 while (current != end) {
01613 Char c = *current++;
01614 if (c == '"')
01615 break;
01616 else if (c == '\\') {
01617 if (current == end)
01618 return addError("Empty escape sequence in string", token, current);
01619 Char escape = *current++;
01620 switch (escape) {
01621 case '"':
01622 decoded += '"';
01623 break;
01624 case '/':
01625 decoded += '/';
01626 break;
01627 case '\\':
01628 decoded += '\\';
01629 break;
01630 case 'b':
01631 decoded += '\b';
01632 break;
01633 case 'f':
01634 decoded += '\f';
01635 break;
01636 case 'n':
01637 decoded += '\n';
01638 break;
01639 case 'r':
01640 decoded += '\r';
01641 break;
01642 case 't':
01643 decoded += '\t';
01644 break;
01645 case 'u': {
01646 unsigned int unicode;
01647 if (!decodeUnicodeCodePoint(token, current, end, unicode))
01648 return false;
01649 decoded += codePointToUTF8(unicode);
01650 } break;
01651 default:
01652 return addError("Bad escape sequence in string", token, current);
01653 }
01654 } else {
01655 decoded += c;
01656 }
01657 }
01658 return true;
01659 }
01660
01661 bool OurReader::decodeUnicodeCodePoint(Token& token,
01662 Location& current,
01663 Location end,
01664 unsigned int& unicode) {
01665
01666 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
01667 return false;
01668 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
01669
01670 if (end - current < 6)
01671 return addError(
01672 "additional six characters expected to parse unicode surrogate pair.",
01673 token,
01674 current);
01675 unsigned int surrogatePair;
01676 if (*(current++) == '\\' && *(current++) == 'u') {
01677 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
01678 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
01679 } else
01680 return false;
01681 } else
01682 return addError("expecting another \\u token to begin the second half of "
01683 "a unicode surrogate pair",
01684 token,
01685 current);
01686 }
01687 return true;
01688 }
01689
01690 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
01691 Location& current,
01692 Location end,
01693 unsigned int& unicode) {
01694 if (end - current < 4)
01695 return addError(
01696 "Bad unicode escape sequence in string: four digits expected.",
01697 token,
01698 current);
01699 unicode = 0;
01700 for (int index = 0; index < 4; ++index) {
01701 Char c = *current++;
01702 unicode *= 16;
01703 if (c >= '0' && c <= '9')
01704 unicode += c - '0';
01705 else if (c >= 'a' && c <= 'f')
01706 unicode += c - 'a' + 10;
01707 else if (c >= 'A' && c <= 'F')
01708 unicode += c - 'A' + 10;
01709 else
01710 return addError(
01711 "Bad unicode escape sequence in string: hexadecimal digit expected.",
01712 token,
01713 current);
01714 }
01715 return true;
01716 }
01717
01718 bool
01719 OurReader::addError(const std::string& message, Token& token, Location extra) {
01720 ErrorInfo info;
01721 info.token_ = token;
01722 info.message_ = message;
01723 info.extra_ = extra;
01724 errors_.push_back(info);
01725 return false;
01726 }
01727
01728 bool OurReader::recoverFromError(TokenType skipUntilToken) {
01729 int errorCount = int(errors_.size());
01730 Token skip;
01731 for (;;) {
01732 if (!readToken(skip))
01733 errors_.resize(errorCount);
01734 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
01735 break;
01736 }
01737 errors_.resize(errorCount);
01738 return false;
01739 }
01740
01741 bool OurReader::addErrorAndRecover(const std::string& message,
01742 Token& token,
01743 TokenType skipUntilToken) {
01744 addError(message, token);
01745 return recoverFromError(skipUntilToken);
01746 }
01747
01748 Value& OurReader::currentValue() { return *(nodes_.top()); }
01749
01750 OurReader::Char OurReader::getNextChar() {
01751 if (current_ == end_)
01752 return 0;
01753 return *current_++;
01754 }
01755
01756 void OurReader::getLocationLineAndColumn(Location location,
01757 int& line,
01758 int& column) const {
01759 Location current = begin_;
01760 Location lastLineStart = current;
01761 line = 0;
01762 while (current < location && current != end_) {
01763 Char c = *current++;
01764 if (c == '\r') {
01765 if (*current == '\n')
01766 ++current;
01767 lastLineStart = current;
01768 ++line;
01769 } else if (c == '\n') {
01770 lastLineStart = current;
01771 ++line;
01772 }
01773 }
01774
01775 column = int(location - lastLineStart) + 1;
01776 ++line;
01777 }
01778
01779 std::string OurReader::getLocationLineAndColumn(Location location) const {
01780 int line, column;
01781 getLocationLineAndColumn(location, line, column);
01782 char buffer[18 + 16 + 16 + 1];
01783 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
01784 #if defined(WINCE)
01785 _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01786 #else
01787 sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01788 #endif
01789 #else
01790 snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01791 #endif
01792 return buffer;
01793 }
01794
01795 std::string OurReader::getFormattedErrorMessages() const {
01796 std::string formattedMessage;
01797 for (Errors::const_iterator itError = errors_.begin();
01798 itError != errors_.end();
01799 ++itError) {
01800 const ErrorInfo& error = *itError;
01801 formattedMessage +=
01802 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
01803 formattedMessage += " " + error.message_ + "\n";
01804 if (error.extra_)
01805 formattedMessage +=
01806 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
01807 }
01808 return formattedMessage;
01809 }
01810
01811 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
01812 std::vector<OurReader::StructuredError> allErrors;
01813 for (Errors::const_iterator itError = errors_.begin();
01814 itError != errors_.end();
01815 ++itError) {
01816 const ErrorInfo& error = *itError;
01817 OurReader::StructuredError structured;
01818 structured.offset_start = error.token_.start_ - begin_;
01819 structured.offset_limit = error.token_.end_ - begin_;
01820 structured.message = error.message_;
01821 allErrors.push_back(structured);
01822 }
01823 return allErrors;
01824 }
01825
01826 bool OurReader::pushError(const Value& value, const std::string& message) {
01827 size_t length = end_ - begin_;
01828 if(value.getOffsetStart() > length
01829 || value.getOffsetLimit() > length)
01830 return false;
01831 Token token;
01832 token.type_ = tokenError;
01833 token.start_ = begin_ + value.getOffsetStart();
01834 token.end_ = end_ + value.getOffsetLimit();
01835 ErrorInfo info;
01836 info.token_ = token;
01837 info.message_ = message;
01838 info.extra_ = 0;
01839 errors_.push_back(info);
01840 return true;
01841 }
01842
01843 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
01844 size_t length = end_ - begin_;
01845 if(value.getOffsetStart() > length
01846 || value.getOffsetLimit() > length
01847 || extra.getOffsetLimit() > length)
01848 return false;
01849 Token token;
01850 token.type_ = tokenError;
01851 token.start_ = begin_ + value.getOffsetStart();
01852 token.end_ = begin_ + value.getOffsetLimit();
01853 ErrorInfo info;
01854 info.token_ = token;
01855 info.message_ = message;
01856 info.extra_ = begin_ + extra.getOffsetStart();
01857 errors_.push_back(info);
01858 return true;
01859 }
01860
01861 bool OurReader::good() const {
01862 return !errors_.size();
01863 }
01864
01865
01866 class OurCharReader : public CharReader {
01867 bool const collectComments_;
01868 OurReader reader_;
01869 public:
01870 OurCharReader(
01871 bool collectComments,
01872 OurFeatures const& features)
01873 : collectComments_(collectComments)
01874 , reader_(features)
01875 {}
01876 virtual bool parse(
01877 char const* beginDoc, char const* endDoc,
01878 Value* root, std::string* errs) {
01879 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
01880 if (errs) {
01881 *errs = reader_.getFormattedErrorMessages();
01882 }
01883 return ok;
01884 }
01885 };
01886
01887 CharReaderBuilder::CharReaderBuilder()
01888 {
01889 setDefaults(&settings_);
01890 }
01891 CharReaderBuilder::~CharReaderBuilder()
01892 {}
01893 CharReader* CharReaderBuilder::newCharReader() const
01894 {
01895 bool collectComments = settings_["collectComments"].asBool();
01896 OurFeatures features = OurFeatures::all();
01897 features.allowComments_ = settings_["allowComments"].asBool();
01898 features.strictRoot_ = settings_["strictRoot"].asBool();
01899 features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
01900 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
01901 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
01902 features.stackLimit_ = settings_["stackLimit"].asInt();
01903 features.failIfExtra_ = settings_["failIfExtra"].asBool();
01904 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
01905 return new OurCharReader(collectComments, features);
01906 }
// Replaces the contents of *valid_keys with the names of all settings that
// the reader builder understands.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const readerKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "allowSingleQuotes",
    "stackLimit",
    "failIfExtra",
    "rejectDupKeys"
  };
  const size_t keyCount = sizeof(readerKeys) / sizeof(readerKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i)
    valid_keys->insert(readerKeys[i]);
}
01920 bool CharReaderBuilder::validate(Json::Value* invalid) const
01921 {
01922 Json::Value my_invalid;
01923 if (!invalid) invalid = &my_invalid;
01924 Json::Value& inv = *invalid;
01925 std::set<std::string> valid_keys;
01926 getValidReaderKeys(&valid_keys);
01927 Value::Members keys = settings_.getMemberNames();
01928 size_t n = keys.size();
01929 for (size_t i = 0; i < n; ++i) {
01930 std::string const& key = keys[i];
01931 if (valid_keys.find(key) == valid_keys.end()) {
01932 inv[key] = settings_[key];
01933 }
01934 }
01935 return 0u == inv.size();
01936 }
01937 Value& CharReaderBuilder::operator[](std::string key)
01938 {
01939 return settings_[key];
01940 }
01941
01942 void CharReaderBuilder::strictMode(Json::Value* settings)
01943 {
01945 (*settings)["allowComments"] = false;
01946 (*settings)["strictRoot"] = true;
01947 (*settings)["allowDroppedNullPlaceholders"] = false;
01948 (*settings)["allowNumericKeys"] = false;
01949 (*settings)["allowSingleQuotes"] = false;
01950 (*settings)["failIfExtra"] = true;
01951 (*settings)["rejectDupKeys"] = true;
01953 }
01954
01955 void CharReaderBuilder::setDefaults(Json::Value* settings)
01956 {
01958 (*settings)["collectComments"] = true;
01959 (*settings)["allowComments"] = true;
01960 (*settings)["strictRoot"] = false;
01961 (*settings)["allowDroppedNullPlaceholders"] = false;
01962 (*settings)["allowNumericKeys"] = false;
01963 (*settings)["allowSingleQuotes"] = false;
01964 (*settings)["stackLimit"] = 1000;
01965 (*settings)["failIfExtra"] = false;
01966 (*settings)["rejectDupKeys"] = false;
01968 }
01969
01971
01972
01973 bool parseFromStream(
01974 CharReader::Factory const& fact, std::istream& sin,
01975 Value* root, std::string* errs)
01976 {
01977 std::ostringstream ssin;
01978 ssin << sin.rdbuf();
01979 std::string doc = ssin.str();
01980 char const* begin = doc.data();
01981 char const* end = begin + doc.size();
01982
01983 CharReaderPtr const reader(fact.newCharReader());
01984 return reader->parse(begin, end, root, errs);
01985 }
01986
01987 std::istream& operator>>(std::istream& sin, Value& root) {
01988 CharReaderBuilder b;
01989 std::string errs;
01990 bool ok = parseFromStream(b, sin, &root, &errs);
01991 if (!ok) {
01992 fprintf(stderr,
01993 "Error from reader: %s",
01994 errs.c_str());
01995
01996 throwRuntimeError("reader error");
01997 }
01998 return sin;
01999 }
02000
02001 }