mirror of
				https://github.com/eledio-devices/thirdparty-ArduinoJson.git
				synced 2025-10-31 08:42:39 +01:00 
			
		
		
		
	Improved decoding of UTF-16 surrogate pairs (closes #1157)
This commit is contained in:
		
				
					committed by
					
						 Benoit Blanchon
						Benoit Blanchon
					
				
			
			
				
	
			
			
			
						parent
						
							8550418875
						
					
				
				
					commit
					91b808381e
				
			| @@ -7,7 +7,10 @@ HEAD | ||||
| * Added `BasicJsonDocument::shrinkToFit()` | ||||
| * Added support for `uint8_t` in `serializeJson()`, `serializeJsonPretty()`, and `serializeMsgPack()` (issue #1142) | ||||
| * Auto enable support for `std::string` and `std::stream` on modern compilers (issue #1156) | ||||
|   No need to define `ARDUINOJSON_ENABLE_STD_STRING` and `ARDUINOJSON_ENABLE_STD_STREAM`. | ||||
|   (No need to define `ARDUINOJSON_ENABLE_STD_STRING` and `ARDUINOJSON_ENABLE_STD_STREAM` anymore) | ||||
| * Improved decoding of UTF-16 surrogate pairs (PR #1157 by @kaysievers) | ||||
|   (ArduinoJson now produces standard UTF-8 instead of CESU-8) | ||||
|  | ||||
|  | ||||
| v6.13.0 (2019-11-01) | ||||
| ------- | ||||
|   | ||||
| @@ -7,9 +7,10 @@ | ||||
| #include <catch.hpp> | ||||
|  | ||||
| TEST_CASE("Invalid JSON input") { | ||||
|   const char* testCases[] = {"'\\u'",     "'\\u000g'", "'\\u000'", "'\\u000G'", | ||||
|                              "'\\u000/'", "\\x1234",   "6a9",      "1,", | ||||
|                              "2]",        "3}"}; | ||||
|   const char* testCases[] = { | ||||
|       "'\\u'",     "'\\u000g'",  "'\\u000'",  "'\\u000G'", "'\\ud83d\\ud83d'", | ||||
|       "'\\udda4'", "'\\ud83d_'", "'\\u000/'", "\\x1234",   "6a9", | ||||
|       "1,",        "2]",         "3}"}; | ||||
|   const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); | ||||
|  | ||||
|   DynamicJsonDocument doc(4096); | ||||
|   | ||||
| @@ -17,10 +17,10 @@ TEST_CASE("Valid JSON strings value") { | ||||
|       {"\'hello world\'", "hello world"}, | ||||
|       {"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"}, | ||||
|       {"'\\u0041'", "A"}, | ||||
|       {"'\\u00e4'", "\xc3\xa4"},      // ä | ||||
|       {"'\\u00E4'", "\xc3\xa4"},      // ä | ||||
|       {"'\\u3042'", "\xe3\x81\x82"},  // あ | ||||
|  | ||||
|       {"'\\u00e4'", "\xc3\xa4"},                 // ä | ||||
|       {"'\\u00E4'", "\xc3\xa4"},                 // ä | ||||
|       {"'\\u3042'", "\xe3\x81\x82"},             // あ | ||||
|       {"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"},  // 🖤 | ||||
|   }; | ||||
|   const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); | ||||
|  | ||||
|   | ||||
| @@ -189,6 +189,7 @@ class JsonDeserializer { | ||||
|  | ||||
|   DeserializationError parseQuotedString(const char *&result) { | ||||
|     StringBuilder builder = _stringStorage.startString(); | ||||
|     uint16_t surrogate1 = 0; | ||||
|     const char stopChar = current(); | ||||
|  | ||||
|     move(); | ||||
| @@ -208,7 +209,19 @@ class JsonDeserializer { | ||||
|           move(); | ||||
|           DeserializationError err = parseCodepoint(codepoint); | ||||
|           if (err) return err; | ||||
|           Utf8::encodeCodepoint(codepoint, builder); | ||||
|           if (codepoint >= 0xd800 && codepoint <= 0xdbff) { | ||||
|             if (surrogate1 > 0) return DeserializationError::InvalidInput; | ||||
|             surrogate1 = codepoint; | ||||
|           } else if (codepoint >= 0xdc00 && codepoint <= 0xdfff) { | ||||
|             if (surrogate1 == 0) return DeserializationError::InvalidInput; | ||||
|             uint32_t codepoint32 = 0x10000; | ||||
|             codepoint32 += static_cast<uint32_t>(surrogate1 - 0xd800) << 10; | ||||
|             codepoint32 += codepoint - 0xdc00; | ||||
|             Utf8::encodeCodepoint(codepoint32, builder); | ||||
|             surrogate1 = 0; | ||||
|           } else { | ||||
|             Utf8::encodeCodepoint(codepoint, builder); | ||||
|           } | ||||
|           continue; | ||||
| #else | ||||
|           return DeserializationError::NotSupported; | ||||
| @@ -220,6 +233,8 @@ class JsonDeserializer { | ||||
|         move(); | ||||
|       } | ||||
|  | ||||
|       if (surrogate1 > 0) return DeserializationError::InvalidInput; | ||||
|  | ||||
|       builder.append(c); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -10,17 +10,21 @@ namespace ARDUINOJSON_NAMESPACE { | ||||
|  | ||||
| namespace Utf8 { | ||||
| template <typename TStringBuilder> | ||||
| inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) { | ||||
| inline void encodeCodepoint(uint32_t codepoint, TStringBuilder &str) { | ||||
|   if (codepoint < 0x80) { | ||||
|     str.append(char(codepoint)); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   if (codepoint >= 0x00000800) { | ||||
|   if (codepoint < 0x00000800) { | ||||
|     str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6))); | ||||
|   } else if (codepoint < 0x00010000) { | ||||
|     str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12))); | ||||
|     str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80)); | ||||
|   } else { | ||||
|     str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6))); | ||||
|   } else if (codepoint < 0x00110000) { | ||||
|     str.append(char(0xf0 /*0b11110000*/ | (codepoint >> 18))); | ||||
|     str.append(char(((codepoint >> 12) & 0x3f /*0b00111111*/) | 0x80)); | ||||
|     str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80)); | ||||
|   } | ||||
|   str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80)); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user